Merge branch 'master' into replxx

This commit is contained in:
alexey-milovidov 2020-01-11 19:59:33 +03:00 committed by GitHub
commit f051844eb7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
176 changed files with 3680 additions and 1824 deletions

2
.github/CODEOWNERS vendored
View File

@ -1,4 +1,2 @@
dbms/* @ClickHouse/core-assigner
utils/* @ClickHouse/core-assigner
docs/* @ClickHouse/docs
docs/zh/* @ClickHouse/docs-zh

File diff suppressed because it is too large Load Diff

View File

@ -95,6 +95,8 @@ if (CMAKE_GENERATOR STREQUAL "Ninja")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fdiagnostics-color=always")
endif ()
include (cmake/add_warning.cmake)
if (NOT MSVC)
set (COMMON_WARNING_FLAGS "${COMMON_WARNING_FLAGS} -Wall") # -Werror is also added inside directories with our own code.
endif ()
@ -224,8 +226,8 @@ else ()
set(NOT_UNBUNDLED 1)
endif ()
# Using system libs can cause lot of warnings in includes (on macro expansion).
if (UNBUNDLED OR NOT (OS_LINUX OR APPLE) OR ARCH_32)
# Using system libs can cause a lot of warnings in includes (on macro expansion).
if (UNBUNDLED OR NOT (OS_LINUX OR OS_DARWIN) OR ARCH_32)
option (NO_WERROR "Disable -Werror compiler option" ON)
endif ()

View File

@ -11,3 +11,7 @@ ClickHouse is an open-source column-oriented database management system that all
* [Blog](https://clickhouse.yandex/blog/en/) contains various ClickHouse-related articles, as well as announces and reports about events.
* [Contacts](https://clickhouse.yandex/#contacts) can help to get your questions answered if there are any.
* You can also [fill this form](https://forms.yandex.com/surveys/meet-yandex-clickhouse-team/) to meet Yandex ClickHouse team in person.
## Upcoming Events
* [ClickHouse Meetup in San Francisco](https://www.eventbrite.com/e/clickhouse-february-meetup-registration-88496227599) on February 5.

18
cmake/add_warning.cmake Normal file
View File

@ -0,0 +1,18 @@
include (CheckCXXSourceCompiles)
# Try to add -Wflag if compiler supports it
macro (add_warning flag)
string (REPLACE "-" "_" underscored_flag ${flag})
string (REPLACE "+" "x" underscored_flag ${underscored_flag})
check_cxx_compiler_flag("-W${flag}" SUPPORTS_FLAG_${underscored_flag})
if (SUPPORTS_FLAG_${underscored_flag})
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -W${flag}")
else ()
message (WARNING "Flag -W${flag} is unsupported")
endif ()
endmacro ()
# Try to add -Wno flag if compiler supports it
macro (no_warning flag)
add_warning(no-${flag})
endmacro ()

View File

@ -13,12 +13,12 @@ set(CMAKE_C_STANDARD_LIBRARIES ${DEFAULT_LIBS})
# Minimal supported SDK version
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mmacosx-version-min=10.14")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mmacosx-version-min=10.14")
set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -mmacosx-version-min=10.14")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mmacosx-version-min=10.15")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mmacosx-version-min=10.15")
set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -mmacosx-version-min=10.15")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -mmacosx-version-min=10.14")
set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -mmacosx-version-min=10.14")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -mmacosx-version-min=10.15")
set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -mmacosx-version-min=10.15")
# Global libraries

2
contrib/googletest vendored

@ -1 +1 @@
Subproject commit d175c8bf823e709d570772b038757fadf63bc632
Subproject commit 703bd9caab50b139428cea1aaff9974ebee5742e

View File

@ -54,13 +54,12 @@ endif ()
target_compile_options(cxx PUBLIC $<$<COMPILE_LANGUAGE:CXX>:-nostdinc++>)
check_cxx_compiler_flag(-Wreserved-id-macro HAVE_WARNING_RESERVED_ID_MACRO)
if (HAVE_WARNING_RESERVED_ID_MACRO)
if (SUPPORTS_FLAG_no_reserved_id_macro)
target_compile_options(cxx PUBLIC -Wno-reserved-id-macro)
endif ()
check_cxx_compiler_flag(-Wctad-maybe-unsupported HAVE_WARNING_CTAD_MAYBE_UNSUPPORTED)
if (HAVE_WARNING_CTAD_MAYBE_UNSUPPORTED)
if (SUPPORTS_FLAG_no_ctad_maybe_unsupported)
target_compile_options(cxx PUBLIC -Wno-ctad-maybe-unsupported)
endif ()

View File

@ -45,36 +45,75 @@ endif ()
option (WEVERYTHING "Enables -Weverything option with some exceptions. This is intended for exploration of new compiler warnings that may be found to be useful. Only makes sense for clang." ON)
if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wpedantic -Wno-vla-extension -Wno-zero-length-array -Wno-gnu-anonymous-struct -Wno-nested-anon-types")
if (COMPILER_CLANG)
add_warning(pedantic)
no_warning(gnu-anonymous-struct)
no_warning(nested-anon-types)
no_warning(vla-extension)
no_warning(zero-length-array)
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wshadow -Wshadow-uncaptured-local -Wextra-semi -Wcomma -Winconsistent-missing-destructor-override -Wunused-exception-parameter -Wcovered-switch-default -Wold-style-cast -Wrange-loop-analysis -Wunused-member-function -Wunreachable-code -Wunreachable-code-return -Wnewline-eof -Wembedded-directive -Wgnu-case-range -Wunused-macros -Wconditional-uninitialized -Wdeprecated -Wundef -Wreserved-id-macro -Wredundant-parens -Wzero-as-null-pointer-constant")
add_warning(comma)
add_warning(conditional-uninitialized)
add_warning(covered-switch-default)
add_warning(deprecated)
add_warning(embedded-directive)
add_warning(empty-init-stmt) # linux-only
add_warning(extra-semi-stmt) # linux-only
add_warning(extra-semi)
add_warning(gnu-case-range)
add_warning(inconsistent-missing-destructor-override)
add_warning(newline-eof)
add_warning(old-style-cast)
add_warning(range-loop-analysis)
add_warning(redundant-parens)
add_warning(reserved-id-macro)
add_warning(shadow-field) # clang 8+
add_warning(shadow-uncaptured-local)
add_warning(shadow)
add_warning(string-plus-int) # clang 8+
add_warning(undef)
add_warning(unreachable-code-return)
add_warning(unreachable-code)
add_warning(unused-exception-parameter)
add_warning(unused-macros)
add_warning(unused-member-function)
add_warning(zero-as-null-pointer-constant)
if (WEVERYTHING)
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Weverything -Wno-c++98-compat -Wno-c++98-compat-pedantic -Wno-padded -Wno-switch-enum -Wno-deprecated-dynamic-exception-spec -Wno-float-equal -Wno-weak-vtables -Wno-shift-sign-overflow -Wno-sign-conversion -Wno-conversion -Wno-exit-time-destructors -Wno-undefined-func-template -Wno-documentation-unknown-command -Wno-missing-variable-declarations -Wno-unused-template -Wno-global-constructors -Wno-c99-extensions -Wno-missing-prototypes -Wno-weak-template-vtables -Wno-zero-length-array -Wno-gnu-anonymous-struct -Wno-nested-anon-types -Wno-double-promotion -Wno-disabled-macro-expansion -Wno-vla-extension -Wno-vla -Wno-packed")
add_warning(everything)
no_warning(c++98-compat-pedantic)
no_warning(c++98-compat)
no_warning(c99-extensions)
no_warning(conversion)
no_warning(ctad-maybe-unsupported) # clang 9+, linux-only
no_warning(deprecated-dynamic-exception-spec)
no_warning(disabled-macro-expansion)
no_warning(documentation-unknown-command)
no_warning(double-promotion)
no_warning(exit-time-destructors)
no_warning(float-equal)
no_warning(global-constructors)
no_warning(gnu-anonymous-struct)
no_warning(missing-prototypes)
no_warning(missing-variable-declarations)
no_warning(nested-anon-types)
no_warning(packed)
no_warning(padded)
no_warning(return-std-move-in-c++11) # clang 7+
no_warning(shift-sign-overflow)
no_warning(sign-conversion)
no_warning(switch-enum)
no_warning(undefined-func-template)
no_warning(unused-template)
no_warning(vla-extension)
no_warning(vla)
no_warning(weak-template-vtables)
no_warning(weak-vtables)
no_warning(zero-length-array)
# TODO Enable conversion, sign-conversion, double-promotion warnings.
endif ()
if (NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7)
if (WEVERYTHING)
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-return-std-move-in-c++11")
endif ()
endif ()
if (NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 8)
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wshadow-field -Wstring-plus-int")
if(NOT APPLE)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wextra-semi-stmt -Wempty-init-stmt")
endif()
endif ()
if (NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 9)
if (WEVERYTHING AND NOT APPLE)
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-ctad-maybe-unsupported")
endif ()
endif ()
elseif (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
elseif (COMPILER_GCC)
# Add compiler options only to c++ compiler
function(add_cxx_compile_options option)
add_compile_options("$<$<STREQUAL:$<TARGET_PROPERTY:LINKER_LANGUAGE>,CXX>:${option}>")
@ -156,7 +195,7 @@ if (USE_DEBUG_HELPERS)
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${INCLUDE_DEBUG_HELPERS}")
endif ()
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
if (COMPILER_GCC)
# If we leave this optimization enabled, gcc-7 replaces a pair of SSE intrinsics (16 byte load, store) with a call to memcpy.
# It leads to slow code. This is compiler bug. It looks like this:
#

View File

@ -254,7 +254,7 @@ private:
if (interrupt_listener.check())
{
std::cout << "Stopping launch of queries. SIGINT recieved.\n";
std::cout << "Stopping launch of queries. SIGINT received.\n";
return false;
}

View File

@ -98,7 +98,7 @@ namespace ErrorCodes
extern const int UNKNOWN_PACKET_FROM_SERVER;
extern const int UNEXPECTED_PACKET_FROM_SERVER;
extern const int CLIENT_OUTPUT_FORMAT_SPECIFIED;
extern const int LOGICAL_ERROR;
extern const int CANNOT_SET_SIGNAL_HANDLER;
extern const int CANNOT_READLINE;
extern const int SYSTEM_ERROR;
extern const int INVALID_USAGE_OF_INPUT;

View File

@ -70,7 +70,7 @@ int mainEntryClickHouseCompressor(int argc, char ** argv)
("hc", "use LZ4HC instead of LZ4")
("zstd", "use ZSTD instead of LZ4")
("codec", boost::program_options::value<std::vector<std::string>>()->multitoken(), "use codecs combination instead of LZ4")
("level", boost::program_options::value<int>(), "compression level for codecs spicified via flags")
("level", boost::program_options::value<int>(), "compression level for codecs specified via flags")
("none", "use no compression instead of LZ4")
("stat", "print block statistics of compressed data")
;

View File

@ -2430,7 +2430,7 @@ void ClusterCopierApp::defineOptions(Poco::Util::OptionSet & options)
.argument("copy-fault-probability").binding("copy-fault-probability"));
options.addOption(Poco::Util::Option("log-level", "", "sets log level")
.argument("log-level").binding("log-level"));
options.addOption(Poco::Util::Option("base-dir", "", "base directory for copiers, consequitive copier launches will populate /base-dir/launch_id/* directories")
options.addOption(Poco::Util::Option("base-dir", "", "base directory for copiers, consecutive copier launches will populate /base-dir/launch_id/* directories")
.argument("base-dir").binding("base-dir"));
using Me = std::decay_t<decltype(*this)>;

View File

@ -164,7 +164,7 @@ try
setupUsers();
/// Limit on total number of concurrently executing queries.
/// Threre are no need for concurrent threads, override max_concurrent_queries.
/// There is no need for concurrent threads, override max_concurrent_queries.
context->getProcessList().setMaxSize(0);
/// Size of cache for uncompressed blocks. Zero means disabled.
@ -182,7 +182,7 @@ try
context->setDefaultProfiles(config());
/** Init dummy default DB
* NOTE: We force using isolated default database to avoid conflicts with default database from server enviroment
* NOTE: We force using isolated default database to avoid conflicts with default database from server environment
* Otherwise, metadata of temporary File(format, EXPLICIT_PATH) tables will pollute metadata/ directory;
* if such tables will not be dropped, clickhouse-server will not be able to load them due to security reasons.
*/

View File

@ -40,7 +40,7 @@
#include <Common/TerminalSize.h>
static const char * documantation = R"(
static const char * documentation = R"(
Simple tool for table data obfuscation.
It reads input table and produces output table, that retain some properties of input, but contains different data.
@ -979,7 +979,7 @@ try
|| !options.count("input-format")
|| !options.count("output-format"))
{
std::cout << documantation << "\n"
std::cout << documentation << "\n"
<< "\nUsage: " << argv[0] << " [options] < in > out\n"
<< "\nInput must be seekable file (it will be read twice).\n"
<< "\n" << description << "\n"

View File

@ -138,7 +138,7 @@ void ODBCHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Ne
{
auto message = getCurrentExceptionMessage(true);
response.setStatusAndReason(
Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR); // can't call process_error, bacause of too soon response sending
Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR); // can't call process_error, because of too soon response sending
writeStringBinary(message, out);
tryLogCurrentException(log);
}

View File

@ -88,7 +88,7 @@ void ODBCBridge::defineOptions(Poco::Util::OptionSet & options)
options.addOption(
Poco::Util::Option("listen-host", "", "hostname to listen, default localhost").argument("listen-host").binding("listen-host"));
options.addOption(
Poco::Util::Option("http-timeout", "", "http timout for socket, default 1800").argument("http-timeout").binding("http-timeout"));
Poco::Util::Option("http-timeout", "", "http timeout for socket, default 1800").argument("http-timeout").binding("http-timeout"));
options.addOption(Poco::Util::Option("max-server-connections", "", "max connections to server, default 1024")
.argument("max-server-connections")

View File

@ -315,7 +315,7 @@ void PerformanceTest::runQueries(
stop_conditions.reportIterations(iteration);
if (stop_conditions.areFulfilled())
{
LOG_INFO(log, "Stop conditions fullfilled");
LOG_INFO(log, "Stop conditions fulfilled");
break;
}

View File

@ -200,7 +200,7 @@ private:
if (current.checkPreconditions())
{
LOG_INFO(log, "Preconditions for test '" << info.test_name << "' are fullfilled");
LOG_INFO(log, "Preconditions for test '" << info.test_name << "' are fulfilled");
LOG_INFO(
log,
"Preparing for run, have " << info.create_and_fill_queries.size() << " create and fill queries");
@ -219,7 +219,7 @@ private:
return {report_builder->buildFullReport(info, result, query_indexes[info.path]), current.checkSIGINT()};
}
else
LOG_INFO(log, "Preconditions for test '" << info.test_name << "' are not fullfilled, skip run");
LOG_INFO(log, "Preconditions for test '" << info.test_name << "' are not fulfilled, skip run");
return {"", current.checkSIGINT()};
}
@ -361,8 +361,8 @@ try
po::notify(options);
Poco::AutoPtr<Poco::PatternFormatter> formatter(new Poco::PatternFormatter("%Y.%m.%d %H:%M:%S.%F <%p> %s: %t"));
Poco::AutoPtr<Poco::ConsoleChannel> console_chanel(new Poco::ConsoleChannel);
Poco::AutoPtr<Poco::FormattingChannel> channel(new Poco::FormattingChannel(formatter, console_chanel));
Poco::AutoPtr<Poco::ConsoleChannel> console_channel(new Poco::ConsoleChannel);
Poco::AutoPtr<Poco::FormattingChannel> channel(new Poco::FormattingChannel(formatter, console_channel));
Poco::Logger::root().setLevel(options["log-level"].as<std::string>());
Poco::Logger::root().setChannel(channel);

View File

@ -117,7 +117,7 @@ std::string ReportBuilder::buildFullReport(
if (isASCIIString(statistics.exception))
runJSON.set("exception", jsonString(statistics.exception, settings), false);
else
runJSON.set("exception", "Some exception occured with non ASCII message. This may produce invalid JSON. Try reproduce locally.");
runJSON.set("exception", "Some exception occurred with non ASCII message. This may produce invalid JSON. Try reproduce locally.");
}
if (test_info.exec_type == ExecutionType::Loop)

View File

@ -28,23 +28,23 @@ std::pair<String, bool> InterserverIOHTTPHandler::checkAuthentication(Poco::Net:
if (config.has("interserver_http_credentials.user"))
{
if (!request.hasCredentials())
return {"Server requires HTTP Basic authentification, but client doesn't provide it", false};
return {"Server requires HTTP Basic authentication, but client doesn't provide it", false};
String scheme, info;
request.getCredentials(scheme, info);
if (scheme != "Basic")
return {"Server requires HTTP Basic authentification but client provides another method", false};
return {"Server requires HTTP Basic authentication but client provides another method", false};
String user = config.getString("interserver_http_credentials.user");
String password = config.getString("interserver_http_credentials.password", "");
Poco::Net::HTTPBasicCredentials credentials(info);
if (std::make_pair(user, password) != std::make_pair(credentials.getUsername(), credentials.getPassword()))
return {"Incorrect user or password in HTTP Basic authentification", false};
return {"Incorrect user or password in HTTP Basic authentication", false};
}
else if (request.hasCredentials())
{
return {"Client requires HTTP Basic authentification, but server doesn't provide it", false};
return {"Client requires HTTP Basic authentication, but server doesn't provide it", false};
}
return {"", true};
}
@ -99,7 +99,7 @@ void InterserverIOHTTPHandler::handleRequest(Poco::Net::HTTPServerRequest & requ
response.setStatusAndReason(Poco::Net::HTTPServerResponse::HTTP_UNAUTHORIZED);
if (!response.sent())
writeString(message, *used_output.out);
LOG_WARNING(log, "Query processing failed request: '" << request.getURI() << "' authentification failed");
LOG_WARNING(log, "Query processing failed request: '" << request.getURI() << "' authentication failed");
}
}
catch (Exception & e)

View File

@ -31,7 +31,7 @@ public:
template <typename HandlerType>
class PrometeusRequestHandlerFactory : public Poco::Net::HTTPRequestHandlerFactory
class PrometheusRequestHandlerFactory : public Poco::Net::HTTPRequestHandlerFactory
{
private:
IServer & server;
@ -39,7 +39,7 @@ private:
PrometheusMetricsWriter metrics_writer;
public:
PrometeusRequestHandlerFactory(IServer & server_, const AsynchronousMetrics & async_metrics_)
PrometheusRequestHandlerFactory(IServer & server_, const AsynchronousMetrics & async_metrics_)
: server(server_)
, endpoint_path(server_.config().getString("prometheus.endpoint", "/metrics"))
, metrics_writer(server_.config(), "prometheus", async_metrics_)
@ -56,6 +56,6 @@ public:
}
};
using PrometeusHandlerFactory = PrometeusRequestHandlerFactory<PrometheusRequestHandler>;
using PrometheusHandlerFactory = PrometheusRequestHandlerFactory<PrometheusRequestHandler>;
}

View File

@ -554,8 +554,8 @@ int Server::main(const std::vector<std::string> & /*args*/)
///
/// It also cannot work with sanitizers.
/// Sanitizers are using quick "frame walking" stack unwinding (this implies -fno-omit-frame-pointer)
/// And they do unwiding frequently (on every malloc/free, thread/mutex operations, etc).
/// They change %rbp during unwinding and it confuses libunwind if signal comes during sanitizer unwiding
/// And they do unwinding frequently (on every malloc/free, thread/mutex operations, etc).
/// They change %rbp during unwinding and it confuses libunwind if signal comes during sanitizer unwinding
/// and query profiler decide to unwind stack with libunwind at this moment.
///
/// Symptoms: you'll get silent Segmentation Fault - without sanitizer message and without usual ClickHouse diagnostics.
@ -724,7 +724,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
socket.setSendTimeout(settings.http_send_timeout);
auto handler_factory = createDefaultHandlerFatory<HTTPHandler>(*this, "HTTPHandler-factory");
if (config().has("prometheus") && config().getInt("prometheus.port", 0) == 0)
handler_factory->addHandler<PrometeusHandlerFactory>(async_metrics);
handler_factory->addHandler<PrometheusHandlerFactory>(async_metrics);
servers.emplace_back(std::make_unique<Poco::Net::HTTPServer>(
handler_factory,
@ -854,7 +854,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
socket.setReceiveTimeout(settings.http_receive_timeout);
socket.setSendTimeout(settings.http_send_timeout);
auto handler_factory = new HTTPRequestHandlerFactoryMain(*this, "PrometheusHandler-factory");
handler_factory->addHandler<PrometeusHandlerFactory>(async_metrics);
handler_factory->addHandler<PrometheusHandlerFactory>(async_metrics);
servers.emplace_back(std::make_unique<Poco::Net::HTTPServer>(
handler_factory,
server_pool,

View File

@ -0,0 +1,7 @@
<yandex>
<part_log>
<database>system</database>
<table>part_log</table>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
</part_log>
</yandex>

View File

@ -83,30 +83,7 @@
<!-- Quota for user. -->
<quota>default</quota>
<!-- Example of row level security policy. -->
<!-- <databases>
<test>
<filtered_table1>
<filter>a = 1</filter>
</filtered_table1>
<filtered_table2>
<filter>a + b &lt; 1 or c - d &gt; 5</filter>
</filtered_table2>
</test>
</databases> -->
</default>
<!-- Example of user with readonly access. -->
<!-- <readonly>
<password></password>
<networks incl="networks" replace="replace">
<ip>::1</ip>
<ip>127.0.0.1</ip>
</networks>
<profile>readonly</profile>
<quota>default</quota>
</readonly> -->
</users>
<!-- Quotas. -->

View File

@ -101,9 +101,6 @@ namespace
public:
void add(const ASTPtr & condition, bool is_restrictive)
{
if (!condition)
return;
if (is_restrictive)
restrictions.push_back(condition);
else
@ -139,29 +136,32 @@ void RowPolicyContextFactory::PolicyInfo::setPolicy(const RowPolicyPtr & policy_
for (auto index : ext::range_with_static_cast<ConditionIndex>(0, MAX_CONDITION_INDEX))
{
parsed_conditions[index] = nullptr;
const String & condition = policy->conditions[index];
if (condition.empty())
continue;
auto previous_range = std::pair(std::begin(policy->conditions), std::begin(policy->conditions) + index);
auto previous_it = std::find(previous_range.first, previous_range.second, condition);
if (previous_it != previous_range.second)
{
/// The condition is already parsed before.
parsed_conditions[index] = parsed_conditions[previous_it - previous_range.first];
continue;
}
else
/// Try to parse the condition.
try
{
/// Try to parse the condition.
try
{
ParserExpression parser;
parsed_conditions[index] = parseQuery(parser, condition, 0);
}
catch (...)
{
tryLogCurrentException(
&Poco::Logger::get("RowPolicy"),
String("Could not parse the condition ") + RowPolicy::conditionIndexToString(index) + " of row policy "
+ backQuote(policy->getFullName()));
}
ParserExpression parser;
parsed_conditions[index] = parseQuery(parser, condition, 0);
}
catch (...)
{
tryLogCurrentException(
&Poco::Logger::get("RowPolicy"),
String("Could not parse the condition ") + RowPolicy::conditionIndexToString(index) + " of row policy "
+ backQuote(policy->getFullName()));
}
}
}
@ -290,7 +290,8 @@ void RowPolicyContextFactory::mixConditionsForContext(RowPolicyContext & context
auto & mixers = map_of_mixers[std::pair{policy.getDatabase(), policy.getTableName()}];
mixers.policy_ids.push_back(policy_id);
for (auto index : ext::range(0, MAX_CONDITION_INDEX))
mixers.mixers[index].add(info.parsed_conditions[index], policy.isRestrictive());
if (info.parsed_conditions[index])
mixers.mixers[index].add(info.parsed_conditions[index], policy.isRestrictive());
}
}

View File

@ -135,13 +135,25 @@ namespace
for (const String & database : databases)
{
const String database_config = databases_config + "." + database;
Poco::Util::AbstractConfiguration::Keys table_names;
config.keys(database_config, table_names);
Poco::Util::AbstractConfiguration::Keys keys_in_database_config;
config.keys(database_config, keys_in_database_config);
/// Read table properties
for (const String & table_name : table_names)
for (const String & key_in_database_config : keys_in_database_config)
{
const auto filter_config = database_config + "." + table_name + ".filter";
String table_name = key_in_database_config;
String filter_config = database_config + "." + table_name + ".filter";
if (key_in_database_config.starts_with("table["))
{
const auto table_name_config = database_config + "." + table_name + "[@name]";
if (config.has(table_name_config))
{
table_name = config.getString(table_name_config);
filter_config = database_config + ".table[@name='" + table_name + "']";
}
}
if (config.has(filter_config))
{
try

View File

@ -54,7 +54,7 @@ private:
{
AggregateFunctionForEachData & state = data(place);
/// Ensure we have aggreate states for new_size elements, allocate
/// Ensure we have aggregate states for new_size elements, allocate
/// from arena if needed. When reallocating, we can't copy the
/// states to new buffer with memcpy, because they may contain pointers
/// to themselves. In particular, this happens when a state contains

View File

@ -774,9 +774,7 @@ std::unique_ptr<Exception> Connection::receiveException()
{
//LOG_TRACE(log_wrapper.get(), "Receiving exception");
Exception e;
readException(e, *in, "Received from " + getDescription());
return std::unique_ptr<Exception>{ e.clone() };
return std::make_unique<Exception>(readException(*in, "Received from " + getDescription()));
}

View File

@ -15,7 +15,7 @@
private:
friend class COW<Column>;
/// Leave all constructors in private section. They will be avaliable through 'create' method.
/// Leave all constructors in private section. They will be available through 'create' method.
Column();
/// Provide 'clone' method. It can be virtual if you want polymorphic behaviour.

View File

@ -23,6 +23,7 @@ namespace ErrorCodes
extern const int UNKNOWN_EXCEPTION;
extern const int CANNOT_TRUNCATE_FILE;
extern const int NOT_IMPLEMENTED;
extern const int LOGICAL_ERROR;
}
@ -33,6 +34,8 @@ Exception::Exception()
Exception::Exception(const std::string & msg, int code)
: Poco::Exception(msg, code)
{
// In debug builds, treat LOGICAL_ERROR as an assertion failure.
assert(code != ErrorCodes::LOGICAL_ERROR);
}
Exception::Exception(CreateFromPocoTag, const Poco::Exception & exc)

View File

@ -85,7 +85,7 @@ TEST(zkutil, multi_async)
ops.clear();
auto res = fut.get();
ASSERT_TRUE(res.error == Coordination::ZOK);
ASSERT_EQ(res.error, Coordination::ZOK);
ASSERT_EQ(res.responses.size(), 2);
}
@ -121,7 +121,7 @@ TEST(zkutil, multi_async)
ops.clear();
auto res = fut.get();
ASSERT_TRUE(res.error == Coordination::ZNODEEXISTS);
ASSERT_EQ(res.error, Coordination::ZNODEEXISTS);
ASSERT_EQ(res.responses.size(), 2);
}
}

View File

@ -516,13 +516,13 @@ UInt32 CompressionCodecT64::doCompressData(const char * src, UInt32 src_size, ch
break;
}
throw Exception("Connot compress with T64", ErrorCodes::CANNOT_COMPRESS);
throw Exception("Cannot compress with T64", ErrorCodes::CANNOT_COMPRESS);
}
void CompressionCodecT64::doDecompressData(const char * src, UInt32 src_size, char * dst, UInt32 uncompressed_size) const
{
if (!src_size)
throw Exception("Connot decompress with T64", ErrorCodes::CANNOT_DECOMPRESS);
throw Exception("Cannot decompress with T64", ErrorCodes::CANNOT_DECOMPRESS);
UInt8 cookie = unalignedLoad<UInt8>(src);
src += 1;
@ -553,7 +553,7 @@ void CompressionCodecT64::doDecompressData(const char * src, UInt32 src_size, ch
break;
}
throw Exception("Connot decompress with T64", ErrorCodes::CANNOT_DECOMPRESS);
throw Exception("Cannot decompress with T64", ErrorCodes::CANNOT_DECOMPRESS);
}
void CompressionCodecT64::useInfoAboutType(DataTypePtr data_type)

View File

@ -301,10 +301,6 @@ struct Codec
: codec_statement(std::move(codec_statement_)),
expected_compression_ratio(expected_compression_ratio_)
{}
Codec()
: Codec(std::string())
{}
};
@ -314,23 +310,12 @@ struct CodecTestSequence
std::vector<char> serialized_data;
DataTypePtr data_type;
CodecTestSequence()
: name(),
serialized_data(),
data_type()
{}
CodecTestSequence(std::string name_, std::vector<char> serialized_data_, DataTypePtr data_type_)
: name(name_),
serialized_data(serialized_data_),
data_type(data_type_)
{}
CodecTestSequence(const CodecTestSequence &) = default;
CodecTestSequence & operator=(const CodecTestSequence &) = default;
CodecTestSequence(CodecTestSequence &&) = default;
CodecTestSequence & operator=(CodecTestSequence &&) = default;
CodecTestSequence & append(const CodecTestSequence & other)
{
assert(data_type->equals(*other.data_type));
@ -819,24 +804,6 @@ std::vector<CodecTestSequence> generatePyramidOfSequences(const size_t sequences
return sequences;
};
// Just as if all sequences from generatePyramidOfSequences were appended to one-by-one to the first one.
template <typename T, typename Generator>
CodecTestSequence generatePyramidSequence(const size_t sequences_count, Generator && generator, const char* generator_name)
{
CodecTestSequence sequence;
sequence.data_type = makeDataType<T>();
sequence.serialized_data.reserve(sequences_count * sequences_count * sizeof(T));
for (size_t i = 1; i < sequences_count; ++i)
{
std::string name = generator_name + std::string(" from 0 to ") + std::to_string(i);
sequence.append(generateSeq<T>(std::forward<decltype(generator)>(generator), name.c_str(), 0, i));
}
return sequence;
};
// helper macro to produce human-friendly sequence name from generator
#define G(generator) generator, #generator
@ -853,17 +820,17 @@ const auto DefaultCodecsToTest = ::testing::Values(
// test cases
///////////////////////////////////////////////////////////////////////////////////////////////////
INSTANTIATE_TEST_CASE_P(Simple,
INSTANTIATE_TEST_SUITE_P(Simple,
CodecTest,
::testing::Combine(
DefaultCodecsToTest,
::testing::Values(
makeSeq<Float64>(1, 2, 3, 5, 7, 11, 13, 17, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97)
)
),
)
);
INSTANTIATE_TEST_CASE_P(SmallSequences,
INSTANTIATE_TEST_SUITE_P(SmallSequences,
CodecTest,
::testing::Combine(
DefaultCodecsToTest,
@ -877,10 +844,10 @@ INSTANTIATE_TEST_CASE_P(SmallSequences,
+ generatePyramidOfSequences<UInt32>(42, G(SequentialGenerator(1)))
+ generatePyramidOfSequences<UInt64>(42, G(SequentialGenerator(1)))
)
),
)
);
INSTANTIATE_TEST_CASE_P(Mixed,
INSTANTIATE_TEST_SUITE_P(Mixed,
CodecTest,
::testing::Combine(
DefaultCodecsToTest,
@ -894,10 +861,10 @@ INSTANTIATE_TEST_CASE_P(Mixed,
generateSeq<UInt32>(G(MinMaxGenerator()), 1, 5) + generateSeq<UInt32>(G(SequentialGenerator(1)), 1, 1001),
generateSeq<UInt64>(G(MinMaxGenerator()), 1, 5) + generateSeq<UInt64>(G(SequentialGenerator(1)), 1, 1001)
)
),
)
);
INSTANTIATE_TEST_CASE_P(SameValueInt,
INSTANTIATE_TEST_SUITE_P(SameValueInt,
CodecTest,
::testing::Combine(
DefaultCodecsToTest,
@ -911,10 +878,10 @@ INSTANTIATE_TEST_CASE_P(SameValueInt,
generateSeq<UInt32>(G(SameValueGenerator(1000))),
generateSeq<UInt64>(G(SameValueGenerator(1000)))
)
),
)
);
INSTANTIATE_TEST_CASE_P(SameNegativeValueInt,
INSTANTIATE_TEST_SUITE_P(SameNegativeValueInt,
CodecTest,
::testing::Combine(
DefaultCodecsToTest,
@ -928,10 +895,10 @@ INSTANTIATE_TEST_CASE_P(SameNegativeValueInt,
generateSeq<UInt32>(G(SameValueGenerator(-1000))),
generateSeq<UInt64>(G(SameValueGenerator(-1000)))
)
),
)
);
INSTANTIATE_TEST_CASE_P(SameValueFloat,
INSTANTIATE_TEST_SUITE_P(SameValueFloat,
CodecTest,
::testing::Combine(
::testing::Values(
@ -942,10 +909,10 @@ INSTANTIATE_TEST_CASE_P(SameValueFloat,
generateSeq<Float32>(G(SameValueGenerator(M_E))),
generateSeq<Float64>(G(SameValueGenerator(M_E)))
)
),
)
);
INSTANTIATE_TEST_CASE_P(SameNegativeValueFloat,
INSTANTIATE_TEST_SUITE_P(SameNegativeValueFloat,
CodecTest,
::testing::Combine(
::testing::Values(
@ -956,10 +923,10 @@ INSTANTIATE_TEST_CASE_P(SameNegativeValueFloat,
generateSeq<Float32>(G(SameValueGenerator(-1 * M_E))),
generateSeq<Float64>(G(SameValueGenerator(-1 * M_E)))
)
),
)
);
INSTANTIATE_TEST_CASE_P(SequentialInt,
INSTANTIATE_TEST_SUITE_P(SequentialInt,
CodecTest,
::testing::Combine(
DefaultCodecsToTest,
@ -973,12 +940,12 @@ INSTANTIATE_TEST_CASE_P(SequentialInt,
generateSeq<UInt32>(G(SequentialGenerator(1))),
generateSeq<UInt64>(G(SequentialGenerator(1)))
)
),
)
);
// -1, -2, -3, ... etc for signed
// 0xFF, 0xFE, 0xFD, ... for unsigned
INSTANTIATE_TEST_CASE_P(SequentialReverseInt,
INSTANTIATE_TEST_SUITE_P(SequentialReverseInt,
CodecTest,
::testing::Combine(
DefaultCodecsToTest,
@ -992,10 +959,10 @@ INSTANTIATE_TEST_CASE_P(SequentialReverseInt,
generateSeq<UInt32>(G(SequentialGenerator(-1))),
generateSeq<UInt64>(G(SequentialGenerator(-1)))
)
),
)
);
INSTANTIATE_TEST_CASE_P(SequentialFloat,
INSTANTIATE_TEST_SUITE_P(SequentialFloat,
CodecTest,
::testing::Combine(
::testing::Values(
@ -1006,10 +973,10 @@ INSTANTIATE_TEST_CASE_P(SequentialFloat,
generateSeq<Float32>(G(SequentialGenerator(M_E))),
generateSeq<Float64>(G(SequentialGenerator(M_E)))
)
),
)
);
INSTANTIATE_TEST_CASE_P(SequentialReverseFloat,
INSTANTIATE_TEST_SUITE_P(SequentialReverseFloat,
CodecTest,
::testing::Combine(
::testing::Values(
@ -1020,10 +987,10 @@ INSTANTIATE_TEST_CASE_P(SequentialReverseFloat,
generateSeq<Float32>(G(SequentialGenerator(-1 * M_E))),
generateSeq<Float64>(G(SequentialGenerator(-1 * M_E)))
)
),
)
);
INSTANTIATE_TEST_CASE_P(MonotonicInt,
INSTANTIATE_TEST_SUITE_P(MonotonicInt,
CodecTest,
::testing::Combine(
DefaultCodecsToTest,
@ -1037,10 +1004,10 @@ INSTANTIATE_TEST_CASE_P(MonotonicInt,
generateSeq<UInt32>(G(MonotonicGenerator(1, 5))),
generateSeq<UInt64>(G(MonotonicGenerator(1, 5)))
)
),
)
);
INSTANTIATE_TEST_CASE_P(MonotonicReverseInt,
INSTANTIATE_TEST_SUITE_P(MonotonicReverseInt,
CodecTest,
::testing::Combine(
DefaultCodecsToTest,
@ -1054,10 +1021,10 @@ INSTANTIATE_TEST_CASE_P(MonotonicReverseInt,
generateSeq<UInt32>(G(MonotonicGenerator(-1, 5))),
generateSeq<UInt64>(G(MonotonicGenerator(-1, 5)))
)
),
)
);
INSTANTIATE_TEST_CASE_P(MonotonicFloat,
INSTANTIATE_TEST_SUITE_P(MonotonicFloat,
CodecTest,
::testing::Combine(
::testing::Values(
@ -1067,10 +1034,10 @@ INSTANTIATE_TEST_CASE_P(MonotonicFloat,
generateSeq<Float32>(G(MonotonicGenerator<Float32>(M_E, 5))),
generateSeq<Float64>(G(MonotonicGenerator<Float64>(M_E, 5)))
)
),
)
);
INSTANTIATE_TEST_CASE_P(MonotonicReverseFloat,
INSTANTIATE_TEST_SUITE_P(MonotonicReverseFloat,
CodecTest,
::testing::Combine(
::testing::Values(
@ -1080,10 +1047,10 @@ INSTANTIATE_TEST_CASE_P(MonotonicReverseFloat,
generateSeq<Float32>(G(MonotonicGenerator<Float32>(-1 * M_E, 5))),
generateSeq<Float64>(G(MonotonicGenerator<Float64>(-1 * M_E, 5)))
)
),
)
);
INSTANTIATE_TEST_CASE_P(RandomInt,
INSTANTIATE_TEST_SUITE_P(RandomInt,
CodecTest,
::testing::Combine(
DefaultCodecsToTest,
@ -1093,10 +1060,10 @@ INSTANTIATE_TEST_CASE_P(RandomInt,
generateSeq<UInt32>(G(RandomGenerator<UInt32>(0, 0, 1000'000'000))),
generateSeq<UInt64>(G(RandomGenerator<UInt64>(0, 0, 1000'000'000)))
)
),
)
);
INSTANTIATE_TEST_CASE_P(RandomishInt,
INSTANTIATE_TEST_SUITE_P(RandomishInt,
CodecTest,
::testing::Combine(
DefaultCodecsToTest,
@ -1108,10 +1075,10 @@ INSTANTIATE_TEST_CASE_P(RandomishInt,
generateSeq<Float32>(G(RandomishGenerator)),
generateSeq<Float64>(G(RandomishGenerator))
)
),
)
);
INSTANTIATE_TEST_CASE_P(RandomishFloat,
INSTANTIATE_TEST_SUITE_P(RandomishFloat,
CodecTest,
::testing::Combine(
DefaultCodecsToTest,
@ -1119,11 +1086,11 @@ INSTANTIATE_TEST_CASE_P(RandomishFloat,
generateSeq<Float32>(G(RandomishGenerator)),
generateSeq<Float64>(G(RandomishGenerator))
)
),
)
);
// Double delta overflow case, deltas are out of bounds for target type
INSTANTIATE_TEST_CASE_P(OverflowInt,
INSTANTIATE_TEST_SUITE_P(OverflowInt,
CodecTest,
::testing::Combine(
::testing::Values(
@ -1136,10 +1103,10 @@ INSTANTIATE_TEST_CASE_P(OverflowInt,
generateSeq<UInt64>(G(MinMaxGenerator())),
generateSeq<Int64>(G(MinMaxGenerator()))
)
),
)
);
INSTANTIATE_TEST_CASE_P(OverflowFloat,
INSTANTIATE_TEST_SUITE_P(OverflowFloat,
CodecTest,
::testing::Combine(
::testing::Values(
@ -1152,7 +1119,7 @@ INSTANTIATE_TEST_CASE_P(OverflowFloat,
generateSeq<Float32>(G(FFand0Generator())),
generateSeq<Float64>(G(FFand0Generator()))
)
),
)
);
template <typename ValueType>
@ -1189,7 +1156,7 @@ auto DDCompatibilityTestSequence()
#define BIN_STR(x) std::string{x, sizeof(x) - 1}
INSTANTIATE_TEST_CASE_P(DoubleDelta,
INSTANTIATE_TEST_SUITE_P(DoubleDelta,
CodecTest_Compatibility,
::testing::Combine(
::testing::Values(Codec("DoubleDelta")),
@ -1227,7 +1194,7 @@ INSTANTIATE_TEST_CASE_P(DoubleDelta,
BIN_STR("\x94\xd4\x00\x00\x00\x98\x01\x00\x00\x08\x00\x33\x00\x00\x00\x2a\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x6b\x65\x5f\x50\x34\xff\x4f\xaf\xbc\xe3\x5d\xa3\xd3\xd9\xf6\x1f\xe2\x07\x7c\x47\x20\x67\x48\x07\x47\xff\x47\xf6\xfe\xf8\x00\x00\x70\x6b\xd0\x00\x02\x83\xd9\xfb\x9f\xdc\x1f\xfc\x20\x1e\x80\x00\x22\xc8\xf0\x00\x00\x66\x67\xa0\x00\x02\x00\x3d\x00\x00\x0f\xff\xe8\x00\x00\x7f\xee\xff\xdf\x00\x00\x70\x0d\x7a\x00\x02\x80\x7b\x9f\xf7\x9f\xfb\xc0\x00\x00\xff\xfe\x00\x00\x08\x00\xfc\x00\x00\x00\x04\x00\x06\xbe\x4f\xbf\xff\xd6\x0c\xff\x00\x00\x00\x01\x00\x00\x00\x03\xf8\x00\x00\x00\x08\x00\x00\x00\x0f\xc0\x00\x00\x00\x3f\xff\xff\xff\xfb\xff\xff\xff\xfb\xe0\x00\x00\x01\xc0\x00\x00\x06\x9f\x80\x00\x00\x0a\x00\x00\x00\x34\xf3\xff\xff\xff\xe7\x9f\xff\xff\xff\x7e\x00\x00\x00\x00\xff\xff\xff\xfd\xf0\x00\x00\x00\x07\xff\xff\xff\xf0")
},
})
),
)
);
template <typename ValueType>
@ -1263,7 +1230,7 @@ auto GCompatibilityTestSequence()
return generateSeq<ValueType>(G(PrimesWithMultiplierGenerator(intExp10(sizeof(ValueType)))), 0, 42);
}
INSTANTIATE_TEST_CASE_P(Gorilla,
INSTANTIATE_TEST_SUITE_P(Gorilla,
CodecTest_Compatibility,
::testing::Combine(
::testing::Values(Codec("Gorilla")),
@ -1301,14 +1268,31 @@ INSTANTIATE_TEST_CASE_P(Gorilla,
BIN_STR("\x95\x91\x00\x00\x00\x50\x01\x00\x00\x08\x00\x2a\x00\x00\x00\x00\xc2\xeb\x0b\x00\x00\x00\x00\xe3\x2b\xa0\xa6\x19\x85\x98\xdc\x45\x74\x74\x43\xc2\x57\x41\x4c\x6e\x42\x79\xd9\x8f\x88\xa5\x05\xf3\xf1\x94\xa3\x62\x1e\x02\xdf\x05\x10\xf1\x15\x97\x35\x2a\x50\x71\x0f\x09\x6c\x89\xf7\x65\x1d\x11\xb7\xcc\x7d\x0b\x70\xc1\x86\x88\x48\x47\x87\xb6\x32\x26\xa7\x86\x87\x88\xd3\x93\x3d\xfc\x28\x68\x85\x05\x0b\x13\xc6\x5f\xd4\x70\xe1\x5e\x76\xf1\x9f\xf3\x33\x2a\x14\x14\x5e\x40\xc1\x5c\x28\x3f\xec\x43\x03\x05\x11\x91\xe8\xeb\x8e\x0a\x0e\x27\x21\x55\xcb\x39\xbc\x6a\xff\x11\x5d\x81\xa0\xa6\x10")
},
})
),
)
);
// These 'tests' try to measure performance of encoding and decoding and hence only make sence to be run locally,
// also they require pretty big data to run agains and generating this data slows down startup of unit test process.
// So un-comment only at your discretion.
//INSTANTIATE_TEST_CASE_P(DoubleDelta,
// Just as if all sequences from generatePyramidOfSequences were appended to one-by-one to the first one.
//template <typename T, typename Generator>
//CodecTestSequence generatePyramidSequence(const size_t sequences_count, Generator && generator, const char* generator_name)
//{
// CodecTestSequence sequence;
// sequence.data_type = makeDataType<T>();
// sequence.serialized_data.reserve(sequences_count * sequences_count * sizeof(T));
//
// for (size_t i = 1; i < sequences_count; ++i)
// {
// std::string name = generator_name + std::string(" from 0 to ") + std::to_string(i);
// sequence.append(generateSeq<T>(std::forward<decltype(generator)>(generator), name.c_str(), 0, i));
// }
//
// return sequence;
//};
//INSTANTIATE_TEST_SUITE_P(DoubleDelta,
// CodecTest_Performance,
// ::testing::Combine(
// ::testing::Values(Codec("DoubleDelta")),
@ -1325,7 +1309,7 @@ INSTANTIATE_TEST_CASE_P(Gorilla,
// ),
//);
//INSTANTIATE_TEST_CASE_P(Gorilla,
//INSTANTIATE_TEST_SUITE_P(Gorilla,
// CodecTest_Performance,
// ::testing::Combine(
// ::testing::Values(Codec("Gorilla")),

View File

@ -120,7 +120,7 @@ TEST_P(DecimalUtilsSplitAndCombineTest, getFractionalPart_Decimal128)
}
// Intentionally small values that fit into 32-bit in order to cover Decimal32, Decimal64 and Decimal128 with single set of data.
INSTANTIATE_TEST_CASE_P(Basic,
INSTANTIATE_TEST_SUITE_P(Basic,
DecimalUtilsSplitAndCombineTest,
::testing::ValuesIn(std::initializer_list<DecimalUtilsSplitAndCombineTestParam>{
{
@ -168,5 +168,5 @@ INSTANTIATE_TEST_CASE_P(Basic,
89
}
}
}
),);
})
);

View File

@ -62,6 +62,9 @@ Block AddingDefaultsBlockInputStream::readImpl()
if (evaluate_block.has(column.first))
evaluate_block.erase(column.first);
if (!evaluate_block.columns())
evaluate_block.insert({ColumnConst::create(ColumnUInt8::create(1, 0), res.rows()), std::make_shared<DataTypeUInt8>(), "_dummy"});
evaluateMissingDefaults(evaluate_block, header.getNamesAndTypesList(), column_defaults, context, false);
std::unordered_map<size_t, MutableColumnPtr> mixed_columns;

View File

@ -60,8 +60,6 @@ AggregatingSortedBlockInputStream::AggregatingSortedBlockInputStream(
const BlockInputStreams & inputs_, const SortDescription & description_, size_t max_block_size_)
: MergingSortedBlockInputStream(inputs_, description_, max_block_size_)
{
ColumnNumbers positions;
/// Fill in the column numbers that need to be aggregated.
for (size_t i = 0; i < num_columns; ++i)
{
@ -96,7 +94,7 @@ AggregatingSortedBlockInputStream::AggregatingSortedBlockInputStream(
columns_to_simple_aggregate.emplace_back(std::move(desc));
if (recursiveRemoveLowCardinality(column.type).get() != column.type.get())
positions.emplace_back(i);
converted_lc_columns.emplace_back(i);
}
else
{
@ -105,10 +103,12 @@ AggregatingSortedBlockInputStream::AggregatingSortedBlockInputStream(
}
}
if (!positions.empty())
result_header = header;
if (!converted_lc_columns.empty())
{
for (auto & input : children)
input = std::make_shared<RemovingLowCardinalityBlockInputStream>(input, positions);
input = std::make_shared<RemovingLowCardinalityBlockInputStream>(input, converted_lc_columns);
header = children.at(0)->getHeader();
}
@ -134,7 +134,15 @@ Block AggregatingSortedBlockInputStream::readImpl()
columns_to_aggregate[i] = typeid_cast<ColumnAggregateFunction *>(merged_columns[column_numbers_to_aggregate[i]].get());
merge(merged_columns, queue_without_collation);
return header.cloneWithColumns(std::move(merged_columns));
for (auto & pos : converted_lc_columns)
{
auto & from_type = header.getByPosition(pos).type;
auto & to_type = result_header.getByPosition(pos).type;
merged_columns[pos] = (*recursiveTypeConversion(std::move(merged_columns[pos]), from_type, to_type)).mutate();
}
return result_header.cloneWithColumns(std::move(merged_columns));
}

View File

@ -31,6 +31,8 @@ public:
bool isSortedOutput() const override { return true; }
Block getHeader() const override { return result_header; }
protected:
/// Can return 1 more records than max_block_size.
Block readImpl() override;
@ -52,6 +54,9 @@ private:
SharedBlockRowRef current_key; /// The current primary key.
SharedBlockRowRef next_key; /// The primary key of the next row.
Block result_header;
ColumnNumbers converted_lc_columns;
/** We support two different cursors - with Collation and without.
* Templates are used instead of polymorphic SortCursor and calls to virtual functions.
*/

View File

@ -35,7 +35,7 @@ DataTypePtr DataTypeFactory::get(const ASTPtr & ast) const
if (const auto * func = ast->as<ASTFunction>())
{
if (func->parameters)
throw Exception("Data type cannot have multiple parenthesed parameters.", ErrorCodes::ILLEGAL_SYNTAX_FOR_DATA_TYPE);
throw Exception("Data type cannot have multiple parenthesized parameters.", ErrorCodes::ILLEGAL_SYNTAX_FOR_DATA_TYPE);
return get(func->name, func->arguments);
}

View File

@ -104,7 +104,7 @@ TEST_P(MostSubtypeTest, getLeastSupertype)
}
}
INSTANTIATE_TEST_CASE_P(data_type,
INSTANTIATE_TEST_SUITE_P(data_type,
LeastSuperTypeTest,
::testing::ValuesIn(
std::initializer_list<TypesTestCase>{
@ -159,10 +159,10 @@ INSTANTIATE_TEST_CASE_P(data_type,
{"Tuple(Int64,Int8) Tuple(UInt64)", nullptr},
{"Array(Int64) Array(String)", nullptr},
}
),
)
);
INSTANTIATE_TEST_CASE_P(data_type,
INSTANTIATE_TEST_SUITE_P(data_type,
MostSubtypeTest,
::testing::ValuesIn(
std::initializer_list<TypesTestCase>{
@ -210,5 +210,6 @@ INSTANTIATE_TEST_CASE_P(data_type,
{"Int8 String", nullptr},
{"Nothing", nullptr},
{"FixedString(16) FixedString(8) String", nullptr},
}),
}
)
);

View File

@ -32,7 +32,7 @@ public:
protected:
DatabaseWithDictionaries(const String & name, const String & metadata_path_, const String & logger)
: DatabaseOnDisk(name, metadata_path_, logger) {}
: DatabaseOnDisk(name, metadata_path_, logger) {}
void attachToExternalDictionariesLoader(Context & context);
void detachFromExternalDictionariesLoader();

View File

@ -43,7 +43,7 @@ public:
using GetColumnsFunction = std::function<ColumnsWithTypeAndName(const Columns &, const std::vector<DictionaryAttribute> & attributes)>;
// Used to separate key columns format for storage and view.
// Calls get_key_columns_function to get key column for dictionary get fuction call
// Calls get_key_columns_function to get key column for dictionary get function call
// and get_view_columns_function to get key representation.
// Now used in trie dictionary, where columns are stored as ip and mask, and are showed as string
DictionaryBlockInputStream(

View File

@ -371,7 +371,7 @@ struct JavaHashUTF16LEImpl
}
if (size % 2 != 0)
throw Exception("Arguments for javaHashUTF16LE must be in the form of UTF-16", ErrorCodes::LOGICAL_ERROR);
throw Exception("Arguments for javaHashUTF16LE must be in the form of UTF-16", ErrorCodes::BAD_ARGUMENTS);
UInt32 h = 0;
for (size_t i = 0; i < size; i += 2)

View File

@ -238,7 +238,7 @@ struct StringSource
size_t getElementSize() const
{
return offsets[row_num] - prev_offset;
return offsets[row_num] - prev_offset - 1;
}
Slice getWhole() const

View File

@ -36,6 +36,7 @@ namespace ErrorCodes
{
extern const int CANNOT_ALLOCATE_MEMORY;
extern const int LOGICAL_ERROR;
extern const int BAD_ARGUMENTS;
}
namespace Regexps
@ -205,7 +206,7 @@ namespace MultiRegexps
else
throw Exception(
"Pattern '" + str_patterns[error->expression] + "' failed with error '" + String(error->message),
ErrorCodes::LOGICAL_ERROR);
ErrorCodes::BAD_ARGUMENTS);
}
ProfileEvents::increment(ProfileEvents::RegexpCreated);

View File

@ -18,6 +18,7 @@ namespace DB
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int BAD_ARGUMENTS;
}
struct FormatImpl
@ -45,11 +46,11 @@ struct FormatImpl
for (UInt64 pos = l; pos < r; pos++)
{
if (!isNumericASCII(description[pos]))
throw Exception("Not a number in curly braces at position " + std::to_string(pos), ErrorCodes::LOGICAL_ERROR);
throw Exception("Not a number in curly braces at position " + std::to_string(pos), ErrorCodes::BAD_ARGUMENTS);
res = res * 10 + description[pos] - '0';
if (res >= argument_threshold)
throw Exception(
"Too big number for arguments, must be at most " + std::to_string(argument_threshold), ErrorCodes::LOGICAL_ERROR);
"Too big number for arguments, must be at most " + std::to_string(argument_threshold), ErrorCodes::BAD_ARGUMENTS);
}
}
@ -114,7 +115,7 @@ struct FormatImpl
}
if (is_open_curly)
throw Exception("Two open curly braces without close one at position " + std::to_string(i), ErrorCodes::LOGICAL_ERROR);
throw Exception("Two open curly braces without close one at position " + std::to_string(i), ErrorCodes::BAD_ARGUMENTS);
String to_add = String(pattern.data() + start_pos, i - start_pos);
double_brace_removal(to_add);
@ -137,7 +138,7 @@ struct FormatImpl
}
if (!is_open_curly)
throw Exception("Closed curly brace without open one at position " + std::to_string(i), ErrorCodes::LOGICAL_ERROR);
throw Exception("Closed curly brace without open one at position " + std::to_string(i), ErrorCodes::BAD_ARGUMENTS);
is_open_curly = false;
@ -145,17 +146,17 @@ struct FormatImpl
{
if (is_plain_numbering && !*is_plain_numbering)
throw Exception(
"Cannot switch from automatic field numbering to manual field specification", ErrorCodes::LOGICAL_ERROR);
"Cannot switch from automatic field numbering to manual field specification", ErrorCodes::BAD_ARGUMENTS);
is_plain_numbering = true;
if (index_if_plain >= argument_number)
throw Exception("Argument is too big for formatting", ErrorCodes::LOGICAL_ERROR);
throw Exception("Argument is too big for formatting", ErrorCodes::BAD_ARGUMENTS);
*index_positions_ptr = index_if_plain++;
}
else
{
if (is_plain_numbering && *is_plain_numbering)
throw Exception(
"Cannot switch from automatic field numbering to manual field specification", ErrorCodes::LOGICAL_ERROR);
"Cannot switch from automatic field numbering to manual field specification", ErrorCodes::BAD_ARGUMENTS);
is_plain_numbering = false;
UInt64 arg;
@ -163,7 +164,7 @@ struct FormatImpl
if (arg >= argument_number)
throw Exception(
"Argument is too big for formatting. Note that indexing starts from zero", ErrorCodes::LOGICAL_ERROR);
"Argument is too big for formatting. Note that indexing starts from zero", ErrorCodes::BAD_ARGUMENTS);
*index_positions_ptr = arg;
}
@ -183,7 +184,7 @@ struct FormatImpl
}
if (is_open_curly)
throw Exception("Last open curly brace is not closed", ErrorCodes::LOGICAL_ERROR);
throw Exception("Last open curly brace is not closed", ErrorCodes::BAD_ARGUMENTS);
String to_add = String(pattern.data() + start_pos, pattern.size() - start_pos);
double_brace_removal(to_add);

View File

@ -40,6 +40,8 @@ public:
bool isVariadic() const override { return true; }
bool isStateful() const override { return true; }
bool isDeterministic() const override { return false; }
bool isDeterministicInScopeOfQuery() const override { return false; }

View File

@ -959,7 +959,7 @@ void skipJSONField(ReadBuffer & buf, const StringRef & name_of_field)
}
void readException(Exception & e, ReadBuffer & buf, const String & additional_message)
Exception readException(ReadBuffer & buf, const String & additional_message)
{
int code = 0;
String name;
@ -986,14 +986,12 @@ void readException(Exception & e, ReadBuffer & buf, const String & additional_me
if (!stack_trace.empty())
out << " Stack trace:\n\n" << stack_trace;
e = Exception(out.str(), code);
return Exception(out.str(), code);
}
void readAndThrowException(ReadBuffer & buf, const String & additional_message)
{
Exception e;
readException(e, buf, additional_message);
e.rethrow();
readException(buf, additional_message).rethrow();
}

View File

@ -930,7 +930,7 @@ void skipJSONField(ReadBuffer & buf, const StringRef & name_of_field);
* (type is cut to base class, 'message' replaced by 'displayText', and stack trace is appended to 'message')
* Some additional message could be appended to exception (example: you could add information about from where it was received).
*/
void readException(Exception & e, ReadBuffer & buf, const String & additional_message = "");
Exception readException(ReadBuffer & buf, const String & additional_message = "");
void readAndThrowException(ReadBuffer & buf, const String & additional_message = "");

View File

@ -79,7 +79,7 @@ TEST_P(DateTime64StringParseBestEffortTest, parse)
// YYYY-MM-DD HH:MM:SS.NNNNNNNNN
INSTANTIATE_TEST_CASE_P(Basic,
INSTANTIATE_TEST_SUITE_P(Basic,
DateTime64StringParseTest,
::testing::ValuesIn(std::initializer_list<DateTime64StringsTestParam>{
{
@ -130,10 +130,10 @@ INSTANTIATE_TEST_CASE_P(Basic,
1568650817'1ULL,
1
}
}),
})
);
INSTANTIATE_TEST_CASE_P(BestEffort,
INSTANTIATE_TEST_SUITE_P(BestEffort,
DateTime64StringParseBestEffortTest,
::testing::ValuesIn(std::initializer_list<DateTime64StringsTestParam>{
{
@ -142,13 +142,13 @@ INSTANTIATE_TEST_CASE_P(BestEffort,
1568650817'123456ULL,
6
}
}),
})
);
// TODO: add negative test cases for invalid strings, verifying that error is reported properly
INSTANTIATE_TEST_CASE_P(Basic,
INSTANTIATE_TEST_SUITE_P(Basic,
DateTime64StringWriteTest,
::testing::ValuesIn(std::initializer_list<DateTime64StringsTestParam>{
{
@ -181,6 +181,6 @@ INSTANTIATE_TEST_CASE_P(Basic,
1568650817'001ULL,
3
}
}),
})
);

View File

@ -177,7 +177,7 @@ TEST_P(BitIO, WriteAndRead)
}
}
INSTANTIATE_TEST_CASE_P(Simple,
INSTANTIATE_TEST_SUITE_P(Simple,
BitIO,
::testing::ValuesIn(std::initializer_list<TestCaseParameter>{
{
@ -221,7 +221,7 @@ INSTANTIATE_TEST_CASE_P(Simple,
"10101001 10111010 11101111 10101111 10111010 11101011 10101001 00000000 " // 256
"10101111 10111010 11101011 10101001 00001111 11110000 00001110 11111111 " // 320
}
}),
})
);
TestCaseParameter primes_case(UInt8 repeat_times, UInt64 pattern)
@ -241,12 +241,13 @@ TestCaseParameter primes_case(UInt8 repeat_times, UInt64 pattern)
return TestCaseParameter(test_data);
}
INSTANTIATE_TEST_CASE_P(Primes,
BitIO,
::testing::Values(
primes_case(11, 0xFFFFFFFFFFFFFFFFULL),
primes_case(11, BIT_PATTERN)
),);
INSTANTIATE_TEST_SUITE_P(Primes,
BitIO,
::testing::Values(
primes_case(11, 0xFFFFFFFFFFFFFFFFULL),
primes_case(11, BIT_PATTERN)
)
);
TEST(BitHelpers, maskLowBits)
{

View File

@ -17,7 +17,7 @@ namespace DB
{
/// Visitors consist of functions with unified interface 'void visit(Casted & x, ASTPtr & y)', there x is y, successfully casted to Casted.
/// Both types and fuction could have const specifiers. The second argument is used by visitor to replaces AST node (y) if needed.
/// Both types and function could have const specifiers. The second argument is used by visitor to replaces AST node (y) if needed.
/// Visits AST nodes, add default database to tables if not set. There's different logic for DDLs and selects.
class AddDefaultDatabaseVisitor

View File

@ -157,7 +157,7 @@ Aggregator::Aggregator(const Params & params_)
total_size_of_aggregate_states = 0;
all_aggregates_has_trivial_destructor = true;
// aggreate_states will be aligned as below:
// aggregate_states will be aligned as below:
// |<-- state_1 -->|<-- pad_1 -->|<-- state_2 -->|<-- pad_2 -->| .....
//
// pad_N will be used to match alignment requirement for each next state.
@ -168,7 +168,7 @@ Aggregator::Aggregator(const Params & params_)
total_size_of_aggregate_states += params.aggregates[i].function->sizeOfData();
// aggreate states are aligned based on maximum requirement
// aggregate states are aligned based on maximum requirement
align_aggregate_states = std::max(align_aggregate_states, params.aggregates[i].function->alignOfData());
// If not the last aggregate_state, we need pad it so that next aggregate_state will be aligned.

View File

@ -96,7 +96,7 @@ DataTypePtr BloomFilter::getPrimitiveType(const DataTypePtr & data_type)
if (!typeid_cast<const DataTypeArray *>(array_type->getNestedType().get()))
return getPrimitiveType(array_type->getNestedType());
else
throw Exception("Unexpected type " + data_type->getName() + " of bloom filter index.", ErrorCodes::LOGICAL_ERROR);
throw Exception("Unexpected type " + data_type->getName() + " of bloom filter index.", ErrorCodes::BAD_ARGUMENTS);
}
if (const auto * nullable_type = typeid_cast<const DataTypeNullable *>(data_type.get()))

View File

@ -23,6 +23,7 @@ namespace DB
namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
extern const int BAD_ARGUMENTS;
}
struct BloomFilterHash
@ -33,45 +34,64 @@ struct BloomFilterHash
15033938188484401405ULL, 18286745649494826751ULL, 6852245486148412312ULL, 8886056245089344681ULL, 10151472371158292780ULL
};
static ColumnPtr hashWithField(const IDataType * data_type, const Field & field)
template <typename FieldGetType, typename FieldType>
static UInt64 getNumberTypeHash(const Field & field)
{
WhichDataType which(data_type);
UInt64 hash = 0;
bool unexpected_type = false;
/// For negative, we should convert the type to make sure the symbol is in right place
return field.isNull() ? intHash64(0) : intHash64(ext::bit_cast<UInt64>(FieldType(field.safeGet<FieldGetType>())));
}
if (field.isNull())
{
if (which.isInt() || which.isUInt() || which.isEnum() || which.isDateOrDateTime() || which.isFloat())
hash = intHash64(0);
else if (which.isString())
hash = CityHash_v1_0_2::CityHash64("", 0);
else if (which.isFixedString())
{
const auto * fixed_string_type = typeid_cast<const DataTypeFixedString *>(data_type);
const std::vector<char> value(fixed_string_type->getN(), 0);
hash = CityHash_v1_0_2::CityHash64(value.data(), value.size());
}
else
unexpected_type = true;
}
else if (which.isUInt() || which.isDateOrDateTime())
hash = intHash64(field.safeGet<UInt64>());
else if (which.isInt() || which.isEnum())
hash = intHash64(ext::bit_cast<UInt64>(field.safeGet<Int64>()));
else if (which.isFloat32() || which.isFloat64())
hash = intHash64(ext::bit_cast<UInt64>(field.safeGet<Float64>()));
else if (which.isString() || which.isFixedString())
static UInt64 getStringTypeHash(const Field & field)
{
if (!field.isNull())
{
const auto & value = field.safeGet<String>();
hash = CityHash_v1_0_2::CityHash64(value.data(), value.size());
return CityHash_v1_0_2::CityHash64(value.data(), value.size());
}
else
unexpected_type = true;
if (unexpected_type)
throw Exception("Unexpected type " + data_type->getName() + " of bloom filter index.", ErrorCodes::LOGICAL_ERROR);
return CityHash_v1_0_2::CityHash64("", 0);
}
return ColumnConst::create(ColumnUInt64::create(1, hash), 1);
static UInt64 getFixedStringTypeHash(const Field & field, const IDataType * type)
{
if (!field.isNull())
{
const auto & value = field.safeGet<String>();
return CityHash_v1_0_2::CityHash64(value.data(), value.size());
}
const auto * fixed_string_type = typeid_cast<const DataTypeFixedString *>(type);
const std::vector<char> value(fixed_string_type->getN(), 0);
return CityHash_v1_0_2::CityHash64(value.data(), value.size());
}
static ColumnPtr hashWithField(const IDataType * data_type, const Field & field)
{
const auto & build_hash_column = [&](const UInt64 & hash) -> ColumnPtr
{
return ColumnConst::create(ColumnUInt64::create(1, hash), 1);
};
WhichDataType which(data_type);
if (which.isUInt8()) return build_hash_column(getNumberTypeHash<UInt64, UInt8>(field));
else if (which.isUInt16()) return build_hash_column(getNumberTypeHash<UInt64, UInt16>(field));
else if (which.isUInt32()) return build_hash_column(getNumberTypeHash<UInt64, UInt32>(field));
else if (which.isUInt64()) return build_hash_column(getNumberTypeHash<UInt64, UInt64>(field));
else if (which.isInt8()) return build_hash_column(getNumberTypeHash<Int64, Int8>(field));
else if (which.isInt16()) return build_hash_column(getNumberTypeHash<Int64, Int16>(field));
else if (which.isInt32()) return build_hash_column(getNumberTypeHash<Int64, Int32>(field));
else if (which.isInt64()) return build_hash_column(getNumberTypeHash<Int64, Int64>(field));
else if (which.isEnum8()) return build_hash_column(getNumberTypeHash<Int64, Int8>(field));
else if (which.isEnum16()) return build_hash_column(getNumberTypeHash<Int64, Int16>(field));
else if (which.isDate()) return build_hash_column(getNumberTypeHash<UInt64, UInt16>(field));
else if (which.isDateTime()) return build_hash_column(getNumberTypeHash<UInt64, UInt32>(field));
else if (which.isFloat32()) return build_hash_column(getNumberTypeHash<Float64, Float64>(field));
else if (which.isFloat64()) return build_hash_column(getNumberTypeHash<Float64, Float64>(field));
else if (which.isString()) return build_hash_column(getStringTypeHash(field));
else if (which.isFixedString()) return build_hash_column(getFixedStringTypeHash(field, data_type));
else throw Exception("Unexpected type " + data_type->getName() + " of bloom filter index.", ErrorCodes::BAD_ARGUMENTS);
}
static ColumnPtr hashWithColumn(const DataTypePtr & data_type, const ColumnPtr & column, size_t pos, size_t limit)
@ -82,7 +102,7 @@ struct BloomFilterHash
const auto * array_col = typeid_cast<const ColumnArray *>(column.get());
if (checkAndGetColumn<ColumnNullable>(array_col->getData()))
throw Exception("Unexpected type " + data_type->getName() + " of bloom filter index.", ErrorCodes::LOGICAL_ERROR);
throw Exception("Unexpected type " + data_type->getName() + " of bloom filter index.", ErrorCodes::BAD_ARGUMENTS);
const auto & offsets = array_col->getOffsets();
limit = offsets[pos + limit - 1] - offsets[pos - 1]; /// PaddedPODArray allows access on index -1.
@ -127,7 +147,7 @@ struct BloomFilterHash
else if (which.isFloat64()) getNumberTypeHash<Float64, is_first>(column, vec, pos);
else if (which.isString()) getStringTypeHash<is_first>(column, vec, pos);
else if (which.isFixedString()) getStringTypeHash<is_first>(column, vec, pos);
else throw Exception("Unexpected type " + data_type->getName() + " of bloom filter index.", ErrorCodes::LOGICAL_ERROR);
else throw Exception("Unexpected type " + data_type->getName() + " of bloom filter index.", ErrorCodes::BAD_ARGUMENTS);
}
template <typename Type, bool is_first>

View File

@ -10,6 +10,7 @@
#include <IO/ReadHelpers.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <Poco/Util/Application.h>
#include <ext/range.h>
namespace DB
{
@ -449,18 +450,64 @@ void Cluster::initMisc()
}
}
std::unique_ptr<Cluster> Cluster::getClusterWithReplicasAsShards(const Settings & settings) const
{
return std::unique_ptr<Cluster>{ new Cluster(ReplicasAsShardsTag{}, *this, settings)};
}
std::unique_ptr<Cluster> Cluster::getClusterWithSingleShard(size_t index) const
{
return std::unique_ptr<Cluster>{ new Cluster(*this, {index}) };
return std::unique_ptr<Cluster>{ new Cluster(SubclusterTag{}, *this, {index}) };
}
std::unique_ptr<Cluster> Cluster::getClusterWithMultipleShards(const std::vector<size_t> & indices) const
{
return std::unique_ptr<Cluster>{ new Cluster(*this, indices) };
return std::unique_ptr<Cluster>{ new Cluster(SubclusterTag{}, *this, indices) };
}
Cluster::Cluster(const Cluster & from, const std::vector<size_t> & indices)
Cluster::Cluster(Cluster::ReplicasAsShardsTag, const Cluster & from, const Settings & settings)
: shards_info{}, addresses_with_failover{}
{
if (from.addresses_with_failover.empty())
throw Exception("Cluster is empty", ErrorCodes::LOGICAL_ERROR);
std::set<std::pair<String, int>> unique_hosts;
for (size_t shard_index : ext::range(0, from.shards_info.size()))
{
const auto & replicas = from.addresses_with_failover[shard_index];
for (const auto & address : replicas)
{
if (!unique_hosts.emplace(address.host_name, address.port).second)
continue; /// Duplicate host, skip.
ShardInfo info;
if (address.is_local)
info.local_addresses.push_back(address);
ConnectionPoolPtr pool = std::make_shared<ConnectionPool>(
settings.distributed_connections_pool_size,
address.host_name,
address.port,
address.default_database,
address.user,
address.password,
"server",
address.compression,
address.secure);
info.pool = std::make_shared<ConnectionPoolWithFailover>(ConnectionPoolPtrs{pool}, settings.load_balancing);
info.per_replica_pools = {std::move(pool)};
addresses_with_failover.emplace_back(Addresses{address});
shards_info.emplace_back(std::move(info));
}
}
initMisc();
}
Cluster::Cluster(Cluster::SubclusterTag, const Cluster & from, const std::vector<size_t> & indices)
: shards_info{}
{
for (size_t index : indices)

View File

@ -26,7 +26,7 @@ public:
const String & username, const String & password,
UInt16 clickhouse_port, bool treat_local_as_remote, bool secure = false);
Cluster(const Cluster &) = delete;
Cluster(const Cluster &)= delete;
Cluster & operator=(const Cluster &) = delete;
/// is used to set a limit on the size of the timeout
@ -148,6 +148,9 @@ public:
/// Get a subcluster consisting of one or multiple shards - indexes by count (from 0) of the shard of this cluster.
std::unique_ptr<Cluster> getClusterWithMultipleShards(const std::vector<size_t> & indices) const;
/// Get a new Cluster that contains all servers (all shards with all replicas) from existing cluster as independent shards.
std::unique_ptr<Cluster> getClusterWithReplicasAsShards(const Settings & settings) const;
private:
using SlotToShard = std::vector<UInt64>;
SlotToShard slot_to_shard;
@ -159,7 +162,12 @@ private:
void initMisc();
/// For getClusterWithMultipleShards implementation.
Cluster(const Cluster & from, const std::vector<size_t> & indices);
struct SubclusterTag {};
Cluster(SubclusterTag, const Cluster & from, const std::vector<size_t> & indices);
/// For getClusterWithReplicasAsShards implementation
struct ReplicasAsShardsTag {};
Cluster(ReplicasAsShardsTag, const Cluster & from, const Settings & settings);
String hash_of_addresses;
/// Description of the cluster shards.

View File

@ -111,7 +111,7 @@ struct ContextShared
mutable std::mutex embedded_dictionaries_mutex;
mutable std::mutex external_dictionaries_mutex;
mutable std::mutex external_models_mutex;
/// Separate mutex for re-initialization of zookeer session. This operation could take a long time and must not interfere with another operations.
/// Separate mutex for re-initialization of zookeeper session. This operation could take a long time and must not interfere with another operations.
mutable std::mutex zookeeper_mutex;
mutable zkutil::ZooKeeperPtr zookeeper; /// Client for ZooKeeper.
@ -191,7 +191,7 @@ struct ContextShared
/// Clusters for distributed tables
/// Initialized on demand (on distributed storages initialization) since Settings should be initialized
std::unique_ptr<Clusters> clusters;
ConfigurationPtr clusters_config; /// Soteres updated configs
ConfigurationPtr clusters_config; /// Stores updated configs
mutable std::mutex clusters_mutex; /// Guards clusters and clusters_config
#if USE_EMBEDDED_COMPILER
@ -922,21 +922,21 @@ StoragePtr Context::tryGetExternalTable(const String & table_name) const
StoragePtr Context::getTable(const String & database_name, const String & table_name) const
{
Exception exc;
std::optional<Exception> exc;
auto res = getTableImpl(database_name, table_name, &exc);
if (!res)
throw exc;
throw *exc;
return res;
}
StoragePtr Context::tryGetTable(const String & database_name, const String & table_name) const
{
return getTableImpl(database_name, table_name, nullptr);
return getTableImpl(database_name, table_name, {});
}
StoragePtr Context::getTableImpl(const String & database_name, const String & table_name, Exception * exception) const
StoragePtr Context::getTableImpl(const String & database_name, const String & table_name, std::optional<Exception> * exception) const
{
String db;
DatabasePtr database;
@ -958,7 +958,7 @@ StoragePtr Context::getTableImpl(const String & database_name, const String & ta
if (shared->databases.end() == it)
{
if (exception)
*exception = Exception("Database " + backQuoteIfNeed(db) + " doesn't exist", ErrorCodes::UNKNOWN_DATABASE);
exception->emplace("Database " + backQuoteIfNeed(db) + " doesn't exist", ErrorCodes::UNKNOWN_DATABASE);
return {};
}
@ -969,7 +969,7 @@ StoragePtr Context::getTableImpl(const String & database_name, const String & ta
if (!table)
{
if (exception)
*exception = Exception("Table " + backQuoteIfNeed(db) + "." + backQuoteIfNeed(table_name) + " doesn't exist.", ErrorCodes::UNKNOWN_TABLE);
exception->emplace("Table " + backQuoteIfNeed(db) + "." + backQuoteIfNeed(table_name) + " doesn't exist.", ErrorCodes::UNKNOWN_TABLE);
return {};
}

View File

@ -589,7 +589,7 @@ private:
EmbeddedDictionaries & getEmbeddedDictionariesImpl(bool throw_on_error) const;
StoragePtr getTableImpl(const String & database_name, const String & table_name, Exception * exception) const;
StoragePtr getTableImpl(const String & database_name, const String & table_name, std::optional<Exception> * exception) const;
SessionKey getSessionKey(const String & session_id) const;

View File

@ -105,7 +105,7 @@ public:
if (node.name == NameAnd::name)
{
if (!node.arguments || node.arguments->children.empty())
throw Exception("Logical error: function requires argiment", ErrorCodes::LOGICAL_ERROR);
throw Exception("Logical error: function requires argument", ErrorCodes::LOGICAL_ERROR);
for (auto & child : node.arguments->children)
{

View File

@ -238,7 +238,7 @@ DDLWorker::DDLWorker(const std::string & zk_root_dir, Context & context_, const
if (context.getSettingsRef().readonly)
{
LOG_WARNING(log, "Distributed DDL worker is run with readonly settings, it will not be able to execute DDL queries"
<< " Set apropriate system_profile or distributed_ddl.profile to fix this.");
<< " Set appropriate system_profile or distributed_ddl.profile to fix this.");
}
host_fqdn = getFQDNOrHostName();
@ -825,7 +825,7 @@ void DDLWorker::cleanupQueue(Int64 current_time_seconds, const ZooKeeperPtr & zo
if (!zookeeper->exists(node_path, &stat))
continue;
/// Delete node if its lifetmie is expired (according to task_max_lifetime parameter)
/// Delete node if its lifetime is expired (according to task_max_lifetime parameter)
constexpr UInt64 zookeeper_time_resolution = 1000;
Int64 zookeeper_time_seconds = stat.ctime / zookeeper_time_resolution;
bool node_lifetime_is_expired = zookeeper_time_seconds + task_max_lifetime < current_time_seconds;

View File

@ -72,4 +72,6 @@ private:
std::vector<DatabaseAndTableWithAlias> getDatabaseAndTables(const ASTSelectQuery & select_query, const String & current_database);
std::optional<DatabaseAndTableWithAlias> getDatabaseAndTable(const ASTSelectQuery & select, size_t table_number);
using TablesWithColumnNames = std::vector<TableWithColumnNames>;
}

View File

@ -954,7 +954,7 @@ void ExpressionActions::finalize(const Names & output_columns)
/// remote table (doesn't know anything about it).
///
/// If we have combination of two previous cases, our heuristic from (1) can choose absolutely different columns,
/// so generated streams with these actions will have different headers. To avoid this we addionaly rename our "redundant" column
/// so generated streams with these actions will have different headers. To avoid this we additionally rename our "redundant" column
/// to DUMMY_COLUMN_NAME with help of COPY_COLUMN action and consequent remove of original column.
/// It doesn't affect any logic, but all streams will have same "redundant" column in header called "_dummy".

View File

@ -26,7 +26,6 @@
#include <Interpreters/ExpressionActions.h>
#include <Interpreters/InJoinSubqueriesPreprocessor.h>
#include <Interpreters/LogicalExpressionsOptimizer.h>
#include <Interpreters/PredicateExpressionsOptimizer.h>
#include <Interpreters/ExternalDictionariesLoader.h>
#include <Interpreters/Set.h>
#include <Interpreters/AnalyzedJoin.h>
@ -287,7 +286,7 @@ SetPtr SelectQueryExpressionAnalyzer::isPlainStorageSetInSubquery(const ASTPtr &
}
/// Perfomance optimisation for IN() if storage supports it.
/// Performance optimisation for IN() if storage supports it.
void SelectQueryExpressionAnalyzer::makeSetsForIndex(const ASTPtr & node)
{
if (!node || !storage() || !storage()->supportsIndexForIn())

View File

@ -19,7 +19,7 @@ ExternalLoader::LoadablePtr ExternalDictionariesLoader::create(
const std::string & name, const Poco::Util::AbstractConfiguration & config,
const std::string & key_in_config, const std::string & repository_name) const
{
/// For dictionaries from databases (created with DDL qureies) we have to perform
/// For dictionaries from databases (created with DDL queries) we have to perform
/// additional checks, so we identify them here.
bool dictionary_from_database = !repository_name.empty();
return DictionaryFactory::instance().create(name, config, key_in_config, context, dictionary_from_database);

View File

@ -609,7 +609,7 @@ public:
{
try
{
/// Maybe alredy true, if we have an exception
/// Maybe already true, if we have an exception
if (!should_update_flag)
should_update_flag = object->isModified();
}

View File

@ -0,0 +1,79 @@
#include <Interpreters/ExtractExpressionInfoVisitor.h>
#include <Functions/FunctionFactory.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <Interpreters/IdentifierSemantic.h>
#include <Parsers/ASTSubquery.h>
namespace DB
{
void ExpressionInfoMatcher::visit(const ASTPtr & ast, Data & data)
{
if (const auto * function = ast->as<ASTFunction>())
visit(*function, ast, data);
else if (const auto * identifier = ast->as<ASTIdentifier>())
visit(*identifier, ast, data);
}
void ExpressionInfoMatcher::visit(const ASTFunction & ast_function, const ASTPtr &, Data & data)
{
if (ast_function.name == "arrayJoin")
data.is_array_join = true;
else if (AggregateFunctionFactory::instance().isAggregateFunctionName(ast_function.name))
data.is_aggregate_function = true;
else
{
const auto & function = FunctionFactory::instance().tryGet(ast_function.name, data.context);
/// Skip lambda, tuple and other special functions
if (function && function->isStateful())
data.is_stateful_function = true;
}
}
void ExpressionInfoMatcher::visit(const ASTIdentifier & identifier, const ASTPtr &, Data & data)
{
if (!identifier.compound())
{
for (size_t index = 0; index < data.tables.size(); ++index)
{
const auto & columns = data.tables[index].columns;
// TODO: make sure no collision ever happens
if (std::find(columns.begin(), columns.end(), identifier.name) != columns.end())
{
data.unique_reference_tables_pos.emplace(index);
break;
}
}
}
else
{
size_t best_table_pos = 0;
if (IdentifierSemantic::chooseTable(identifier, data.tables, best_table_pos))
data.unique_reference_tables_pos.emplace(best_table_pos);
}
}
bool ExpressionInfoMatcher::needChildVisit(const ASTPtr & node, const ASTPtr &)
{
return !node->as<ASTSubquery>();
}
bool hasStatefulFunction(const ASTPtr & node, const Context & context)
{
for (const auto & select_expression : node->children)
{
ExpressionInfoVisitor::Data expression_info{.context = context, .tables = {}};
ExpressionInfoVisitor(expression_info).visit(select_expression);
if (expression_info.is_stateful_function)
return true;
}
return false;
}
}

View File

@ -0,0 +1,40 @@
#pragma once
#include <Parsers/IAST_fwd.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
#include <Interpreters/InDepthNodeVisitor.h>
#include <Interpreters/DatabaseAndTableWithAlias.h>
namespace DB
{
class Context;
struct ExpressionInfoMatcher
{
struct Data
{
const Context & context;
const std::vector<TableWithColumnNames> & tables;
bool is_array_join = false;
bool is_stateful_function = false;
bool is_aggregate_function = false;
std::unordered_set<size_t> unique_reference_tables_pos = {};
};
static void visit(const ASTPtr & ast, Data & data);
static bool needChildVisit(const ASTPtr & node, const ASTPtr &);
static void visit(const ASTFunction & ast_function, const ASTPtr &, Data & data);
static void visit(const ASTIdentifier & identifier, const ASTPtr &, Data & data);
};
using ExpressionInfoVisitor = ConstInDepthNodeVisitor<ExpressionInfoMatcher, true>;
bool hasStatefulFunction(const ASTPtr & node, const Context & context);
}

View File

@ -1,16 +0,0 @@
#include <Interpreters/ExtractFunctionDataVisitor.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
namespace DB
{
void ExtractFunctionData::visit(ASTFunction & function, ASTPtr &)
{
if (AggregateFunctionFactory::instance().isAggregateFunctionName(function.name))
aggregate_functions.emplace_back(&function);
else
functions.emplace_back(&function);
}
}

View File

@ -1,25 +0,0 @@
#pragma once
#include <Parsers/IAST.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTSelectWithUnionQuery.h>
#include <Interpreters/InDepthNodeVisitor.h>
namespace DB
{
struct ExtractFunctionData
{
using TypeToVisit = ASTFunction;
std::vector<ASTFunction *> functions;
std::vector<ASTFunction *> aggregate_functions;
void visit(ASTFunction & identifier, ASTPtr &);
};
using ExtractFunctionMatcher = OneTypeMatcher<ExtractFunctionData>;
using ExtractFunctionVisitor = InDepthNodeVisitor<ExtractFunctionMatcher, true>;
}

View File

@ -1,40 +0,0 @@
#include <Interpreters/FindIdentifierBestTableVisitor.h>
#include <Interpreters/IdentifierSemantic.h>
namespace DB
{
FindIdentifierBestTableData::FindIdentifierBestTableData(const std::vector<TableWithColumnNames> & tables_)
: tables(tables_)
{
}
void FindIdentifierBestTableData::visit(ASTIdentifier & identifier, ASTPtr &)
{
const DatabaseAndTableWithAlias * best_table = nullptr;
if (!identifier.compound())
{
for (const auto & table_names : tables)
{
auto & columns = table_names.columns;
if (std::find(columns.begin(), columns.end(), identifier.name) != columns.end())
{
// TODO: make sure no collision ever happens
if (!best_table)
best_table = &table_names.table;
}
}
}
else
{
size_t best_table_pos = 0;
if (IdentifierSemantic::chooseTable(identifier, tables, best_table_pos))
best_table = &tables[best_table_pos].table;
}
identifier_table.emplace_back(&identifier, best_table);
}
}

View File

@ -1,27 +0,0 @@
#pragma once
#include <Parsers/IAST.h>
#include <Parsers/ASTIdentifier.h>
#include <Interpreters/InDepthNodeVisitor.h>
#include <Interpreters/DatabaseAndTableWithAlias.h>
namespace DB
{
struct FindIdentifierBestTableData
{
using TypeToVisit = ASTIdentifier;
using IdentifierWithTable = std::pair<ASTIdentifier *, const DatabaseAndTableWithAlias *>;
const std::vector<TableWithColumnNames> & tables;
std::vector<IdentifierWithTable> identifier_table;
FindIdentifierBestTableData(const std::vector<TableWithColumnNames> & tables_);
void visit(ASTIdentifier & identifier, ASTPtr &);
};
using FindIdentifierBestTableMatcher = OneTypeMatcher<FindIdentifierBestTableData>;
using FindIdentifierBestTableVisitor = InDepthNodeVisitor<FindIdentifierBestTableMatcher, true>;
}

View File

@ -59,7 +59,13 @@ public:
using Data = Data_;
using TypeToVisit = typename Data::TypeToVisit;
static bool needChildVisit(const ASTPtr &, const ASTPtr &) { return visit_children; }
static bool needChildVisit(const ASTPtr & node, const ASTPtr &)
{
if (node && node->as<TypeToVisit>())
return visit_children;
return true;
}
static void visit(T & ast, Data & data)
{

View File

@ -503,28 +503,31 @@ Block InterpreterSelectQuery::getSampleBlockImpl()
/// Do all AST changes here, because actions from analysis_result will be used later in readImpl.
/// PREWHERE optimization.
/// Turn off, if the table filter (row-level security) is applied.
if (storage && !context->getRowPolicy()->getCondition(storage->getDatabaseName(), storage->getTableName(), RowPolicy::SELECT_FILTER))
if (storage)
{
query_analyzer->makeSetsForIndex(query.where());
query_analyzer->makeSetsForIndex(query.prewhere());
auto optimize_prewhere = [&](auto & merge_tree)
/// PREWHERE optimization.
/// Turn off, if the table filter (row-level security) is applied.
if (!context->getRowPolicy()->getCondition(storage->getDatabaseName(), storage->getTableName(), RowPolicy::SELECT_FILTER))
{
SelectQueryInfo current_info;
current_info.query = query_ptr;
current_info.syntax_analyzer_result = syntax_analyzer_result;
current_info.sets = query_analyzer->getPreparedSets();
auto optimize_prewhere = [&](auto & merge_tree)
{
SelectQueryInfo current_info;
current_info.query = query_ptr;
current_info.syntax_analyzer_result = syntax_analyzer_result;
current_info.sets = query_analyzer->getPreparedSets();
/// Try transferring some condition from WHERE to PREWHERE if enabled and viable
if (settings.optimize_move_to_prewhere && query.where() && !query.prewhere() && !query.final())
MergeTreeWhereOptimizer{current_info, *context, merge_tree,
syntax_analyzer_result->requiredSourceColumns(), log};
};
/// Try transferring some condition from WHERE to PREWHERE if enabled and viable
if (settings.optimize_move_to_prewhere && query.where() && !query.prewhere() && !query.final())
MergeTreeWhereOptimizer{current_info, *context, merge_tree,
syntax_analyzer_result->requiredSourceColumns(), log};
};
if (const auto * merge_tree_data = dynamic_cast<const MergeTreeData *>(storage.get()))
optimize_prewhere(*merge_tree_data);
if (const auto * merge_tree_data = dynamic_cast<const MergeTreeData *>(storage.get()))
optimize_prewhere(*merge_tree_data);
}
}
if (storage && !options.only_analyze)
@ -1180,7 +1183,6 @@ void InterpreterSelectQuery::executeImpl(TPipeline & pipeline, const BlockInputS
if (expressions.second_stage)
{
bool need_second_distinct_pass = false;
bool need_merge_streams = false;
if (expressions.need_aggregate)
{
@ -1216,7 +1218,7 @@ void InterpreterSelectQuery::executeImpl(TPipeline & pipeline, const BlockInputS
}
else if (query.group_by_with_totals || query.group_by_with_rollup || query.group_by_with_cube)
throw Exception("WITH TOTALS, ROLLUP or CUBE are not supported without aggregation", ErrorCodes::LOGICAL_ERROR);
throw Exception("WITH TOTALS, ROLLUP or CUBE are not supported without aggregation", ErrorCodes::NOT_IMPLEMENTED);
need_second_distinct_pass = query.distinct && pipeline.hasMixedStreams();
@ -1241,13 +1243,11 @@ void InterpreterSelectQuery::executeImpl(TPipeline & pipeline, const BlockInputS
executePreLimit(pipeline);
}
if (need_second_distinct_pass
|| query.limitLength()
|| query.limitBy()
|| pipeline.hasDelayedStream())
{
need_merge_streams = true;
}
bool need_merge_streams = need_second_distinct_pass || query.limitLength() || query.limitBy();
if constexpr (!pipeline_with_processors)
if (pipeline.hasDelayedStream())
need_merge_streams = true;
if (need_merge_streams)
{
@ -1933,7 +1933,7 @@ void InterpreterSelectQuery::executeAggregation(QueryPipeline & pipeline, const
* 1. Parallel aggregation is done, and the results should be merged in parallel.
* 2. An aggregation is done with store of temporary data on the disk, and they need to be merged in a memory efficient way.
*/
bool allow_to_use_two_level_group_by = pipeline.getNumMainStreams() > 1 || settings.max_bytes_before_external_group_by != 0;
bool allow_to_use_two_level_group_by = pipeline.getNumStreams() > 1 || settings.max_bytes_before_external_group_by != 0;
Aggregator::Params params(header_before_aggregation, keys, aggregates,
overflow_row, settings.max_rows_to_group_by, settings.group_by_overflow_mode,
@ -1947,12 +1947,12 @@ void InterpreterSelectQuery::executeAggregation(QueryPipeline & pipeline, const
pipeline.dropTotalsIfHas();
/// If there are several sources, then we perform parallel aggregation
if (pipeline.getNumMainStreams() > 1)
if (pipeline.getNumStreams() > 1)
{
/// Add resize transform to uniformly distribute data between aggregating streams.
pipeline.resize(pipeline.getNumMainStreams(), true);
pipeline.resize(pipeline.getNumStreams(), true);
auto many_data = std::make_shared<ManyAggregatedData>(pipeline.getNumMainStreams());
auto many_data = std::make_shared<ManyAggregatedData>(pipeline.getNumStreams());
auto merge_threads = settings.aggregation_memory_efficient_merge_threads
? static_cast<size_t>(settings.aggregation_memory_efficient_merge_threads)
: static_cast<size_t>(settings.max_threads);
@ -2351,9 +2351,6 @@ void InterpreterSelectQuery::executeOrder(QueryPipeline & pipeline, InputSorting
return std::make_shared<PartialSortingTransform>(header, output_order_descr, limit, do_count_rows);
});
/// If there are several streams, we merge them into one
pipeline.resize(1);
/// Merge the sorted blocks.
pipeline.addSimpleTransform([&](const Block & header, QueryPipeline::StreamType stream_type) -> ProcessorPtr
{
@ -2362,11 +2359,12 @@ void InterpreterSelectQuery::executeOrder(QueryPipeline & pipeline, InputSorting
return std::make_shared<MergeSortingTransform>(
header, output_order_descr, settings.max_block_size, limit,
settings.max_bytes_before_remerge_sort,
settings.max_bytes_before_remerge_sort / pipeline.getNumStreams(),
settings.max_bytes_before_external_sort, context->getTemporaryPath(), settings.min_free_disk_space_for_temporary_data);
});
pipeline.enableQuotaForCurrentStreams();
/// If there are several streams, we merge them into one
executeMergeSorted(pipeline, output_order_descr, limit);
}
@ -2807,11 +2805,7 @@ void InterpreterSelectQuery::executeSubqueriesInSetsAndJoins(Pipeline & pipeline
void InterpreterSelectQuery::executeSubqueriesInSetsAndJoins(QueryPipeline & pipeline, SubqueriesForSets & subqueries_for_sets)
{
if (query_info.input_sorting_info)
{
if (pipeline.hasDelayedStream())
throw Exception("Using read in order optimization, but has delayed stream in pipeline", ErrorCodes::LOGICAL_ERROR);
executeMergeSorted(pipeline, query_info.input_sorting_info->order_key_prefix_descr, 0);
}
const Settings & settings = context->getSettingsRef();
@ -2828,7 +2822,7 @@ void InterpreterSelectQuery::unifyStreams(Pipeline & pipeline, Block header)
{
/// Unify streams in case they have different headers.
/// TODO: remove previos addition of _dummy column.
/// TODO: remove previous addition of _dummy column.
if (header.columns() > 1 && header.has("_dummy"))
header.erase("_dummy");

View File

@ -306,7 +306,7 @@ size_t Join::getTotalByteCount() const
void Join::setSampleBlock(const Block & block)
{
/// You have to restore this lock if you call the fuction outside of ctor.
/// You have to restore this lock if you call the function outside of ctor.
//std::unique_lock lock(rwlock);
LOG_DEBUG(log, "setSampleBlock: " << block.dumpStructure());
@ -778,7 +778,7 @@ NO_INLINE IColumn::Filter joinRightColumns(const Map & map, AddedColumns & added
}
else if constexpr ((is_any_join || is_semi_join) && right)
{
/// Use first appered left key + it needs left columns replication
/// Use first appeared left key + it needs left columns replication
if (mapped.setUsedOnce())
{
setUsed<need_filter>(filter, i);
@ -787,7 +787,7 @@ NO_INLINE IColumn::Filter joinRightColumns(const Map & map, AddedColumns & added
}
else if constexpr (is_any_join && KIND == ASTTableJoin::Kind::Inner)
{
/// Use first appered left key only
/// Use first appeared left key only
if (mapped.setUsedOnce())
{
setUsed<need_filter>(filter, i);

View File

@ -527,7 +527,7 @@ void MergeJoin::mergeFlushedRightBlocks()
lsm->merge(callback);
flushed_right_blocks.swap(lsm->sorted_files.front());
/// Get memory limit or aproximate it from row limit and bytes per row factor
/// Get memory limit or approximate it from row limit and bytes per row factor
UInt64 memory_limit = size_limits.max_bytes;
UInt64 rows_limit = size_limits.max_rows;
if (!memory_limit && rows_limit)

View File

@ -56,7 +56,7 @@ public:
private:
/// There're two size limits for right-hand table: max_rows_in_join, max_bytes_in_join.
/// max_bytes is prefered. If it isn't set we aproximate it as (max_rows * bytes/row).
/// max_bytes is prefered. If it isn't set we approximate it as (max_rows * bytes/row).
struct BlockByteWeight
{
size_t operator()(const Block & block) const { return block.bytes(); }

View File

@ -68,7 +68,7 @@ ASTs OptimizeIfChainsVisitor::ifChain(const ASTPtr & child)
const auto * else_arg = function_args->children[2]->as<ASTFunction>();
/// Recursively collect arguments from the innermost if ("head-resursion").
/// Recursively collect arguments from the innermost if ("head-recursion").
/// Arguments will be returned in reverse order.
if (else_arg && else_arg->name == "if")

View File

@ -1,32 +1,13 @@
#include <iostream>
#include <Common/typeid_cast.h>
#include <Storages/IStorage.h>
#include <Interpreters/PredicateExpressionsOptimizer.h>
#include <Interpreters/InterpreterSelectQuery.h>
#include <Interpreters/IdentifierSemantic.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <Parsers/IAST.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTSelectWithUnionQuery.h>
#include <Parsers/ASTSubquery.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Parsers/ASTAsterisk.h>
#include <Parsers/ASTQualifiedAsterisk.h>
#include <Parsers/ASTColumnsMatcher.h>
#include <Parsers/queryToString.h>
#include <Interpreters/Context.h>
#include <Interpreters/ExpressionActions.h>
#include <Interpreters/QueryNormalizer.h>
#include <Interpreters/QueryAliasesVisitor.h>
#include <Interpreters/MarkTableIdentifiersVisitor.h>
#include <Interpreters/TranslateQualifiedNamesVisitor.h>
#include <Interpreters/FindIdentifierBestTableVisitor.h>
#include <Interpreters/ExtractFunctionDataVisitor.h>
#include <Interpreters/getTableExpressions.h>
#include <Functions/FunctionFactory.h>
#include <Interpreters/PredicateRewriteVisitor.h>
#include <Interpreters/ExtractExpressionInfoVisitor.h>
namespace DB
@ -38,155 +19,105 @@ namespace ErrorCodes
extern const int UNKNOWN_ELEMENT_IN_AST;
}
namespace
{
constexpr auto and_function_name = "and";
String qualifiedName(ASTIdentifier * identifier, const String & prefix)
{
if (identifier->isShort())
return prefix + identifier->getAliasOrColumnName();
return identifier->getAliasOrColumnName();
}
}
PredicateExpressionsOptimizer::PredicateExpressionsOptimizer(
ASTSelectQuery * ast_select_, ExtractedSettings && settings_, const Context & context_)
: ast_select(ast_select_), settings(settings_), context(context_)
const Context & context_, const TablesWithColumnNames & tables_with_columns_, const Settings & settings_)
: context(context_), tables_with_columns(tables_with_columns_), settings(settings_)
{
}
bool PredicateExpressionsOptimizer::optimize()
bool PredicateExpressionsOptimizer::optimize(ASTSelectQuery & select_query)
{
if (!settings.enable_optimize_predicate_expression || !ast_select || !ast_select->tables() || ast_select->tables()->children.empty())
if (!settings.enable_optimize_predicate_expression)
return false;
if (!ast_select->where() && !ast_select->prewhere())
if (select_query.having() && (!select_query.group_by_with_cube && !select_query.group_by_with_rollup && !select_query.group_by_with_totals))
tryMovePredicatesFromHavingToWhere(select_query);
if (!select_query.tables() || select_query.tables()->children.empty())
return false;
if (ast_select->array_join_expression_list())
if ((!select_query.where() && !select_query.prewhere()) || select_query.array_join_expression_list())
return false;
SubqueriesProjectionColumns all_subquery_projection_columns = getAllSubqueryProjectionColumns();
const auto & tables_predicates = extractTablesPredicates(select_query.where(), select_query.prewhere());
bool is_rewrite_subqueries = false;
if (!all_subquery_projection_columns.empty())
{
is_rewrite_subqueries |= optimizeImpl(ast_select->where(), all_subquery_projection_columns, OptimizeKind::PUSH_TO_WHERE);
is_rewrite_subqueries |= optimizeImpl(ast_select->prewhere(), all_subquery_projection_columns, OptimizeKind::PUSH_TO_PREWHERE);
}
if (!tables_predicates.empty())
return tryRewritePredicatesToTables(select_query.refTables()->children, tables_predicates);
return is_rewrite_subqueries;
return false;
}
bool PredicateExpressionsOptimizer::optimizeImpl(
const ASTPtr & outer_expression, const SubqueriesProjectionColumns & subqueries_projection_columns, OptimizeKind expression_kind)
static ASTs splitConjunctionPredicate(const std::initializer_list<const ASTPtr> & predicates)
{
/// split predicate with `and`
std::vector<ASTPtr> outer_predicate_expressions = splitConjunctionPredicate(outer_expression);
std::vector<ASTPtr> res;
std::vector<const ASTTableExpression *> table_expressions = getTableExpressions(*ast_select);
std::vector<TableWithColumnNames> tables_with_columns = getDatabaseAndTablesWithColumnNames(table_expressions, context);
bool is_rewrite_subquery = false;
for (auto & outer_predicate : outer_predicate_expressions)
auto remove_expression_at_index = [&res] (const size_t index)
{
if (isArrayJoinFunction(outer_predicate))
if (index < res.size() - 1)
std::swap(res[index], res.back());
res.pop_back();
};
for (const auto & predicate : predicates)
{
if (!predicate)
continue;
auto outer_predicate_dependencies = getDependenciesAndQualifiers(outer_predicate, tables_with_columns);
res.emplace_back(predicate);
/// TODO: remove origin expression
for (const auto & [subquery, projection_columns] : subqueries_projection_columns)
for (size_t idx = 0; idx < res.size();)
{
OptimizeKind optimize_kind = OptimizeKind::NONE;
if (allowPushDown(subquery, outer_predicate, projection_columns, outer_predicate_dependencies, optimize_kind))
const auto & expression = res.at(idx);
if (const auto * function = expression->as<ASTFunction>(); function && function->name == "and")
{
if (optimize_kind == OptimizeKind::NONE)
optimize_kind = expression_kind;
for (auto & child : function->arguments->children)
res.emplace_back(child);
ASTPtr inner_predicate = outer_predicate->clone();
cleanExpressionAlias(inner_predicate); /// clears the alias name contained in the outer predicate
remove_expression_at_index(idx);
continue;
}
++idx;
}
}
std::vector<IdentifierWithQualifier> inner_predicate_dependencies =
getDependenciesAndQualifiers(inner_predicate, tables_with_columns);
return res;
}
setNewAliasesForInnerPredicate(projection_columns, inner_predicate_dependencies);
std::vector<ASTs> PredicateExpressionsOptimizer::extractTablesPredicates(const ASTPtr & where, const ASTPtr & prewhere)
{
std::vector<ASTs> tables_predicates(tables_with_columns.size());
switch (optimize_kind)
{
case OptimizeKind::NONE: continue;
case OptimizeKind::PUSH_TO_WHERE:
is_rewrite_subquery |= optimizeExpression(inner_predicate, subquery, ASTSelectQuery::Expression::WHERE);
continue;
case OptimizeKind::PUSH_TO_HAVING:
is_rewrite_subquery |= optimizeExpression(inner_predicate, subquery, ASTSelectQuery::Expression::HAVING);
continue;
case OptimizeKind::PUSH_TO_PREWHERE:
is_rewrite_subquery |= optimizeExpression(inner_predicate, subquery, ASTSelectQuery::Expression::PREWHERE);
continue;
}
for (const auto & predicate_expression : splitConjunctionPredicate({where, prewhere}))
{
ExpressionInfoVisitor::Data expression_info{.context = context, .tables = tables_with_columns};
ExpressionInfoVisitor(expression_info).visit(predicate_expression);
if (expression_info.is_stateful_function)
return {}; /// give up the optimization when the predicate contains stateful function
if (!expression_info.is_array_join)
{
if (expression_info.unique_reference_tables_pos.size() == 1)
tables_predicates[*expression_info.unique_reference_tables_pos.begin()].emplace_back(predicate_expression);
else if (expression_info.unique_reference_tables_pos.size() == 0)
{
for (size_t index = 0; index < tables_predicates.size(); ++index)
tables_predicates[index].emplace_back(predicate_expression);
}
}
}
return is_rewrite_subquery;
return tables_predicates; /// everything is OK, it can be optimized
}
bool PredicateExpressionsOptimizer::allowPushDown(
const ASTSelectQuery * subquery,
const ASTPtr &,
const std::vector<ProjectionWithAlias> & projection_columns,
const std::vector<IdentifierWithQualifier> & dependencies,
OptimizeKind & optimize_kind)
bool PredicateExpressionsOptimizer::tryRewritePredicatesToTables(ASTs & tables_element, const std::vector<ASTs> & tables_predicates)
{
if (!subquery
|| (!settings.enable_optimize_predicate_expression_to_final_subquery && subquery->final())
|| subquery->limitBy() || subquery->limitLength()
|| subquery->with() || subquery->withFill())
return false;
else
bool is_rewrite_tables = false;
for (size_t index = tables_element.size(); index > 0; --index)
{
ASTPtr expr_list = ast_select->select();
ExtractFunctionVisitor::Data extract_data;
ExtractFunctionVisitor(extract_data).visit(expr_list);
for (const auto & subquery_function : extract_data.functions)
{
const auto & function = FunctionFactory::instance().tryGet(subquery_function->name, context);
/// Skip lambda, tuple and other special functions
if (function && function->isStateful())
return false;
}
}
const auto * ast_join = ast_select->join();
const ASTTableExpression * left_table_expr = nullptr;
const ASTTableExpression * right_table_expr = nullptr;
const ASTSelectQuery * left_subquery = nullptr;
const ASTSelectQuery * right_subquery = nullptr;
if (ast_join)
{
left_table_expr = ast_select
->tables()->as<ASTTablesInSelectQuery>()
->children[0]->as<ASTTablesInSelectQueryElement>()
->table_expression->as<ASTTableExpression>();
right_table_expr = ast_select
->tables()->as<ASTTablesInSelectQuery>()
->children[1]->as<ASTTablesInSelectQueryElement>()
->table_expression->as<ASTTableExpression>();
if (left_table_expr && left_table_expr->subquery)
left_subquery = left_table_expr->subquery
->children[0]->as<ASTSelectWithUnionQuery>()
->list_of_selects->children[0]->as<ASTSelectQuery>();
if (right_table_expr && right_table_expr->subquery)
right_subquery = right_table_expr->subquery
->children[0]->as<ASTSelectWithUnionQuery>()
->list_of_selects->children[0]->as<ASTSelectQuery>();
size_t table_pos = index - 1;
/// NOTE: the syntactic way of pushdown has limitations and should be partially disabled in case of JOINs.
/// Let's take a look at the query:
@ -201,326 +132,84 @@ bool PredicateExpressionsOptimizer::allowPushDown(
/// It happens because the not-matching columns are replaced with a global default values on JOIN.
/// Same is true for RIGHT JOIN and FULL JOIN.
/// Check right side for LEFT'o'FULL JOIN
if (isLeftOrFull(ast_join->table_join->as<ASTTableJoin>()->kind) && right_subquery == subquery)
return false;
/// Check left side for RIGHT'o'FULL JOIN
if (isRightOrFull(ast_join->table_join->as<ASTTableJoin>()->kind) && left_subquery == subquery)
return false;
}
return checkDependencies(projection_columns, dependencies, optimize_kind);
}
bool PredicateExpressionsOptimizer::checkDependencies(
const std::vector<ProjectionWithAlias> & projection_columns,
const std::vector<IdentifierWithQualifier> & dependencies,
OptimizeKind & optimize_kind)
{
for (const auto & [identifier, prefix] : dependencies)
{
bool is_found = false;
String qualified_name = qualifiedName(identifier, prefix);
for (const auto & [ast, alias] : projection_columns)
if (const auto & table_element = tables_element[table_pos]->as<ASTTablesInSelectQueryElement>())
{
if (alias == qualified_name)
{
is_found = true;
ASTPtr projection_column = ast;
ExtractFunctionVisitor::Data extract_data;
ExtractFunctionVisitor(extract_data).visit(projection_column);
if (table_element->table_join && isLeft(table_element->table_join->as<ASTTableJoin>()->kind))
continue; /// Skip right table optimization
if (!extract_data.aggregate_functions.empty())
optimize_kind = OptimizeKind::PUSH_TO_HAVING;
}
}
if (table_element->table_join && isFull(table_element->table_join->as<ASTTableJoin>()->kind))
break; /// Skip left and right table optimization
if (!is_found)
return false;
}
is_rewrite_tables |= tryRewritePredicatesToTable(tables_element[table_pos], tables_predicates[table_pos],
tables_with_columns[table_pos].columns);
return true;
}
std::vector<ASTPtr> PredicateExpressionsOptimizer::splitConjunctionPredicate(const ASTPtr & predicate_expression)
{
std::vector<ASTPtr> predicate_expressions;
if (predicate_expression)
{
predicate_expressions.emplace_back(predicate_expression);
auto remove_expression_at_index = [&predicate_expressions] (const size_t index)
{
if (index < predicate_expressions.size() - 1)
std::swap(predicate_expressions[index], predicate_expressions.back());
predicate_expressions.pop_back();
};
for (size_t idx = 0; idx < predicate_expressions.size();)
{
const auto expression = predicate_expressions.at(idx);
if (const auto * function = expression->as<ASTFunction>())
{
if (function->name == and_function_name)
{
for (auto & child : function->arguments->children)
predicate_expressions.emplace_back(child);
remove_expression_at_index(idx);
continue;
}
}
++idx;
if (table_element->table_join && isRight(table_element->table_join->as<ASTTableJoin>()->kind))
break; /// Skip left table optimization
}
}
return predicate_expressions;
return is_rewrite_tables;
}
std::vector<PredicateExpressionsOptimizer::IdentifierWithQualifier>
PredicateExpressionsOptimizer::getDependenciesAndQualifiers(ASTPtr & expression, std::vector<TableWithColumnNames> & tables)
bool PredicateExpressionsOptimizer::tryRewritePredicatesToTable(ASTPtr & table_element, const ASTs & table_predicates, const Names & table_column) const
{
FindIdentifierBestTableVisitor::Data find_data(tables);
FindIdentifierBestTableVisitor(find_data).visit(expression);
std::vector<IdentifierWithQualifier> dependencies;
for (const auto & [identifier, table] : find_data.identifier_table)
if (!table_predicates.empty())
{
String table_alias;
if (table)
table_alias = table->getQualifiedNamePrefix();
auto optimize_final = settings.enable_optimize_predicate_expression_to_final_subquery;
PredicateRewriteVisitor::Data data(context, table_predicates, table_column, optimize_final);
dependencies.emplace_back(identifier, table_alias);
PredicateRewriteVisitor(data).visit(table_element);
return data.is_rewrite;
}
return dependencies;
}
void PredicateExpressionsOptimizer::setNewAliasesForInnerPredicate(
const std::vector<ProjectionWithAlias> & projection_columns,
const std::vector<IdentifierWithQualifier> & dependencies)
{
for (auto & [identifier, prefix] : dependencies)
{
String qualified_name = qualifiedName(identifier, prefix);
for (auto & [ast, alias] : projection_columns)
{
if (alias == qualified_name)
{
String name;
if (auto * id = ast->as<ASTIdentifier>())
{
name = id->tryGetAlias();
if (name.empty())
name = id->shortName();
}
else
{
if (ast->tryGetAlias().empty())
ast->setAlias(ast->getColumnName());
name = ast->getAliasOrColumnName();
}
identifier->setShortName(name);
}
}
}
}
bool PredicateExpressionsOptimizer::isArrayJoinFunction(const ASTPtr & node)
{
if (const auto * function = node->as<ASTFunction>())
{
if (function->name == "arrayJoin")
return true;
}
for (auto & child : node->children)
if (isArrayJoinFunction(child))
return true;
return false;
}
bool PredicateExpressionsOptimizer::optimizeExpression(const ASTPtr & outer_expression, ASTSelectQuery * subquery,
ASTSelectQuery::Expression expr)
bool PredicateExpressionsOptimizer::tryMovePredicatesFromHavingToWhere(ASTSelectQuery & select_query)
{
ASTPtr subquery_expression = subquery->getExpression(expr, false);
subquery_expression = subquery_expression ? makeASTFunction(and_function_name, outer_expression, subquery_expression) : outer_expression;
ASTs where_predicates;
ASTs having_predicates;
const auto & reduce_predicates = [&](const ASTs & predicates)
{
ASTPtr res = predicates[0];
for (size_t index = 1; index < predicates.size(); ++index)
res = makeASTFunction("and", res, predicates[index]);
return res;
};
for (const auto & moving_predicate: splitConjunctionPredicate({select_query.having()}))
{
ExpressionInfoVisitor::Data expression_info{.context = context, .tables = {}};
ExpressionInfoVisitor(expression_info).visit(moving_predicate);
/// TODO: If there is no group by, where, and prewhere expression, we can push down the stateful function
if (expression_info.is_stateful_function)
return false;
if (expression_info.is_aggregate_function)
having_predicates.emplace_back(moving_predicate);
else
where_predicates.emplace_back(moving_predicate);
}
if (having_predicates.empty())
select_query.setExpression(ASTSelectQuery::Expression::HAVING, {});
else
{
auto having_predicate = reduce_predicates(having_predicates);
select_query.setExpression(ASTSelectQuery::Expression::HAVING, std::move(having_predicate));
}
if (!where_predicates.empty())
{
auto moved_predicate = reduce_predicates(where_predicates);
moved_predicate = select_query.where() ? makeASTFunction("and", select_query.where(), moved_predicate) : moved_predicate;
select_query.setExpression(ASTSelectQuery::Expression::WHERE, std::move(moved_predicate));
}
subquery->setExpression(expr, std::move(subquery_expression));
return true;
}
PredicateExpressionsOptimizer::SubqueriesProjectionColumns PredicateExpressionsOptimizer::getAllSubqueryProjectionColumns()
{
SubqueriesProjectionColumns projection_columns;
for (const auto & table_expression : getTableExpressions(*ast_select))
if (table_expression->subquery)
getSubqueryProjectionColumns(table_expression->subquery, projection_columns);
return projection_columns;
}
void PredicateExpressionsOptimizer::getSubqueryProjectionColumns(const ASTPtr & subquery, SubqueriesProjectionColumns & projection_columns)
{
String qualified_name_prefix = subquery->tryGetAlias();
if (!qualified_name_prefix.empty())
qualified_name_prefix += '.';
const ASTPtr & subselect = subquery->children[0];
ASTs select_with_union_projections;
const auto * select_with_union_query = subselect->as<ASTSelectWithUnionQuery>();
for (auto & select : select_with_union_query->list_of_selects->children)
{
std::vector<ProjectionWithAlias> subquery_projections;
auto select_projection_columns = getSelectQueryProjectionColumns(select);
if (!select_projection_columns.empty())
{
if (select_with_union_projections.empty())
select_with_union_projections = select_projection_columns;
for (size_t i = 0; i < select_projection_columns.size(); i++)
subquery_projections.emplace_back(std::pair(select_projection_columns[i],
qualified_name_prefix + select_with_union_projections[i]->getAliasOrColumnName()));
projection_columns.insert(std::pair(select->as<ASTSelectQuery>(), subquery_projections));
}
}
}
ASTs PredicateExpressionsOptimizer::getSelectQueryProjectionColumns(ASTPtr & ast)
{
ASTs projection_columns;
auto * select_query = ast->as<ASTSelectQuery>();
/// first should normalize query tree.
std::unordered_map<String, ASTPtr> aliases;
std::vector<DatabaseAndTableWithAlias> tables = getDatabaseAndTables(*select_query, context.getCurrentDatabase());
/// TODO: get tables from evaluateAsterisk instead of tablesOnly() to extract asterisks in general way
std::vector<TableWithColumnNames> tables_with_columns = TranslateQualifiedNamesVisitor::Data::tablesOnly(tables);
TranslateQualifiedNamesVisitor::Data qn_visitor_data({}, std::move(tables_with_columns), false);
TranslateQualifiedNamesVisitor(qn_visitor_data).visit(ast);
QueryAliasesVisitor::Data query_aliases_data{aliases};
QueryAliasesVisitor(query_aliases_data).visit(ast);
MarkTableIdentifiersVisitor::Data mark_tables_data{aliases};
MarkTableIdentifiersVisitor(mark_tables_data).visit(ast);
QueryNormalizer::Data normalizer_data(aliases, settings);
QueryNormalizer(normalizer_data).visit(ast);
for (const auto & projection_column : select_query->select()->children)
{
if (projection_column->as<ASTAsterisk>() || projection_column->as<ASTQualifiedAsterisk>() || projection_column->as<ASTColumnsMatcher>())
{
ASTs evaluated_columns = evaluateAsterisk(select_query, projection_column);
for (const auto & column : evaluated_columns)
projection_columns.emplace_back(column);
continue;
}
projection_columns.emplace_back(projection_column);
}
return projection_columns;
}
ASTs PredicateExpressionsOptimizer::evaluateAsterisk(ASTSelectQuery * select_query, const ASTPtr & asterisk)
{
/// SELECT *, SELECT dummy, SELECT 1 AS id
if (!select_query->tables() || select_query->tables()->children.empty())
return {};
std::vector<const ASTTableExpression *> tables_expression = getTableExpressions(*select_query);
if (const auto * qualified_asterisk = asterisk->as<ASTQualifiedAsterisk>())
{
if (qualified_asterisk->children.size() != 1)
throw Exception("Logical error: qualified asterisk must have exactly one child", ErrorCodes::LOGICAL_ERROR);
DatabaseAndTableWithAlias ident_db_and_name(qualified_asterisk->children[0]);
for (auto it = tables_expression.begin(); it != tables_expression.end();)
{
const ASTTableExpression * table_expression = *it;
DatabaseAndTableWithAlias database_and_table_with_alias(*table_expression, context.getCurrentDatabase());
if (ident_db_and_name.satisfies(database_and_table_with_alias, true))
++it;
else
it = tables_expression.erase(it); /// It's not a required table
}
}
ASTs projection_columns;
for (auto & table_expression : tables_expression)
{
if (table_expression->subquery)
{
const auto * subquery = table_expression->subquery->as<ASTSubquery>();
const auto * select_with_union_query = subquery->children[0]->as<ASTSelectWithUnionQuery>();
const auto subquery_projections = getSelectQueryProjectionColumns(select_with_union_query->list_of_selects->children[0]);
projection_columns.insert(projection_columns.end(), subquery_projections.begin(), subquery_projections.end());
}
else
{
StoragePtr storage;
if (table_expression->table_function)
{
auto query_context = const_cast<Context *>(&context.getQueryContext());
storage = query_context->executeTableFunction(table_expression->table_function);
}
else if (table_expression->database_and_table_name)
{
const auto * database_and_table_ast = table_expression->database_and_table_name->as<ASTIdentifier>();
DatabaseAndTableWithAlias database_and_table_name(*database_and_table_ast);
storage = context.getTable(database_and_table_name.database, database_and_table_name.table);
}
else
throw Exception("Logical error: unexpected table expression", ErrorCodes::LOGICAL_ERROR);
const auto block = storage->getSampleBlock();
if (const auto * asterisk_pattern = asterisk->as<ASTColumnsMatcher>())
{
for (size_t idx = 0; idx < block.columns(); ++idx)
{
auto & col = block.getByPosition(idx);
if (asterisk_pattern->isColumnMatching(col.name))
projection_columns.emplace_back(std::make_shared<ASTIdentifier>(col.name));
}
}
else
{
for (size_t idx = 0; idx < block.columns(); ++idx)
projection_columns.emplace_back(std::make_shared<ASTIdentifier>(block.getByPosition(idx).name));
}
}
}
return projection_columns;
}
void PredicateExpressionsOptimizer::cleanExpressionAlias(ASTPtr & expression)
{
const auto my_alias = expression->tryGetAlias();
if (!my_alias.empty())
expression->setAlias("");
for (auto & child : expression->children)
cleanExpressionAlias(child);
}
}

View File

@ -1,110 +1,53 @@
#pragma once
#include "DatabaseAndTableWithAlias.h"
#include <Parsers/ASTSelectQuery.h>
#include <map>
#include <Interpreters/DatabaseAndTableWithAlias.h>
namespace DB
{
class ASTIdentifier;
class ASTSubquery;
class Context;
struct Settings;
/** This class provides functions for Push-Down predicate expressions
*
* The Example:
* - Query before optimization :
* SELECT id_1, name_1 FROM (SELECT id_1, name_1 FROM table_a UNION ALL SELECT id_2, name_2 FROM table_b)
* WHERE id_1 = 1
* - Query after optimization :
* SELECT id_1, name_1 FROM (SELECT id_1, name_1 FROM table_a WHERE id_1 = 1 UNION ALL SELECT id_2, name_2 FROM table_b WHERE id_2 = 1)
* WHERE id_1 = 1
/** Predicate optimization based on rewriting ast rules
* For more details : https://github.com/ClickHouse/ClickHouse/pull/2015#issuecomment-374283452
* The optimizer does two different optimizations
* - Move predicates from having to where
* - Push the predicate down from the current query to the having of the subquery
*/
class PredicateExpressionsOptimizer
{
using ProjectionWithAlias = std::pair<ASTPtr, String>;
using SubqueriesProjectionColumns = std::map<ASTSelectQuery *, std::vector<ProjectionWithAlias>>;
using IdentifierWithQualifier = std::pair<ASTIdentifier *, String>;
public:
PredicateExpressionsOptimizer(const Context & context_, const TablesWithColumnNames & tables_with_columns_, const Settings & settings_);
bool optimize(ASTSelectQuery & select_query);
private:
/// Extracts settings, mostly to show which are used and which are not.
struct ExtractedSettings
{
/// QueryNormalizer settings
const UInt64 max_ast_depth;
const UInt64 max_expanded_ast_elements;
const String count_distinct_implementation;
/// for PredicateExpressionsOptimizer
const bool enable_optimize_predicate_expression;
const bool enable_optimize_predicate_expression_to_final_subquery;
const bool join_use_nulls;
template<typename T>
ExtractedSettings(const T & settings_)
: max_ast_depth(settings_.max_ast_depth),
max_expanded_ast_elements(settings_.max_expanded_ast_elements),
count_distinct_implementation(settings_.count_distinct_implementation),
enable_optimize_predicate_expression(settings_.enable_optimize_predicate_expression),
enable_optimize_predicate_expression_to_final_subquery(settings_.enable_optimize_predicate_expression_to_final_subquery),
join_use_nulls(settings_.join_use_nulls)
: enable_optimize_predicate_expression(settings_.enable_optimize_predicate_expression),
enable_optimize_predicate_expression_to_final_subquery(settings_.enable_optimize_predicate_expression_to_final_subquery)
{}
};
public:
PredicateExpressionsOptimizer(ASTSelectQuery * ast_select_, ExtractedSettings && settings_, const Context & context_);
bool optimize();
private:
ASTSelectQuery * ast_select;
const ExtractedSettings settings;
const Context & context;
const std::vector<TableWithColumnNames> & tables_with_columns;
enum OptimizeKind
{
NONE,
PUSH_TO_PREWHERE,
PUSH_TO_WHERE,
PUSH_TO_HAVING,
};
const ExtractedSettings settings;
bool isArrayJoinFunction(const ASTPtr & node);
std::vector<ASTs> extractTablesPredicates(const ASTPtr & where, const ASTPtr & prewhere);
std::vector<ASTPtr> splitConjunctionPredicate(const ASTPtr & predicate_expression);
bool tryRewritePredicatesToTables(ASTs & tables_element, const std::vector<ASTs> & tables_predicates);
std::vector<IdentifierWithQualifier> getDependenciesAndQualifiers(ASTPtr & expression,
std::vector<TableWithColumnNames> & tables_with_aliases);
bool tryRewritePredicatesToTable(ASTPtr & table_element, const ASTs & table_predicates, const Names & table_column) const;
bool optimizeExpression(const ASTPtr & outer_expression, ASTSelectQuery * subquery, ASTSelectQuery::Expression expr);
bool optimizeImpl(const ASTPtr & outer_expression, const SubqueriesProjectionColumns & subqueries_projection_columns, OptimizeKind optimize_kind);
bool allowPushDown(
const ASTSelectQuery * subquery,
const ASTPtr & outer_predicate,
const std::vector<ProjectionWithAlias> & subquery_projection_columns,
const std::vector<IdentifierWithQualifier> & outer_predicate_dependencies,
OptimizeKind & optimize_kind);
bool checkDependencies(
const std::vector<ProjectionWithAlias> & projection_columns,
const std::vector<IdentifierWithQualifier> & dependencies,
OptimizeKind & optimize_kind);
void setNewAliasesForInnerPredicate(const std::vector<ProjectionWithAlias> & projection_columns,
const std::vector<IdentifierWithQualifier> & inner_predicate_dependencies);
SubqueriesProjectionColumns getAllSubqueryProjectionColumns();
void getSubqueryProjectionColumns(const ASTPtr & subquery, SubqueriesProjectionColumns & all_subquery_projection_columns);
ASTs getSelectQueryProjectionColumns(ASTPtr & ast);
ASTs evaluateAsterisk(ASTSelectQuery * select_query, const ASTPtr & asterisk);
void cleanExpressionAlias(ASTPtr & expression);
bool tryMovePredicatesFromHavingToWhere(ASTSelectQuery & select_query);
};
}

View File

@ -0,0 +1,119 @@
#include <Interpreters/PredicateRewriteVisitor.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTAsterisk.h>
#include <Parsers/ASTSubquery.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTColumnsMatcher.h>
#include <Parsers/ASTQualifiedAsterisk.h>
#include <Interpreters/IdentifierSemantic.h>
#include <Interpreters/getTableExpressions.h>
#include <Interpreters/InterpreterSelectQuery.h>
#include <Interpreters/ExtractExpressionInfoVisitor.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
namespace DB
{
/// Captures everything needed to push outer predicates into a subquery:
/// the query context, the predicates to push, the outer column names they
/// refer to, and whether pushdown into FINAL subqueries is permitted.
PredicateRewriteVisitorData::PredicateRewriteVisitorData(
const Context & context_, const ASTs & predicates_, const Names & column_names_, bool optimize_final_)
: context(context_), predicates(predicates_), column_names(column_names_), optimize_final(optimize_final_)
{
}
/// Entry point of the rewrite: applied to a subquery's SELECT ... UNION ALL ... list.
/// The first internal SELECT keeps its own aliases; every subsequent SELECT is
/// handled separately because its aliases are not visible to the outer query.
void PredicateRewriteVisitorData::visit(ASTSelectWithUnionQuery & union_select_query, ASTPtr &)
{
auto & internal_select_list = union_select_query.list_of_selects->children;
/// Idiomatic emptiness check instead of `size() > 0`.
if (!internal_select_list.empty())
visitFirstInternalSelect(*internal_select_list[0]->as<ASTSelectQuery>(), internal_select_list[0]);
/// Remaining SELECTs of the UNION are rewritten via a temporary-alias scheme.
for (size_t index = 1; index < internal_select_list.size(); ++index)
visitOtherInternalSelect(*internal_select_list[index]->as<ASTSelectQuery>(), internal_select_list[index]);
}
/// For the first SELECT of the union the outer column names map one-to-one
/// onto the inner ones, so the same name list is used for both sides.
void PredicateRewriteVisitorData::visitFirstInternalSelect(ASTSelectQuery & select_query, ASTPtr &)
{
is_rewrite |= rewriteSubquery(select_query, column_names, column_names);
}
/// Rewrites a non-first SELECT of a UNION. Works on a clone so the original
/// query is only modified if the rewrite actually succeeds.
void PredicateRewriteVisitorData::visitOtherInternalSelect(ASTSelectQuery & select_query, ASTPtr &)
{
/// For non first select, its alias has no more significance, so we can set a temporary alias for them
ASTPtr temp_internal_select = select_query.clone();
ASTSelectQuery * temp_select_query = temp_internal_select->as<ASTSelectQuery>();
size_t alias_index = 0;
for (auto & ref_select : temp_select_query->refSelect()->children)
{
/// Asterisks and plain identifiers keep their natural names; everything
/// else (expressions) gets a synthetic alias so that the inner column
/// list computed below has stable, addressable names.
if (!ref_select->as<ASTAsterisk>() && !ref_select->as<ASTQualifiedAsterisk>() && !ref_select->as<ASTColumnsMatcher>() &&
!ref_select->as<ASTIdentifier>())
{
if (const auto & alias = ref_select->tryGetAlias(); alias.empty())
ref_select->setAlias("--predicate_optimizer_" + toString(alias_index++));
}
}
/// Analyze (without executing) the aliased clone to obtain its output column names.
const Names & internal_columns = InterpreterSelectQuery(
temp_internal_select, context, SelectQueryOptions().analyze()).getSampleBlock().getNames();
if (rewriteSubquery(*temp_select_query, column_names, internal_columns))
{
/// Commit the rewritten SELECT/HAVING expressions back to the real query.
is_rewrite |= true;
select_query.setExpression(ASTSelectQuery::Expression::SELECT, std::move(temp_select_query->refSelect()));
select_query.setExpression(ASTSelectQuery::Expression::HAVING, std::move(temp_select_query->refHaving()));
}
}
/// Recursively strips aliases from a predicate AST and collects pointers to
/// all identifier nodes in it, so the caller can later rename them to the
/// subquery's internal column names. Subqueries inside the predicate
/// (e.g. `x IN (SELECT ...)`) are deliberately left untouched.
static void cleanAliasAndCollectIdentifiers(ASTPtr & predicate, std::vector<ASTIdentifier *> & identifiers)
{
/// Descend into children first, unless this node is a subquery.
const bool is_subquery = predicate->as<ASTSubquery>() != nullptr;
if (!is_subquery)
{
for (auto & child : predicate->children)
cleanAliasAndCollectIdentifiers(child, identifiers);
}
/// Drop any alias on the node itself.
if (!predicate->tryGetAlias().empty())
predicate->setAlias("");
/// Remember identifier nodes for later renaming.
if (auto * identifier = predicate->as<ASTIdentifier>())
identifiers.emplace_back(identifier);
}
/// Attempts to push the stored predicates into one SELECT of the subquery.
/// Returns false (no change) when pushdown would alter semantics:
/// FINAL (unless explicitly allowed), WITH, WITH FILL, LIMIT BY / LIMIT,
/// or a stateful function in the select list.
bool PredicateRewriteVisitorData::rewriteSubquery(ASTSelectQuery & subquery, const Names & outer_columns, const Names & inner_columns)
{
if ((!optimize_final && subquery.final())
|| subquery.with() || subquery.withFill()
|| subquery.limitBy() || subquery.limitLength()
|| hasStatefulFunction(subquery.select(), context))
return false;
for (const auto & predicate : predicates)
{
std::vector<ASTIdentifier *> identifiers;
/// Work on a clone: the caller may reuse the original predicate for other branches.
ASTPtr optimize_predicate = predicate->clone();
cleanAliasAndCollectIdentifiers(optimize_predicate, identifiers);
/// Rename outer column references to the corresponding inner column names.
for (size_t index = 0; index < identifiers.size(); ++index)
{
const auto & column_name = identifiers[index]->shortName();
const auto & outer_column_iterator = std::find(outer_columns.begin(), outer_columns.end(), column_name);
/// For lambda functions, we can't always find them in the list of columns
/// For example: SELECT * FROM system.one WHERE arrayMap(x -> x, [dummy]) = [0]
if (outer_column_iterator != outer_columns.end())
identifiers[index]->setShortName(inner_columns[outer_column_iterator - outer_columns.begin()]);
}
/// We only need to push all the predicates to subquery having
/// The subquery optimizer will move the appropriate predicates from having to where
subquery.setExpression(ASTSelectQuery::Expression::HAVING,
subquery.having() ? makeASTFunction("and", optimize_predicate, subquery.having()) : optimize_predicate);
}
return true;
}
}

View File

@ -0,0 +1,36 @@
#pragma once
#include <Parsers/IAST.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTSelectWithUnionQuery.h>
#include <Interpreters/InDepthNodeVisitor.h>
namespace DB
{
/// Visitor data for pushing outer predicates into the HAVING clause of
/// subqueries (each SELECT of a SELECT ... UNION ALL ... list).
/// Used through InDepthNodeVisitor; `is_rewrite` reports whether any
/// subquery was actually modified.
class PredicateRewriteVisitorData
{
public:
/// Set to true when at least one internal SELECT was rewritten.
bool is_rewrite = false;
using TypeToVisit = ASTSelectWithUnionQuery;
void visit(ASTSelectWithUnionQuery & union_select_query, ASTPtr &);
PredicateRewriteVisitorData(const Context & context_, const ASTs & predicates_, const Names & column_names_, bool optimize_final_);
private:
const Context & context;
/// Predicates to push down, and the outer column names they reference.
const ASTs & predicates;
const Names & column_names;
/// Whether pushdown into subqueries with FINAL is allowed.
bool optimize_final;
void visitFirstInternalSelect(ASTSelectQuery & select_query, ASTPtr &);
void visitOtherInternalSelect(ASTSelectQuery & select_query, ASTPtr &);
bool rewriteSubquery(ASTSelectQuery & subquery, const Names & outer_columns, const Names & inner_columns);
};
using PredicateRewriteMatcher = OneTypeMatcher<PredicateRewriteVisitorData, false>;
/// Visits child nodes first (`true`): inner subqueries are rewritten before outer ones.
using PredicateRewriteVisitor = InDepthNodeVisitor<PredicateRewriteMatcher, true>;
}

View File

@ -181,7 +181,7 @@ void renameDuplicatedColumns(const ASTSelectQuery * select_query)
/// Sometimes we have to calculate more columns in SELECT clause than will be returned from query.
/// This is the case when we have DISTINCT or arrayJoin: we require more columns in SELECT even if we need less columns in result.
/// Also we have to remove duplicates in case of GLOBAL subqueries. Their results are placed into tables so duplicates are inpossible.
/// Also we have to remove duplicates in case of GLOBAL subqueries. Their results are placed into tables so duplicates are impossible.
void removeUnneededColumnsFromSelectClause(const ASTSelectQuery * select_query, const Names & required_result_columns, bool remove_dups)
{
ASTs & elements = select_query->select()->children;
@ -632,7 +632,7 @@ std::vector<const ASTFunction *> getAggregates(const ASTPtr & query)
/// After execution, columns will only contain the list of columns needed to read from the table.
void SyntaxAnalyzerResult::collectUsedColumns(const ASTPtr & query, const NamesAndTypesList & additional_source_columns)
{
/// We caclulate required_source_columns with source_columns modifications and swap them on exit
/// We calculate required_source_columns with source_columns modifications and swap them on exit
required_source_columns = source_columns;
if (!additional_source_columns.empty())
@ -652,15 +652,15 @@ void SyntaxAnalyzerResult::collectUsedColumns(const ASTPtr & query, const NamesA
if (columns_context.has_table_join)
{
NameSet avaliable_columns;
NameSet available_columns;
for (const auto & name : source_columns)
avaliable_columns.insert(name.name);
available_columns.insert(name.name);
/// Add columns obtained by JOIN (if needed).
for (const auto & joined_column : analyzed_join->columnsFromJoinedTable())
{
auto & name = joined_column.name;
if (avaliable_columns.count(name))
if (available_columns.count(name))
continue;
if (required.count(name))
@ -845,12 +845,12 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze(
{
if (storage)
{
const ColumnsDescription & starage_columns = storage->getColumns();
tables_with_columns.emplace_back(DatabaseAndTableWithAlias{}, starage_columns.getOrdinary().getNames());
const ColumnsDescription & storage_columns = storage->getColumns();
tables_with_columns.emplace_back(DatabaseAndTableWithAlias{}, storage_columns.getOrdinary().getNames());
auto & table = tables_with_columns.back();
table.addHiddenColumns(starage_columns.getMaterialized());
table.addHiddenColumns(starage_columns.getAliases());
table.addHiddenColumns(starage_columns.getVirtuals());
table.addHiddenColumns(storage_columns.getMaterialized());
table.addHiddenColumns(storage_columns.getAliases());
table.addHiddenColumns(storage_columns.getVirtuals());
}
else
{
@ -920,6 +920,9 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze(
if (select_query)
{
/// Push the predicate expression down to the subqueries.
result.rewrite_subqueries = PredicateExpressionsOptimizer(context, tables_with_columns, settings).optimize(*select_query);
/// GROUP BY injective function elimination.
optimizeGroupBy(select_query, source_columns_set, context);
@ -935,9 +938,6 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze(
/// array_join_alias_to_name, array_join_result_to_source.
getArrayJoinedColumns(query, result, select_query, result.source_columns, source_columns_set);
/// Push the predicate expression down to the subqueries.
result.rewrite_subqueries = PredicateExpressionsOptimizer(select_query, settings, context).optimize();
setJoinStrictness(*select_query, settings.join_default_strictness, settings.any_join_distinct_right_table_keys,
result.analyzed_join->table_join);
collectJoinedColumns(*result.analyzed_join, *select_query, tables_with_columns, result.aliases);

View File

@ -167,7 +167,7 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID
{
which_from_type = WhichDataType(*from_type_hint);
// This was added to mitigate converting DateTime64-Field (a typedef to a Decimal64) to DataTypeDate64-compatitable type.
// This was added to mitigate converting DateTime64-Field (a typedef to a Decimal64) to DataTypeDate64-compatible type.
if (from_type_hint && from_type_hint->equals(type))
{
return src;

View File

@ -118,7 +118,7 @@ void loadMetadata(Context & context)
}
catch (...)
{
tryLogCurrentException("Load metadata", "Can't remove force restore file to enable data santity checks");
tryLogCurrentException("Load metadata", "Can't remove force restore file to enable data sanity checks");
}
}
}

View File

@ -0,0 +1,95 @@
#include <Processors/DelayedPortsProcessor.h>
namespace DB
{
/// Builds N identical input/output pairs and marks the ports listed in
/// `delayed_ports` as delayed: those pairs are held back until all
/// non-delayed pairs are finished (see prepare()).
DelayedPortsProcessor::DelayedPortsProcessor(const Block & header, size_t num_ports, const PortNumbers & delayed_ports)
: IProcessor(InputPorts(num_ports, header), OutputPorts(num_ports, header))
, num_delayed(delayed_ports.size())
{
port_pairs.resize(num_ports);
/// Pair up the i-th input with the i-th output; data is never mixed between pairs.
auto input_it = inputs.begin();
auto output_it = outputs.begin();
for (size_t i = 0; i < num_ports; ++i)
{
port_pairs[i].input_port = &*input_it;
port_pairs[i].output_port = &*output_it;
++input_it;
++output_it;
}
for (auto & delayed : delayed_ports)
port_pairs[delayed].is_delayed = true;
}
/// Moves one chunk of data from pair.input_port to pair.output_port, if possible.
/// Returns true when the pair still needs data; false when the pair is finished
/// or its output cannot accept data right now.
bool DelayedPortsProcessor::processPair(PortsPair & pair)
{
/// Mark the pair finished exactly once so num_finished stays accurate.
auto finish = [&]()
{
if (!pair.is_finished)
{
pair.is_finished = true;
++num_finished;
}
};
/// Downstream is done: propagate cancellation upstream.
if (pair.output_port->isFinished())
{
pair.input_port->close();
finish();
return false;
}
/// Upstream is exhausted: propagate completion downstream.
if (pair.input_port->isFinished())
{
pair.output_port->finish();
finish();
return false;
}
/// Output is full; nothing to do until the consumer pulls.
if (!pair.output_port->canPush())
return false;
pair.input_port->setNeeded();
if (pair.input_port->hasData())
pair.output_port->pushData(pair.input_port->pullData());
return true;
}
/// Scheduling: pump all non-delayed pairs first; delayed pairs are only
/// processed after every non-delayed pair has finished. This keeps
/// non-matched JOIN rows (the delayed streams) at the end of the output.
IProcessor::Status DelayedPortsProcessor::prepare(const PortNumbers & updated_inputs, const PortNumbers & updated_outputs)
{
/// While some non-delayed pair is still active, skip delayed ones.
bool skip_delayed = (num_finished + num_delayed) < port_pairs.size();
bool need_data = false;
for (auto & output_number : updated_outputs)
{
if (!skip_delayed || !port_pairs[output_number].is_delayed)
need_data = processPair(port_pairs[output_number]) || need_data;
}
for (auto & input_number : updated_inputs)
{
if (!skip_delayed || !port_pairs[input_number].is_delayed)
need_data = processPair(port_pairs[input_number]) || need_data;
}
/// In case if main streams are finished at current iteration, start processing delayed streams.
if (skip_delayed && (num_finished + num_delayed) >= port_pairs.size())
{
for (auto & pair : port_pairs)
if (pair.is_delayed)
need_data = processPair(pair) || need_data;
}
if (num_finished == port_pairs.size())
return Status::Finished;
if (need_data)
return Status::NeedData;
return Status::PortFull;
}
}

View File

@ -0,0 +1,37 @@
#pragma once
#include <Processors/IProcessor.h>
namespace DB
{
/// Processor with N inputs and N outputs. Only moves data from i-th input to i-th output as is.
/// Some ports are delayed. Delayed ports are processed after other outputs are all finished.
/// Data between ports is not mixed. It is important because this processor can be used before MergingSortedTransform.
/// Delayed ports are appeared after joins, when some non-matched data need to be processed at the end.
/// Processor with N inputs and N outputs that forwards data from the i-th
/// input to the i-th output unchanged. Pairs marked as delayed are processed
/// only after all other pairs have finished (see the .cpp for the scheduling).
class DelayedPortsProcessor : public IProcessor
{
public:
DelayedPortsProcessor(const Block & header, size_t num_ports, const PortNumbers & delayed_ports);
String getName() const override { return "DelayedPorts"; }
Status prepare(const PortNumbers &, const PortNumbers &) override;
private:
/// One input/output pair and its scheduling state.
struct PortsPair
{
InputPort * input_port = nullptr;
OutputPort * output_port = nullptr;
bool is_delayed = false;
bool is_finished = false;
};
std::vector<PortsPair> port_pairs;
/// Count of delayed pairs; num_finished tracks finished pairs of any kind.
size_t num_delayed;
size_t num_finished = 0;
bool processPair(PortsPair & pair);
};
}

View File

@ -64,13 +64,6 @@ bool PipelineExecutor::addEdges(UInt64 node)
throwUnknownProcessor(to_proc, cur, true);
UInt64 proc_num = it->second;
for (auto & edge : edges)
{
if (edge.to == proc_num)
throw Exception("Multiple edges are not allowed for the same processors.", ErrorCodes::LOGICAL_ERROR);
}
auto & edge = edges.emplace_back(proc_num, is_backward, input_port_number, output_port_number, update_list);
from_port.setUpdateInfo(&edge.update_info);

View File

@ -18,6 +18,7 @@
#include <Interpreters/Context.h>
#include <Common/typeid_cast.h>
#include <Common/CurrentThread.h>
#include <Processors/DelayedPortsProcessor.h>
namespace DB
{
@ -165,7 +166,6 @@ void QueryPipeline::addSimpleTransformImpl(const TProcessorGetter & getter)
for (size_t stream_num = 0; stream_num < streams.size(); ++stream_num)
add_transform(streams[stream_num], StreamType::Main, stream_num);
add_transform(delayed_stream_port, StreamType::Main);
add_transform(totals_having_port, StreamType::Totals);
add_transform(extremes_port, StreamType::Extremes);
@ -185,7 +185,6 @@ void QueryPipeline::addSimpleTransform(const ProcessorGetterWithStreamKind & get
void QueryPipeline::addPipe(Processors pipe)
{
checkInitialized();
concatDelayedStream();
if (pipe.empty())
throw Exception("Can't add empty processors list to QueryPipeline.", ErrorCodes::LOGICAL_ERROR);
@ -224,41 +223,20 @@ void QueryPipeline::addDelayedStream(ProcessorPtr source)
{
checkInitialized();
if (delayed_stream_port)
throw Exception("QueryPipeline already has stream with non joined data.", ErrorCodes::LOGICAL_ERROR);
checkSource(source, false);
assertBlocksHaveEqualStructure(current_header, source->getOutputs().front().getHeader(), "QueryPipeline");
delayed_stream_port = &source->getOutputs().front();
IProcessor::PortNumbers delayed_streams = { streams.size() };
streams.emplace_back(&source->getOutputs().front());
processors.emplace_back(std::move(source));
}
void QueryPipeline::concatDelayedStream()
{
if (!delayed_stream_port)
return;
auto resize = std::make_shared<ResizeProcessor>(current_header, getNumMainStreams(), 1);
auto stream = streams.begin();
for (auto & input : resize->getInputs())
connect(**(stream++), input);
auto concat = std::make_shared<ConcatProcessor>(current_header, 2);
connect(resize->getOutputs().front(), concat->getInputs().front());
connect(*delayed_stream_port, concat->getInputs().back());
streams = { &concat->getOutputs().front() };
processors.emplace_back(std::move(resize));
processors.emplace_back(std::move(concat));
delayed_stream_port = nullptr;
auto processor = std::make_shared<DelayedPortsProcessor>(current_header, streams.size(), delayed_streams);
addPipe({ std::move(processor) });
}
void QueryPipeline::resize(size_t num_streams, bool force)
{
checkInitialized();
concatDelayedStream();
if (!force && num_streams == getNumStreams())
return;
@ -443,7 +421,6 @@ void QueryPipeline::unitePipelines(
std::vector<QueryPipeline> && pipelines, const Block & common_header, const Context & context)
{
checkInitialized();
concatDelayedStream();
addSimpleTransform([&](const Block & header)
{
@ -456,7 +433,6 @@ void QueryPipeline::unitePipelines(
for (auto & pipeline : pipelines)
{
pipeline.checkInitialized();
pipeline.concatDelayedStream();
pipeline.addSimpleTransform([&](const Block & header)
{

View File

@ -57,7 +57,7 @@ public:
/// Will read from this stream after all data was read from other streams.
void addDelayedStream(ProcessorPtr source);
bool hasDelayedStream() const { return delayed_stream_port; }
/// Check if resize transform was used. (In that case another distinct transform will be added).
bool hasMixedStreams() const { return has_resize || hasMoreThanOneStream(); }
@ -69,8 +69,7 @@ public:
PipelineExecutorPtr execute();
size_t getNumStreams() const { return streams.size() + (hasDelayedStream() ? 1 : 0); }
size_t getNumMainStreams() const { return streams.size(); }
size_t getNumStreams() const { return streams.size(); }
bool hasMoreThanOneStream() const { return getNumStreams() > 1; }
bool hasTotals() const { return totals_having_port != nullptr; }
@ -103,9 +102,6 @@ private:
OutputPort * totals_having_port = nullptr;
OutputPort * extremes_port = nullptr;
/// Special port for delayed stream.
OutputPort * delayed_stream_port = nullptr;
/// If resize processor was added to pipeline.
bool has_resize = false;
@ -126,7 +122,6 @@ private:
void checkInitialized();
void checkSource(const ProcessorPtr & source, bool can_have_totals);
void concatDelayedStream();
template <typename TProcessorGetter>
void addSimpleTransformImpl(const TProcessorGetter & getter);

View File

@ -249,6 +249,9 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata) const
/// let's use info about old type
if (data_type == nullptr)
codec->useInfoAboutType(column.type);
else /// use info about new DataType
codec->useInfoAboutType(data_type);
column.codec = codec;
}
@ -316,7 +319,7 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata) const
if (insert_it == metadata.indices.indices.end())
throw Exception("Wrong index name. Cannot find index " + backQuote(after_index_name) + " to insert after.",
ErrorCodes::LOGICAL_ERROR);
ErrorCodes::BAD_ARGUMENTS);
++insert_it;
}
@ -338,7 +341,7 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata) const
if (if_exists)
return;
throw Exception("Wrong index name. Cannot find index " + backQuote(index_name) + " to drop.",
ErrorCodes::LOGICAL_ERROR);
ErrorCodes::BAD_ARGUMENTS);
}
metadata.indices.indices.erase(erase_it);
@ -378,7 +381,7 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata) const
if (if_exists)
return;
throw Exception("Wrong constraint name. Cannot find constraint `" + constraint_name + "` to drop.",
ErrorCodes::LOGICAL_ERROR);
ErrorCodes::BAD_ARGUMENTS);
}
metadata.constraints.constraints.erase(erase_it);
}

View File

@ -269,7 +269,7 @@ void StorageDistributedDirectoryMonitor::processFile(const std::string & file_pa
Settings insert_settings;
std::string insert_query;
readHeader(in, insert_settings, insert_query);
readHeader(in, insert_settings, insert_query, log);
RemoteBlockOutputStream remote{*connection, timeouts, insert_query, &insert_settings};
@ -289,7 +289,7 @@ void StorageDistributedDirectoryMonitor::processFile(const std::string & file_pa
}
void StorageDistributedDirectoryMonitor::readHeader(
ReadBuffer & in, Settings & insert_settings, std::string & insert_query) const
ReadBuffer & in, Settings & insert_settings, std::string & insert_query, Logger * log)
{
UInt64 query_size;
readVarUInt(query_size, in);
@ -449,7 +449,7 @@ struct StorageDistributedDirectoryMonitor::Batch
}
ReadBufferFromFile in(file_path->second);
parent.readHeader(in, insert_settings, insert_query);
parent.readHeader(in, insert_settings, insert_query, parent.log);
if (first)
{
@ -520,6 +520,53 @@ struct StorageDistributedDirectoryMonitor::Batch
}
};
/// Streams blocks back out of a distributed-send file written by StorageDistributed.
/// The file layout is: header (settings + insert query), then compressed Native-format blocks.
class DirectoryMonitorBlockInputStream : public IBlockInputStream
{
public:
    explicit DirectoryMonitorBlockInputStream(const String & file_name)
        : in(file_name)
        , decompressing_in(in)
        , block_in(decompressing_in, ClickHouseRevision::get())
        , log{&Logger::get("DirectoryMonitorBlockInputStream")}
    {
        /// Skip past the file header; the query/settings themselves are not needed here.
        Settings insert_settings;
        String insert_query;
        StorageDistributedDirectoryMonitor::readHeader(in, insert_settings, insert_query, log);

        block_in.readPrefix();
        /// Read the first block eagerly so the stream header can be derived from it.
        first_block = block_in.read();
        header = first_block.cloneEmpty();
    }

    String getName() const override { return "DirectoryMonitor"; }

protected:
    Block getHeader() const override { return header; }

    Block readImpl() override
    {
        /// Serve the eagerly-read first block once; after the move it is empty,
        /// so subsequent calls fall through to the underlying stream.
        if (first_block)
            return std::move(first_block);

        return block_in.read();
    }

    void readSuffix() override { block_in.readSuffix(); }

private:
    ReadBufferFromFile in;              /// Raw file reader.
    CompressedReadBuffer decompressing_in;  /// Decompression layer over `in`.
    NativeBlockInputStream block_in;    /// Native-format block decoder.
    Block first_block;                  /// Block read in the constructor, returned first.
    Block header;                       /// Empty clone of the first block (stream header).
    Logger * log;
};
/// Open the given distributed-send file as a block input stream.
BlockInputStreamPtr StorageDistributedDirectoryMonitor::createStreamFromFile(const String & file_name)
{
    auto stream = std::make_shared<DirectoryMonitorBlockInputStream>(file_name);
    return stream;
}
void StorageDistributedDirectoryMonitor::processFilesWithBatching(const std::map<UInt64, std::string> & files)
{
@ -557,7 +604,7 @@ void StorageDistributedDirectoryMonitor::processFilesWithBatching(const std::map
{
/// Determine metadata of the current file and check if it is not broken.
ReadBufferFromFile in{file_path};
readHeader(in, insert_settings, insert_query);
readHeader(in, insert_settings, insert_query, log);
CompressedReadBuffer decompressing_in(in);
NativeBlockInputStream block_in(decompressing_in, ClickHouseRevision::get());

View File

@ -31,6 +31,8 @@ public:
void flushAllData();
void shutdownAndDropAllData();
static BlockInputStreamPtr createStreamFromFile(const String & file_name);
private:
void run();
bool processFiles();
@ -69,7 +71,9 @@ private:
ThreadFromGlobalPool thread{&StorageDistributedDirectoryMonitor::run, this};
/// Read insert query and insert settings for backward compatible.
void readHeader(ReadBuffer & in, Settings & insert_settings, std::string & insert_query) const;
static void readHeader(ReadBuffer & in, Settings & insert_settings, std::string & insert_query, Logger * log);
friend class DirectoryMonitorBlockInputStream;
};
}

View File

@ -182,7 +182,7 @@ void DistributedBlockOutputStream::initWritingJobs(const Block & first_block)
}
if (num_shards > 1)
shard_jobs.shard_current_block_permuation.reserve(first_block.rows());
shard_jobs.shard_current_block_permutation.reserve(first_block.rows());
}
}
@ -235,7 +235,7 @@ ThreadPool::Job DistributedBlockOutputStream::runWritingJob(DistributedBlockOutp
/// Generate current shard block
if (num_shards > 1)
{
auto & shard_permutation = shard_job.shard_current_block_permuation;
auto & shard_permutation = shard_job.shard_current_block_permutation;
size_t num_shard_rows = shard_permutation.size();
for (size_t j = 0; j < current_block.columns(); ++j)
@ -348,10 +348,10 @@ void DistributedBlockOutputStream::writeSync(const Block & block)
/// Prepare row numbers for each shard
for (size_t shard_index : ext::range(0, num_shards))
per_shard_jobs[shard_index].shard_current_block_permuation.resize(0);
per_shard_jobs[shard_index].shard_current_block_permutation.resize(0);
for (size_t i = 0; i < block.rows(); ++i)
per_shard_jobs[current_selector[i]].shard_current_block_permuation.push_back(i);
per_shard_jobs[current_selector[i]].shard_current_block_permutation.push_back(i);
}
try

View File

@ -123,7 +123,7 @@ private:
struct JobShard
{
std::list<JobReplica> replicas_jobs;
IColumn::Permutation shard_current_block_permuation;
IColumn::Permutation shard_current_block_permutation;
};
std::vector<JobShard> per_shard_jobs;

View File

@ -1,4 +1,4 @@
/* iopyright (c) 2018 BlackBerry Limited
/* Copyright (c) 2018 BlackBerry Limited
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@ -95,6 +95,66 @@ static void extractDependentTable(ASTPtr & query, String & select_database_name,
DB::ErrorCodes::LOGICAL_ERROR);
}
/// Run the inner (or inner sub-) query up to the mergeable state and collect
/// all produced blocks, together with their sample header.
MergeableBlocksPtr StorageLiveView::collectMergeableBlocks(const Context & context)
{
    /// Prefer the inner subquery when one exists, otherwise use the full inner query.
    ASTPtr query_to_run = inner_subquery ? inner_subquery : inner_query;

    InterpreterSelectQuery interpreter(query_to_run->clone(), context, SelectQueryOptions(QueryProcessingStage::WithMergeableState), Names());
    auto view_mergeable_stream = std::make_shared<MaterializingBlockInputStream>(interpreter.execute().in);

    /// Drain the stream into a single block set.
    BlocksPtr collected = std::make_shared<Blocks>();
    while (Block block = view_mergeable_stream->read())
        collected->push_back(block);

    BlocksPtrs all_block_sets = std::make_shared<std::vector<BlocksPtr>>();
    all_block_sets->push_back(collected);

    MergeableBlocksPtr result = std::make_shared<MergeableBlocks>();
    result->blocks = all_block_sets;
    result->sample_block = view_mergeable_stream->getHeader();
    return result;
}
/// Wrap each stored block set in an input stream that replays it with the given header.
BlockInputStreams StorageLiveView::blocksToInputStreams(BlocksPtrs blocks, Block & sample_block)
{
    BlockInputStreams result;
    for (auto & block_set : *blocks)
        result.push_back(
            std::make_shared<BlocksBlockInputStream>(std::make_shared<BlocksPtr>(block_set), sample_block));
    return result;
}
/// Complete query using input streams from mergeable blocks.
/// Registers the mergeable blocks as an external "<table>_blocks" table in a fresh
/// query context, runs the inner blocks query over it to the Complete stage,
/// and returns the resulting stream.
BlockInputStreamPtr StorageLiveView::completeQuery(BlockInputStreams from)
{
    /// A dedicated query context so the external table does not leak into the global one.
    auto block_context = std::make_unique<Context>(global_context);
    block_context->makeQueryContext();
    auto blocks_storage = StorageBlocks::createStorage(database_name, table_name, parent_storage->getColumns(),
        std::move(from), QueryProcessingStage::WithMergeableState);
    block_context->addExternalTable(table_name + "_blocks", blocks_storage);
    InterpreterSelectQuery select(inner_blocks_query->clone(), *block_context, StoragePtr(), SelectQueryOptions(QueryProcessingStage::Complete));
    BlockInputStreamPtr data = std::make_shared<MaterializingBlockInputStream>(select.execute().in);
    /// Squashing is needed here because the view query can generate a lot of blocks
    /// even when only one block is inserted into the parent table (e.g. if the query is a GROUP BY
    /// and two-level aggregation is triggered).
    data = std::make_shared<SquashingBlockInputStream>(
        data, global_context.getSettingsRef().min_insert_block_size_rows,
        global_context.getSettingsRef().min_insert_block_size_bytes);
    return data;
}
void StorageLiveView::writeIntoLiveView(
StorageLiveView & live_view,
@ -102,8 +162,6 @@ void StorageLiveView::writeIntoLiveView(
const Context & context)
{
BlockOutputStreamPtr output = std::make_shared<LiveViewBlockOutputStream>(live_view);
auto block_context = std::make_unique<Context>(context.getGlobalContext());
block_context->makeQueryContext();
/// Check if live view has any readers if not
/// just reset blocks to empty and do nothing else
@ -119,54 +177,40 @@ void StorageLiveView::writeIntoLiveView(
bool is_block_processed = false;
BlockInputStreams from;
BlocksPtrs mergeable_blocks;
MergeableBlocksPtr mergeable_blocks;
BlocksPtr new_mergeable_blocks = std::make_shared<Blocks>();
ASTPtr mergeable_query = live_view.getInnerQuery();
if (live_view.getInnerSubQuery())
mergeable_query = live_view.getInnerSubQuery();
{
std::lock_guard lock(live_view.mutex);
mergeable_blocks = live_view.getMergeableBlocks();
if (!mergeable_blocks || mergeable_blocks->size() >= context.getGlobalContext().getSettingsRef().max_live_view_insert_blocks_before_refresh)
if (!mergeable_blocks || mergeable_blocks->blocks->size() >= context.getGlobalContext().getSettingsRef().max_live_view_insert_blocks_before_refresh)
{
mergeable_blocks = std::make_shared<std::vector<BlocksPtr>>();
BlocksPtr base_mergeable_blocks = std::make_shared<Blocks>();
InterpreterSelectQuery interpreter(mergeable_query, context, SelectQueryOptions(QueryProcessingStage::WithMergeableState), Names());
auto view_mergeable_stream = std::make_shared<MaterializingBlockInputStream>(
interpreter.execute().in);
while (Block this_block = view_mergeable_stream->read())
base_mergeable_blocks->push_back(this_block);
mergeable_blocks->push_back(base_mergeable_blocks);
mergeable_blocks = live_view.collectMergeableBlocks(context);
live_view.setMergeableBlocks(mergeable_blocks);
/// Create from streams
for (auto & blocks_ : *mergeable_blocks)
{
if (blocks_->empty())
continue;
auto sample_block = blocks_->front().cloneEmpty();
BlockInputStreamPtr stream = std::make_shared<BlocksBlockInputStream>(std::make_shared<BlocksPtr>(blocks_), sample_block);
from.push_back(std::move(stream));
}
from = live_view.blocksToInputStreams(mergeable_blocks->blocks, mergeable_blocks->sample_block);
is_block_processed = true;
}
}
auto parent_storage = context.getTable(live_view.getSelectDatabaseName(), live_view.getSelectTableName());
if (!is_block_processed)
{
ASTPtr mergeable_query = live_view.getInnerQuery();
if (live_view.getInnerSubQuery())
mergeable_query = live_view.getInnerSubQuery();
BlockInputStreams streams = {std::make_shared<OneBlockInputStream>(block)};
auto blocks_storage = StorageBlocks::createStorage(live_view.database_name, live_view.table_name,
parent_storage->getColumns(), std::move(streams), QueryProcessingStage::FetchColumns);
live_view.getParentStorage()->getColumns(), std::move(streams), QueryProcessingStage::FetchColumns);
InterpreterSelectQuery select_block(mergeable_query, context, blocks_storage,
QueryProcessingStage::WithMergeableState);
auto data_mergeable_stream = std::make_shared<MaterializingBlockInputStream>(
select_block.execute().in);
while (Block this_block = data_mergeable_stream->read())
new_mergeable_blocks->push_back(this_block);
@ -177,32 +221,12 @@ void StorageLiveView::writeIntoLiveView(
std::lock_guard lock(live_view.mutex);
mergeable_blocks = live_view.getMergeableBlocks();
mergeable_blocks->push_back(new_mergeable_blocks);
/// Create from streams
for (auto & blocks_ : *mergeable_blocks)
{
if (blocks_->empty())
continue;
auto sample_block = blocks_->front().cloneEmpty();
BlockInputStreamPtr stream = std::make_shared<BlocksBlockInputStream>(std::make_shared<BlocksPtr>(blocks_), sample_block);
from.push_back(std::move(stream));
}
mergeable_blocks->blocks->push_back(new_mergeable_blocks);
from = live_view.blocksToInputStreams(mergeable_blocks->blocks, mergeable_blocks->sample_block);
}
}
auto blocks_storage = StorageBlocks::createStorage(live_view.database_name, live_view.table_name, parent_storage->getColumns(), std::move(from), QueryProcessingStage::WithMergeableState);
block_context->addExternalTable(live_view.table_name + "_blocks", blocks_storage);
InterpreterSelectQuery select(live_view.getInnerBlocksQuery(), *block_context, StoragePtr(), SelectQueryOptions(QueryProcessingStage::Complete));
BlockInputStreamPtr data = std::make_shared<MaterializingBlockInputStream>(select.execute().in);
/// Squashing is needed here because the view query can generate a lot of blocks
/// even when only one block is inserted into the parent table (e.g. if the query is a GROUP BY
/// and two-level aggregation is triggered).
data = std::make_shared<SquashingBlockInputStream>(
data, context.getGlobalContext().getSettingsRef().min_insert_block_size_rows, context.getGlobalContext().getSettingsRef().min_insert_block_size_bytes);
BlockInputStreamPtr data = live_view.completeQuery(from);
copyData(*data, *output);
}
@ -247,6 +271,8 @@ StorageLiveView::StorageLiveView(
DatabaseAndTableName(select_database_name, select_table_name),
DatabaseAndTableName(database_name, table_name));
parent_storage = local_context.getTable(select_database_name, select_table_name);
is_temporary = query.temporary;
temporary_live_view_timeout = local_context.getSettingsRef().temporary_live_view_timeout.totalSeconds();
@ -298,36 +324,10 @@ bool StorageLiveView::getNewBlocks()
UInt128 key;
BlocksPtr new_blocks = std::make_shared<Blocks>();
BlocksMetadataPtr new_blocks_metadata = std::make_shared<BlocksMetadata>();
BlocksPtr new_mergeable_blocks = std::make_shared<Blocks>();
ASTPtr mergeable_query = inner_query;
if (inner_subquery)
mergeable_query = inner_subquery;
InterpreterSelectQuery interpreter(mergeable_query->clone(), *live_view_context, SelectQueryOptions(QueryProcessingStage::WithMergeableState), Names());
auto mergeable_stream = std::make_shared<MaterializingBlockInputStream>(interpreter.execute().in);
while (Block block = mergeable_stream->read())
new_mergeable_blocks->push_back(block);
auto block_context = std::make_unique<Context>(global_context);
block_context->makeQueryContext();
mergeable_blocks = std::make_shared<std::vector<BlocksPtr>>();
mergeable_blocks->push_back(new_mergeable_blocks);
BlockInputStreamPtr from = std::make_shared<BlocksBlockInputStream>(std::make_shared<BlocksPtr>(new_mergeable_blocks), mergeable_stream->getHeader());
auto blocks_storage = StorageBlocks::createStorage(database_name, table_name, global_context.getTable(select_database_name, select_table_name)->getColumns(), {from}, QueryProcessingStage::WithMergeableState);
block_context->addExternalTable(table_name + "_blocks", blocks_storage);
InterpreterSelectQuery select(inner_blocks_query->clone(), *block_context, StoragePtr(), SelectQueryOptions(QueryProcessingStage::Complete));
BlockInputStreamPtr data = std::make_shared<MaterializingBlockInputStream>(select.execute().in);
/// Squashing is needed here because the view query can generate a lot of blocks
/// even when only one block is inserted into the parent table (e.g. if the query is a GROUP BY
/// and two-level aggregation is triggered).
data = std::make_shared<SquashingBlockInputStream>(
data, global_context.getSettingsRef().min_insert_block_size_rows, global_context.getSettingsRef().min_insert_block_size_bytes);
mergeable_blocks = collectMergeableBlocks(*live_view_context);
BlockInputStreams from = blocksToInputStreams(mergeable_blocks->blocks, mergeable_blocks->sample_block);
BlockInputStreamPtr data = completeQuery({from});
while (Block block = data->read())
{

View File

@ -27,9 +27,16 @@ struct BlocksMetadata
UInt64 version;
};
struct MergeableBlocks
{
BlocksPtrs blocks;
Block sample_block;
};
class IAST;
using ASTPtr = std::shared_ptr<IAST>;
using BlocksMetadataPtr = std::shared_ptr<BlocksMetadata>;
using MergeableBlocksPtr = std::shared_ptr<MergeableBlocks>;
class StorageLiveView : public ext::shared_ptr_helper<StorageLiveView>, public IStorage
{
@ -45,6 +52,7 @@ public:
String getDatabaseName() const override { return database_name; }
String getSelectDatabaseName() const { return select_database_name; }
String getSelectTableName() const { return select_table_name; }
StoragePtr getParentStorage() const { return parent_storage; }
NameAndTypePair getColumn(const String & column_name) const override;
bool hasColumn(const String & column_name) const override;
@ -138,8 +146,14 @@ public:
unsigned num_streams) override;
std::shared_ptr<BlocksPtr> getBlocksPtr() { return blocks_ptr; }
BlocksPtrs getMergeableBlocks() { return mergeable_blocks; }
void setMergeableBlocks(BlocksPtrs blocks) { mergeable_blocks = blocks; }
MergeableBlocksPtr getMergeableBlocks() { return mergeable_blocks; }
/// Collect mergeable blocks and their sample. Must be called holding mutex
MergeableBlocksPtr collectMergeableBlocks(const Context & context);
/// Complete query using input streams from mergeable blocks
BlockInputStreamPtr completeQuery(BlockInputStreams from);
void setMergeableBlocks(MergeableBlocksPtr blocks) { mergeable_blocks = blocks; }
std::shared_ptr<bool> getActivePtr() { return active_ptr; }
/// Read new data blocks that store query result
@ -147,6 +161,9 @@ public:
Block getHeader() const;
/// convert blocks to input streams
static BlockInputStreams blocksToInputStreams(BlocksPtrs blocks, Block & sample_block);
static void writeIntoLiveView(
StorageLiveView & live_view,
const Block & block,
@ -162,6 +179,7 @@ private:
ASTPtr inner_blocks_query; /// query over the mergeable blocks to produce final result
Context & global_context;
std::unique_ptr<Context> live_view_context;
StoragePtr parent_storage;
bool is_temporary = false;
/// Mutex to protect access to sample block
@ -180,7 +198,7 @@ private:
std::shared_ptr<BlocksPtr> blocks_ptr;
/// Current data blocks metadata
std::shared_ptr<BlocksMetadataPtr> blocks_metadata_ptr;
BlocksPtrs mergeable_blocks;
MergeableBlocksPtr mergeable_blocks;
/// Background thread for temporary tables
/// which drops this table if there are no users

View File

@ -3216,18 +3216,20 @@ ReservationPtr MergeTreeData::tryReserveSpace(UInt64 expected_size, SpacePtr spa
ReservationPtr MergeTreeData::reserveSpacePreferringTTLRules(UInt64 expected_size,
const MergeTreeDataPart::TTLInfos & ttl_infos,
time_t time_of_move) const
time_t time_of_move,
size_t min_volume_index) const
{
expected_size = std::max(RESERVATION_MIN_ESTIMATION_SIZE, expected_size);
ReservationPtr reservation = tryReserveSpacePreferringTTLRules(expected_size, ttl_infos, time_of_move);
ReservationPtr reservation = tryReserveSpacePreferringTTLRules(expected_size, ttl_infos, time_of_move, min_volume_index);
return checkAndReturnReservation(expected_size, std::move(reservation));
}
ReservationPtr MergeTreeData::tryReserveSpacePreferringTTLRules(UInt64 expected_size,
const MergeTreeDataPart::TTLInfos & ttl_infos,
time_t time_of_move) const
time_t time_of_move,
size_t min_volume_index) const
{
expected_size = std::max(RESERVATION_MIN_ESTIMATION_SIZE, expected_size);
@ -3253,10 +3255,19 @@ ReservationPtr MergeTreeData::tryReserveSpacePreferringTTLRules(UInt64 expected_
reservation = destination_ptr->reserve(expected_size);
if (reservation)
return reservation;
else
if (ttl_entry->destination_type == PartDestinationType::VOLUME)
LOG_WARNING(log, "Would like to reserve space on volume '"
<< ttl_entry->destination_name << "' by TTL rule of table '"
<< log_name << "' but there is not enough space");
else if (ttl_entry->destination_type == PartDestinationType::DISK)
LOG_WARNING(log, "Would like to reserve space on disk '"
<< ttl_entry->destination_name << "' by TTL rule of table '"
<< log_name << "' but there is not enough space");
}
}
reservation = storage_policy->reserve(expected_size);
reservation = storage_policy->reserve(expected_size, min_volume_index);
return reservation;
}

View File

@ -675,10 +675,12 @@ public:
/// Reserves space at least 1MB preferring best destination according to `ttl_infos`.
ReservationPtr reserveSpacePreferringTTLRules(UInt64 expected_size,
const MergeTreeDataPart::TTLInfos & ttl_infos,
time_t time_of_move) const;
time_t time_of_move,
size_t min_volume_index = 0) const;
ReservationPtr tryReserveSpacePreferringTTLRules(UInt64 expected_size,
const MergeTreeDataPart::TTLInfos & ttl_infos,
time_t time_of_move) const;
time_t time_of_move,
size_t min_volume_index = 0) const;
/// Choose disk with max available free space
/// Reserves 0 bytes
ReservationPtr makeEmptyReservationOnLargestDisk() { return storage_policy->makeEmptyReservationOnLargestDisk(); }

View File

@ -169,9 +169,12 @@ UInt64 MergeTreeDataMergerMutator::getMaxSourcePartSizeForMutation()
const auto data_settings = data.getSettings();
size_t busy_threads_in_pool = CurrentMetrics::values[CurrentMetrics::BackgroundPoolTask].load(std::memory_order_relaxed);
/// DataPart can be store only at one disk. Get Max of free space at all disks
UInt64 disk_space = data.storage_policy->getMaxUnreservedFreeSpace();
/// Allow mutations only if there are enough threads, leave free threads for merges else
if (background_pool_size - busy_threads_in_pool >= data_settings->number_of_free_entries_in_pool_to_execute_mutation)
return static_cast<UInt64>(data.storage_policy->getMaxUnreservedFreeSpace() / DISK_USAGE_COEFFICIENT_TO_RESERVE);
return static_cast<UInt64>(disk_space / DISK_USAGE_COEFFICIENT_TO_RESERVE);
return 0;
}

Some files were not shown because too many files have changed in this diff Show More