Merge branch 'master' into tighten-limits-functional-tests

Alexey Milovidov 2024-08-12 02:28:53 +02:00
commit 59f45909eb
51 changed files with 1485 additions and 421 deletions

View File

@@ -69,8 +69,8 @@ ENV MAX_RUN_TIME=0
# Unrelated to vars in setup_minio.sh, but should be the same there
# to have the same binaries for local running scenario
- ARG MINIO_SERVER_VERSION=2022-01-03T18-22-58Z
- ARG MINIO_CLIENT_VERSION=2022-01-05T23-52-51Z
+ ARG MINIO_SERVER_VERSION=2024-08-03T04-33-23Z
+ ARG MINIO_CLIENT_VERSION=2024-07-31T15-58-33Z
ARG TARGETARCH
# Download Minio-related binaries

View File

@@ -59,8 +59,8 @@ find_os() {
download_minio() {
local os
local arch
- local minio_server_version=${MINIO_SERVER_VERSION:-2022-09-07T22-25-02Z}
- local minio_client_version=${MINIO_CLIENT_VERSION:-2022-08-28T20-08-11Z}
+ local minio_server_version=${MINIO_SERVER_VERSION:-2024-08-03T04-33-23Z}
+ local minio_client_version=${MINIO_CLIENT_VERSION:-2024-07-31T15-58-33Z}
os=$(find_os)
arch=$(find_arch)
@@ -82,10 +82,10 @@ setup_minio() {
local test_type=$1
./mc alias set clickminio http://localhost:11111 clickhouse clickhouse
./mc admin user add clickminio test testtest
- ./mc admin policy set clickminio readwrite user=test
+ ./mc admin policy attach clickminio readwrite --user=test
./mc mb --ignore-existing clickminio/test
if [ "$test_type" = "stateless" ]; then
- ./mc policy set public clickminio/test
+ ./mc anonymous set public clickminio/test
fi
}
@@ -148,4 +148,4 @@ main() {
setup_aws_credentials
}
main "$@"

View File

@@ -75,7 +75,7 @@ Data are received by this protocol and written to a [TimeSeries](/en/engines/tab
<my_rule_1>
<url>/write</url>
<handler>
- <type>remote_write</type
+ <type>remote_write</type>
<database>db_name</database>
<table>time_series_table</table>
</handler>
@@ -105,7 +105,7 @@ Data are read from a [TimeSeries](/en/engines/table-engines/special/time_series)
<my_rule_1>
<url>/read</url>
<handler>
- <type>remote_read</type
+ <type>remote_read</type>
<database>db_name</database>
<table>time_series_table</table>
</handler>
@@ -144,14 +144,14 @@ Multiple protocols can be specified together in one place:
<my_rule_2>
<url>/write</url>
<handler>
- <type>remote_write</type
+ <type>remote_write</type>
<table>db_name.time_series_table</table>
</handler>
</my_rule_2>
<my_rule_3>
<url>/read</url>
<handler>
- <type>remote_read</type
+ <type>remote_read</type>
<table>db_name.time_series_table</table>
</handler>
</my_rule_3>

File diff suppressed because it is too large

View File

@@ -143,7 +143,7 @@ void LocalServer::initialize(Poco::Util::Application & self)
if (fs::exists(config_path))
{
- ConfigProcessor config_processor(config_path, false, true);
+ ConfigProcessor config_processor(config_path);
ConfigProcessor::setConfigPath(fs::path(config_path).parent_path());
auto loaded_config = config_processor.loadConfig();
getClientConfiguration().add(loaded_config.configuration.duplicate(), PRIO_DEFAULT, false);

View File

@@ -68,13 +68,19 @@ const WithRetries::KeeperSettings & WithRetries::getKeeperSettings() const
WithRetries::FaultyKeeper WithRetries::getFaultyZooKeeper() const
{
- /// We need to create new instance of ZooKeeperWithFaultInjection each time a copy a pointer to ZooKeeper client there
+ zkutil::ZooKeeperPtr current_zookeeper;
+ {
+ std::lock_guard lock(zookeeper_mutex);
+ current_zookeeper = zookeeper;
+ }
+ /// We need to create new instance of ZooKeeperWithFaultInjection each time and copy a pointer to ZooKeeper client there
/// The reason is that ZooKeeperWithFaultInjection may reset the underlying pointer and there could be a race condition
/// when the same object is used from multiple threads.
auto faulty_zookeeper = ZooKeeperWithFaultInjection::createInstance(
settings.keeper_fault_injection_probability,
settings.keeper_fault_injection_seed,
- zookeeper,
+ current_zookeeper,
log->name(),
log);

View File

@@ -200,8 +200,6 @@ void ClientApplicationBase::init(int argc, char ** argv)
("pager", po::value<std::string>(), "Pipe all output into this command (less or similar)")
("max_memory_usage_in_client", po::value<std::string>(), "Set memory limit in client/local server")
- ("fuzzer-args", po::value<std::string>(), "Command line arguments for the LLVM's libFuzzer driver. Only relevant if the application is compiled with libFuzzer.")
("client_logs_file", po::value<std::string>(), "Path to a file for writing client logs. Currently we only have fatal logs (when the client crashes)")
;

View File

@@ -237,7 +237,14 @@ std::unique_ptr<ShellCommand> ShellCommand::executeImpl(
res->write_fds.emplace(fd, fds.fds_rw[1]);
}
- LOG_TRACE(getLogger(), "Started shell command '{}' with pid {}", filename, pid);
+ LOG_TRACE(
+ getLogger(),
+ "Started shell command '{}' with pid {} and file descriptors: out {}, err {}",
+ filename,
+ pid,
+ res->out.getFD(),
+ res->err.getFD());
return res;
}

View File

@@ -605,7 +605,7 @@ class IColumn;
M(Bool, optimize_if_chain_to_multiif, false, "Replace if(cond1, then1, if(cond2, ...)) chains to multiIf. Currently it's not beneficial for numeric types.", 0) \
M(Bool, optimize_multiif_to_if, true, "Replace 'multiIf' with only one condition to 'if'.", 0) \
M(Bool, optimize_if_transform_strings_to_enum, false, "Replaces string-type arguments in If and Transform to enum. Disabled by default cause it could make inconsistent change in distributed query that would lead to its fail.", 0) \
- M(Bool, optimize_functions_to_subcolumns, false, "Transform functions to subcolumns, if possible, to reduce amount of read data. E.g. 'length(arr)' -> 'arr.size0', 'col IS NULL' -> 'col.null' ", 0) \
+ M(Bool, optimize_functions_to_subcolumns, true, "Transform functions to subcolumns, if possible, to reduce amount of read data. E.g. 'length(arr)' -> 'arr.size0', 'col IS NULL' -> 'col.null' ", 0) \
M(Bool, optimize_using_constraints, false, "Use constraints for query optimization", 0) \
M(Bool, optimize_substitute_columns, false, "Use constraints for column substitution", 0) \
M(Bool, optimize_append_index, false, "Use constraints in order to append index condition (indexHint)", 0) \

View File

@@ -84,6 +84,7 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
{"allow_archive_path_syntax", true, true, "Added new setting to allow disabling archive path syntax."},
{"allow_experimental_time_series_table", false, false, "Added new setting to allow the TimeSeries table engine"},
{"enable_analyzer", 1, 1, "Added an alias to a setting `allow_experimental_analyzer`."},
+ {"optimize_functions_to_subcolumns", false, true, "Enabled settings by default"},
}
},
{"24.7",

View File

@@ -305,7 +305,8 @@ void S3ObjectStorage::listObjects(const std::string & path, RelativePathsWithMet
S3::ListObjectsV2Request request;
request.SetBucket(uri.bucket);
- request.SetPrefix(path);
+ if (path != "/")
+ request.SetPrefix(path);
if (max_keys)
request.SetMaxKeys(static_cast<int>(max_keys));
else

View File

@@ -3,6 +3,7 @@
#include <Functions/FunctionHelpers.h>
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/DataTypeNullable.h>
+ #include <DataTypes/DataTypeString.h>
#include <Columns/ColumnString.h>
#include <Core/Settings.h>
#include <Interpreters/parseColumnsListForTableFunction.h>
@@ -35,7 +36,7 @@ FunctionBasePtr createFunctionBaseCast(
class CastOverloadResolverImpl : public IFunctionOverloadResolver
{
public:
- const char * getNameImpl() const
+ static const char * getNameImpl(CastType cast_type, bool internal)
{
if (cast_type == CastType::accurate)
return "accurateCast";
@@ -49,7 +50,7 @@
String getName() const override
{
- return getNameImpl();
+ return getNameImpl(cast_type, internal);
}
size_t getNumberOfArguments() const override { return 2; }
@@ -79,10 +80,22 @@ public:
}
}
+ static FunctionBasePtr createInternalCast(ColumnWithTypeAndName from, DataTypePtr to, CastType cast_type, std::optional<CastDiagnostic> diagnostic)
+ {
+ if (cast_type == CastType::accurateOrNull && !isVariant(to))
+ to = makeNullable(to);
+ ColumnsWithTypeAndName arguments;
+ arguments.emplace_back(std::move(from));
+ arguments.emplace_back().type = std::make_unique<DataTypeString>();
+ return createFunctionBaseCast(nullptr, getNameImpl(cast_type, true), arguments, to, diagnostic, cast_type);
+ }
protected:
FunctionBasePtr buildImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & return_type) const override
{
- return createFunctionBaseCast(context, getNameImpl(), arguments, return_type, diagnostic, cast_type);
+ return createFunctionBaseCast(context, getNameImpl(cast_type, internal), arguments, return_type, diagnostic, cast_type);
}
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
@@ -130,9 +143,9 @@ private:
};
- FunctionOverloadResolverPtr createInternalCastOverloadResolver(CastType type, std::optional<CastDiagnostic> diagnostic)
+ FunctionBasePtr createInternalCast(ColumnWithTypeAndName from, DataTypePtr to, CastType cast_type, std::optional<CastDiagnostic> diagnostic)
{
- return CastOverloadResolverImpl::create(ContextPtr{}, type, true, diagnostic);
+ return CastOverloadResolverImpl::createInternalCast(std::move(from), std::move(to), cast_type, std::move(diagnostic));
}
REGISTER_FUNCTION(CastOverloadResolvers)

View File

@@ -3,6 +3,7 @@
#include <memory>
#include <optional>
#include <Interpreters/Context_fwd.h>
+ #include <Core/ColumnWithTypeAndName.h>
namespace DB
@@ -11,6 +12,9 @@ namespace DB
class IFunctionOverloadResolver;
using FunctionOverloadResolverPtr = std::shared_ptr<IFunctionOverloadResolver>;
+ class IFunctionBase;
+ using FunctionBasePtr = std::shared_ptr<const IFunctionBase>;
enum class CastType : uint8_t
{
nonAccurate,
@@ -24,6 +28,6 @@ struct CastDiagnostic
std::string column_to;
};
- FunctionOverloadResolverPtr createInternalCastOverloadResolver(CastType type, std::optional<CastDiagnostic> diagnostic);
+ FunctionBasePtr createInternalCast(ColumnWithTypeAndName from, DataTypePtr to, CastType cast_type, std::optional<CastDiagnostic> diagnostic);
}

View File

@@ -54,8 +54,7 @@ namespace
}
};
- FunctionOverloadResolverPtr func_builder_cast = createInternalCastOverloadResolver(CastType::nonAccurate, {});
- auto func_cast = func_builder_cast->build(cast_args);
+ auto func_cast = createInternalCast(arguments[0], result_type, CastType::nonAccurate, {});
return func_cast->execute(cast_args, result_type, arguments[0].column->size());
}
};

View File

@@ -301,11 +301,11 @@ const ActionsDAG::Node & ActionsDAG::addCast(const Node & node_to_cast, const Da
column.column = DataTypeString().createColumnConst(0, cast_type_constant_value);
column.type = std::make_shared<DataTypeString>();
- const auto * cast_type_constant_node = &addColumn(std::move(column));
+ const auto * cast_type_constant_node = &addColumn(column);
ActionsDAG::NodeRawConstPtrs children = {&node_to_cast, cast_type_constant_node};
- FunctionOverloadResolverPtr func_builder_cast = createInternalCastOverloadResolver(CastType::nonAccurate, {});
- return addFunction(func_builder_cast, std::move(children), result_name);
+ auto func_base_cast = createInternalCast(ColumnWithTypeAndName{node_to_cast.result_type, node_to_cast.result_name}, cast_type, CastType::nonAccurate, {});
+ return addFunction(func_base_cast, std::move(children), result_name);
}
const ActionsDAG::Node & ActionsDAG::addFunctionImpl(
@@ -1547,11 +1547,11 @@ ActionsDAG ActionsDAG::makeConvertingActions(
const auto * left_arg = dst_node;
CastDiagnostic diagnostic = {dst_node->result_name, res_elem.name};
- FunctionOverloadResolverPtr func_builder_cast
- = createInternalCastOverloadResolver(CastType::nonAccurate, std::move(diagnostic));
+ ColumnWithTypeAndName left_column{nullptr, dst_node->result_type, {}};
+ auto func_base_cast = createInternalCast(std::move(left_column), res_elem.type, CastType::nonAccurate, std::move(diagnostic));
NodeRawConstPtrs children = { left_arg, right_arg };
- dst_node = &actions_dag.addFunction(func_builder_cast, std::move(children), {});
+ dst_node = &actions_dag.addFunction(func_base_cast, std::move(children), {});
}
if (dst_node->column && isColumnConst(*dst_node->column) && !(res_elem.column && isColumnConst(*res_elem.column)))

View File

@@ -26,11 +26,9 @@ static ColumnPtr castColumn(CastType cast_type, const ColumnWithTypeAndName & ar
""
}
};
- auto get_cast_func = [cast_type, &arguments]
+ auto get_cast_func = [from = arg, to = type, cast_type]
{
- FunctionOverloadResolverPtr func_builder_cast = createInternalCastOverloadResolver(cast_type, {});
- return func_builder_cast->build(arguments);
+ return createInternalCast(from, to, cast_type, {});
};
FunctionBasePtr func_cast = cache ? cache->getOrSet(cast_type, from_name, to_name, std::move(get_cast_func)) : get_cast_func();

View File

@@ -255,20 +255,13 @@ static void appendAggregateFunctions(
const auto * node = input;
- if (node->result_name != aggregate.column_name)
- {
- if (DataTypeAggregateFunction::strictEquals(type, node->result_type))
- {
- node = &proj_dag.addAlias(*node, aggregate.column_name);
- }
- else
- {
- /// Cast to aggregate types specified in query if it's not
- /// strictly the same as the one specified in projection. This
- /// is required to generate correct results during finalization.
- node = &proj_dag.addCast(*node, type, aggregate.column_name);
- }
- }
+ if (!DataTypeAggregateFunction::strictEquals(type, node->result_type))
+ /// Cast to aggregate types specified in query if it's not
+ /// strictly the same as the one specified in projection. This
+ /// is required to generate correct results during finalization.
+ node = &proj_dag.addCast(*node, type, aggregate.column_name);
+ else if (node->result_name != aggregate.column_name)
+ node = &proj_dag.addAlias(*node, aggregate.column_name);
proj_dag_outputs.push_back(node);
}

View File

@@ -8,13 +8,15 @@
#include <IO/WriteHelpers.h>
#include <IO/ReadHelpers.h>
- #include <QueryPipeline/Pipe.h>
- #include <Processors/ISimpleTransform.h>
- #include <Processors/Formats/IOutputFormat.h>
- #include <Processors/Executors/CompletedPipelineExecutor.h>
#include <Interpreters/Context.h>
+ #include <Processors/Executors/CompletedPipelineExecutor.h>
+ #include <Processors/Formats/IOutputFormat.h>
+ #include <Processors/ISimpleTransform.h>
+ #include <QueryPipeline/Pipe.h>
#include <boost/circular_buffer.hpp>
+ #include <ranges>
namespace DB
{
@@ -68,11 +70,17 @@ static void makeFdBlocking(int fd)
static int pollWithTimeout(pollfd * pfds, size_t num, size_t timeout_milliseconds)
{
+ auto logger = getLogger("TimeoutReadBufferFromFileDescriptor");
+ auto describe_fd = [](const auto & pollfd) { return fmt::format("(fd={}, flags={})", pollfd.fd, fcntl(pollfd.fd, F_GETFL)); };
int res;
while (true)
{
Stopwatch watch;
+ LOG_TEST(logger, "Polling descriptors: {}", fmt::join(std::span(pfds, pfds + num) | std::views::transform(describe_fd), ", "));
res = poll(pfds, static_cast<nfds_t>(num), static_cast<int>(timeout_milliseconds));
if (res < 0)
@@ -82,7 +90,10 @@ static int pollWithTimeout(pollfd * pfds, size_t num, size_t timeout_millisecond
const auto elapsed = watch.elapsedMilliseconds();
if (timeout_milliseconds <= elapsed)
+ {
+ LOG_TEST(logger, "Timeout exceeded: elapsed={}, timeout={}", elapsed, timeout_milliseconds);
break;
+ }
timeout_milliseconds -= elapsed;
}
else
@@ -91,6 +102,12 @@ static int pollWithTimeout(pollfd * pfds, size_t num, size_t timeout_millisecond
}
}
+ LOG_TEST(
+ logger,
+ "Poll for descriptors: {} returned {}",
+ fmt::join(std::span(pfds, pfds + num) | std::views::transform(describe_fd), ", "),
+ res);
return res;
}
@@ -200,12 +217,6 @@ public:
return true;
}
- void reset() const
- {
- makeFdBlocking(stdout_fd);
- makeFdBlocking(stderr_fd);
- }
~TimeoutReadBufferFromFileDescriptor() override
{
tryMakeFdBlocking(stdout_fd);

View File

@@ -2337,22 +2337,9 @@ struct WindowFunctionLagLeadInFrame final : public WindowFunction
argument_types[2]->getName());
}
- const auto from_name = argument_types[2]->getName();
- const auto to_name = argument_types[0]->getName();
- ColumnsWithTypeAndName arguments
- {
- { argument_types[2], "" },
- {
- DataTypeString().createColumnConst(0, to_name),
- std::make_shared<DataTypeString>(),
- ""
- }
- };
- auto get_cast_func = [&arguments]
- {
- FunctionOverloadResolverPtr func_builder_cast = createInternalCastOverloadResolver(CastType::accurate, {});
- return func_builder_cast->build(arguments);
+ auto get_cast_func = [from = argument_types[2], to = argument_types[0]]
+ {
+ return createInternalCast({from, {}}, to, CastType::accurate, {});
};
func_cast = get_cast_func();

View File

@@ -749,8 +749,16 @@ void IMergeTreeDataPart::loadColumnsChecksumsIndexes(bool require_columns_checks
/// Probably there is something wrong with files of this part.
/// So it can be helpful to add to the error message some information about those files.
String files_in_part;
for (auto it = getDataPartStorage().iterate(); it->isValid(); it->next())
- files_in_part += fmt::format("{}{} ({} bytes)", (files_in_part.empty() ? "" : ", "), it->name(), getDataPartStorage().getFileSize(it->name()));
+ {
+ std::string file_info;
+ if (!getDataPartStorage().isDirectory(it->name()))
+ file_info = fmt::format(" ({} bytes)", getDataPartStorage().getFileSize(it->name()));
+ files_in_part += fmt::format("{}{}{}", (files_in_part.empty() ? "" : ", "), it->name(), file_info);
+ }
if (!files_in_part.empty())
e->addMessage("Part contains files: {}", files_in_part);
if (isEmpty())
@@ -2141,7 +2149,27 @@ void IMergeTreeDataPart::checkConsistencyBase() const
}
}
- checksums.checkSizes(getDataPartStorage());
+ const auto & data_part_storage = getDataPartStorage();
+ for (const auto & [filename, checksum] : checksums.files)
+ {
+ try
+ {
+ checksum.checkSize(data_part_storage, filename);
+ }
+ catch (const Exception & ex)
+ {
+ /// For projection parts check will mark them broken in loadProjections
+ if (!parent_part && filename.ends_with(".proj"))
+ {
+ std::string projection_name = fs::path(filename).stem();
+ LOG_INFO(storage.log, "Projection {} doesn't exist on start for part {}, marking it as broken", projection_name, name);
+ if (hasProjection(projection_name))
+ markProjectionPartAsBroken(projection_name, ex.message(), ex.code());
+ }
+ else
+ throw;
+ }
+ }
}
else
{

View File

@@ -1956,11 +1956,8 @@ bool KeyCondition::extractAtomFromTree(const RPNBuilderTreeNode & node, RPNEleme
auto common_type_maybe_nullable = (key_expr_type_is_nullable && !common_type->isNullable())
? DataTypePtr(std::make_shared<DataTypeNullable>(common_type))
: common_type;
- ColumnsWithTypeAndName arguments{
- {nullptr, key_expr_type, ""},
- {DataTypeString().createColumnConst(1, common_type_maybe_nullable->getName()), common_type_maybe_nullable, ""}};
- FunctionOverloadResolverPtr func_builder_cast = createInternalCastOverloadResolver(CastType::nonAccurate, {});
- auto func_cast = func_builder_cast->build(arguments);
+ auto func_cast = createInternalCast({key_expr_type, {}}, common_type_maybe_nullable, CastType::nonAccurate, {});
/// If we know the given range only contains one value, then we treat all functions as positive monotonic.
if (!single_point && !func_cast->hasInformationAboutMonotonicity())

View File

@@ -1146,7 +1146,7 @@ std::optional<UInt64> MergeTreeData::totalRowsByPartitionPredicateImpl(
auto metadata_snapshot = getInMemoryMetadataPtr();
auto virtual_columns_block = getBlockWithVirtualsForFilter(metadata_snapshot, {parts[0]});
- auto filter_dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag.getOutputs().at(0), nullptr);
+ auto filter_dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag.getOutputs().at(0), nullptr, /*allow_partial_result=*/ false);
if (!filter_dag)
return {};
@@ -6932,7 +6932,8 @@ Block MergeTreeData::getMinMaxCountProjectionBlock(
const auto * predicate = filter_dag->getOutputs().at(0);
// Generate valid expressions for filtering
- VirtualColumnUtils::filterBlockWithPredicate(predicate, virtual_columns_block, query_context);
+ VirtualColumnUtils::filterBlockWithPredicate(
+ predicate, virtual_columns_block, query_context, /*allow_filtering_with_partial_predicate =*/true);
rows = virtual_columns_block.rows();
part_name_column = virtual_columns_block.getByName("_part").column;

View File

@@ -100,12 +100,6 @@ void MergeTreeDataPartChecksums::checkEqual(const MergeTreeDataPartChecksums & r
}
}
- void MergeTreeDataPartChecksums::checkSizes(const IDataPartStorage & storage) const
- {
- for (const auto & [name, checksum] : files)
- checksum.checkSize(storage, name);
- }
UInt64 MergeTreeDataPartChecksums::getTotalSizeOnDisk() const
{
UInt64 res = 0;

View File

@@ -65,9 +65,6 @@ struct MergeTreeDataPartChecksums
static bool isBadChecksumsErrorCode(int code);
- /// Checks that the directory contains all the needed files of the correct size. Does not check the checksum.
- void checkSizes(const IDataPartStorage & storage) const;
/// Returns false if the checksum is too old.
bool read(ReadBuffer & in);
/// Assume that header with version (the first line) is read

View File

@@ -152,23 +152,15 @@ const ActionsDAG::Node & addFunction(
const ActionsDAG::Node & addCast(
const ActionsDAGPtr & dag,
const ActionsDAG::Node & node_to_cast,
- const String & type_name,
+ const DataTypePtr & to_type,
OriginalToNewNodeMap & node_remap)
{
- if (node_to_cast.result_type->getName() == type_name)
+ if (!node_to_cast.result_type->equals(*to_type))
return node_to_cast;
- Field cast_type_constant_value(type_name);
- ColumnWithTypeAndName column;
- column.column = DataTypeString().createColumnConst(0, cast_type_constant_value);
- column.type = std::make_shared<DataTypeString>();
- const auto * cast_type_constant_node = &dag->addColumn(std::move(column));
- ActionsDAG::NodeRawConstPtrs children = {&node_to_cast, cast_type_constant_node};
- FunctionOverloadResolverPtr func_builder_cast = createInternalCastOverloadResolver(CastType::nonAccurate, {});
- return addFunction(dag, func_builder_cast, std::move(children), node_remap);
+ const auto & new_node = dag->addCast(node_to_cast, to_type, {});
+ node_remap[new_node.result_name] = {dag.get(), &new_node};
+ return new_node;
}
/// Normalizes the filter node by adding AND with a constant true.
@@ -332,7 +324,7 @@ bool tryBuildPrewhereSteps(PrewhereInfoPtr prewhere_info, const ExpressionAction
/// Build AND(last_step_result_node, true)
const auto & and_node = addAndTrue(last_step_dag, *last_step_result_node_info.node, node_remap);
/// Build CAST(and_node, type of PREWHERE column)
- const auto & cast_node = addCast(last_step_dag, and_node, output->result_type->getName(), node_remap);
+ const auto & cast_node = addCast(last_step_dag, and_node, output->result_type, node_remap);
/// Add alias for the result with the name of the PREWHERE column
const auto & prewhere_result_node = last_step_dag->addAlias(cast_node, output->result_name);
last_step_dag->addOrReplaceInOutputs(prewhere_result_node);

View File

@@ -1,51 +1,46 @@
- #include <algorithm>
+ #include <Storages/VirtualColumnUtils.h>
#include <memory>
#include <stack>
+ #include <Columns/ColumnConst.h>
+ #include <Columns/ColumnSet.h>
+ #include <Columns/ColumnsCommon.h>
+ #include <Columns/ColumnsNumber.h>
+ #include <Columns/FilterDescription.h>
#include <Core/NamesAndTypes.h>
#include <Core/TypeId.h>
+ #include <DataTypes/DataTypeDateTime.h>
+ #include <DataTypes/DataTypeLowCardinality.h>
+ #include <DataTypes/DataTypeString.h>
+ #include <DataTypes/DataTypesNumber.h>
+ #include <Functions/FunctionHelpers.h>
+ #include <Functions/FunctionsLogical.h>
+ #include <Functions/IFunction.h>
+ #include <Functions/IFunctionAdaptors.h>
+ #include <Functions/indexHint.h>
+ #include <IO/WriteHelpers.h>
+ #include <Interpreters/ActionsDAG.h>
+ #include <Interpreters/ActionsVisitor.h>
#include <Interpreters/Context.h>
- #include <Interpreters/TreeRewriter.h>
- #include <Interpreters/ExpressionAnalyzer.h>
#include <Interpreters/ExpressionActions.h>
+ #include <Interpreters/ExpressionAnalyzer.h>
#include <Interpreters/IdentifierSemantic.h>
+ #include <Interpreters/TreeRewriter.h>
#include <Interpreters/misc.h>
- #include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTExpressionList.h>
- #include <Parsers/ASTLiteral.h>
#include <Parsers/ASTFunction.h>
+ #include <Parsers/ASTIdentifier.h>
+ #include <Parsers/ASTLiteral.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTSubquery.h>
+ #include <Parsers/makeASTForLogicalFunction.h>
- #include <Columns/ColumnConst.h>
+ #include <Processors/Executors/CompletedPipelineExecutor.h>
- #include <Columns/ColumnsNumber.h>
- #include <Columns/ColumnsCommon.h>
- #include <Columns/FilterDescription.h>
- #include <DataTypes/DataTypesNumber.h>
- #include <DataTypes/DataTypeString.h>
- #include <DataTypes/DataTypeLowCardinality.h>
- #include <DataTypes/DataTypeDateTime.h>
- #include <Processors/QueryPlan/QueryPlan.h>
#include <Processors/QueryPlan/BuildQueryPipelineSettings.h>
#include <Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h>
+ #include <Processors/QueryPlan/QueryPlan.h>
#include <Processors/Sinks/EmptySink.h>
- #include <Processors/Executors/CompletedPipelineExecutor.h>
#include <QueryPipeline/QueryPipelineBuilder.h>
- #include <Storages/VirtualColumnUtils.h>
- #include <IO/WriteHelpers.h>
#include <Common/typeid_cast.h>
- #include "Functions/FunctionsLogical.h"
- #include "Functions/IFunction.h"
- #include "Functions/IFunctionAdaptors.h"
- #include "Functions/indexHint.h"
- #include <Parsers/makeASTForLogicalFunction.h>
- #include <Columns/ColumnSet.h>
- #include <Functions/FunctionHelpers.h>
- #include <Interpreters/ActionsVisitor.h>
namespace DB
@@ -281,9 +276,7 @@ bool isDeterministicInScopeOfQuery(const ActionsDAG::Node * node)
}
static const ActionsDAG::Node * splitFilterNodeForAllowedInputs(
- const ActionsDAG::Node * node,
- const Block * allowed_inputs,
- ActionsDAG::Nodes & additional_nodes)
+ const ActionsDAG::Node * node, const Block * allowed_inputs, ActionsDAG::Nodes & additional_nodes, bool allow_partial_result)
{
if (node->type == ActionsDAG::ActionType::FUNCTION)
{
@@ -292,8 +285,15 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs(
auto & node_copy = additional_nodes.emplace_back(*node);
node_copy.children.clear();
for (const auto * child : node->children)
- if (const auto * child_copy = splitFilterNodeForAllowedInputs(child, allowed_inputs, additional_nodes))
+ if (const auto * child_copy
+ = splitFilterNodeForAllowedInputs(child, allowed_inputs, additional_nodes, allow_partial_result))
node_copy.children.push_back(child_copy);
+ /// Expression like (now_allowed AND allowed) is not allowed if allow_partial_result = true. This is important for
+ /// trivial count optimization, otherwise we can get incorrect results. For example, if the query is
+ /// SELECT count() FROM table WHERE _partition_id = '0' AND rowNumberInBlock() = 1, we cannot apply
+ /// trivial count.
+ else if (!allow_partial_result)
+ return nullptr;
if (node_copy.children.empty())
return nullptr;
@@ -301,7 +301,7 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs(
if (node_copy.children.size() == 1)
{
const ActionsDAG::Node * res = node_copy.children.front();
- /// Expression like (not_allowed AND 256) can't be resuced to (and(256)) because AND requires
+ /// Expression like (not_allowed AND 256) can't be reduced to (and(256)) because AND requires
/// at least two arguments; also it can't be reduced to (256) because result type is different.
if (!res->result_type->equals(*node->result_type))
{
@@ -319,7 +319,7 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs(
{
auto & node_copy = additional_nodes.emplace_back(*node);
for (auto & child : node_copy.children)
- if (child = splitFilterNodeForAllowedInputs(child, allowed_inputs, additional_nodes); !child)
+ if (child = splitFilterNodeForAllowedInputs(child, allowed_inputs, additional_nodes, allow_partial_result); !child)
return nullptr;
return &node_copy;
@@ -333,7 +333,8 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs(
auto index_hint_dag = index_hint->getActions().clone();
ActionsDAG::NodeRawConstPtrs atoms;
for (const auto & output : index_hint_dag.getOutputs())
- if (const auto * child_copy = splitFilterNodeForAllowedInputs(output, allowed_inputs, additional_nodes))
+ if (const auto * child_copy
+ = splitFilterNodeForAllowedInputs(output, allowed_inputs, additional_nodes, allow_partial_result))
atoms.push_back(child_copy);
if (!atoms.empty())
@@ -367,22 +368,24 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs(
return node;
}
- std::optional<ActionsDAG> splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs)
+ std::optional<ActionsDAG>
+ splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs, bool allow_partial_result)
{
if (!predicate)
return {};
ActionsDAG::Nodes additional_nodes;
- const auto * res = splitFilterNodeForAllowedInputs(predicate, allowed_inputs, additional_nodes);
+ const auto * res = splitFilterNodeForAllowedInputs(predicate, allowed_inputs, additional_nodes, allow_partial_result);
if (!res)
return {};
return ActionsDAG::cloneSubDAG({res}, true);
}
- void filterBlockWithPredicate(const ActionsDAG::Node * predicate, Block & block, ContextPtr context)
+ void filterBlockWithPredicate(
+ const ActionsDAG::Node * predicate, Block & block, ContextPtr context, bool allow_filtering_with_partial_predicate)
{
- auto dag = splitFilterDagForAllowedInputs(predicate, &block);
+ auto dag = splitFilterDagForAllowedInputs(predicate, &block, /*allow_partial_result=*/allow_filtering_with_partial_predicate);
if (dag)
filterBlockWithExpression(buildFilterExpression(std::move(*dag), context), block);
}

View File

@@ -26,9 +26,13 @@ namespace VirtualColumnUtils
///
/// Otherwise calling filter*() outside applyFilters() will throw "Not-ready Set is passed"
/// if there are subqueries.
- /// Similar to filterBlockWithExpression(buildFilterExpression(splitFilterDagForAllowedInputs(...))).
- void filterBlockWithPredicate(const ActionsDAG::Node * predicate, Block & block, ContextPtr context);
+ ///
+ /// Similar to filterBlockWithQuery, but uses ActionsDAG as a predicate.
+ /// Basically it is filterBlockWithDAG(splitFilterDagForAllowedInputs).
+ /// If allow_filtering_with_partial_predicate is true, then the filtering will be done even if some part of the predicate
+ /// cannot be evaluated using the columns from the block.
+ void filterBlockWithPredicate(const ActionsDAG::Node * predicate, Block & block, ContextPtr context, bool allow_filtering_with_partial_predicate = true);
+ /// Similar to filterBlockWithExpression(buildFilterExpression(splitFilterDagForAllowedInputs(...))).
/// Just filters block. Block should contain all the required columns.
ExpressionActionsPtr buildFilterExpression(ActionsDAG dag, ContextPtr context);
@@ -41,7 +45,15 @@ void buildSetsForDAG(const ActionsDAG & dag, const ContextPtr & context);
bool isDeterministicInScopeOfQuery(const ActionsDAG::Node * node);
/// Extract a part of predicate that can be evaluated using only columns from input_names.
- std::optional<ActionsDAG> splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs);
+ /// When allow_partial_result is false, then the result will be empty if any part of if cannot be evaluated deterministically
+ /// on the given inputs.
+ /// allow_partial_result must be false when we are going to use the result to filter parts in
+ /// MergeTreeData::totalRowsByPartitionPredicateImp. For example, if the query is
+ /// `SELECT count() FROM table WHERE _partition_id = '0' AND rowNumberInBlock() = 1`
+ /// The predicate will be `_partition_id = '0' AND rowNumberInBlock() = 1`, and `rowNumberInBlock()` is
+ /// non-deterministic. If we still extract the part `_partition_id = '0'` for filtering parts, then trivial
+ /// count optimization will be mistakenly applied to the query.
+ std::optional<ActionsDAG> splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs, bool allow_partial_result = true);
/// Extract from the input stream a set of `name` column values
template <typename T>

View File

@@ -985,6 +985,7 @@ def _run_test(job_name: str, run_command: str) -> int:
else:
print("Use run command from the workflow")
env["CHECK_NAME"] = job_name
+ env["MAX_RUN_TIME"] = str(timeout or 0)
print(f"Going to start run command [{run_command}]")
stopwatch = Stopwatch()
job_log = Path(TEMP_PATH) / "job_log.txt"

View File

@@ -114,6 +114,9 @@ def get_run_command(
if flaky_check:
envs.append("-e NUM_TRIES=50")
envs.append("-e MAX_RUN_TIME=2800")
+ else:
+ max_run_time = os.getenv("MAX_RUN_TIME", "0")
+ envs.append(f"-e MAX_RUN_TIME={max_run_time}")
envs += [f"-e {e}" for e in additional_envs]

View File

@@ -738,7 +738,7 @@ def create_test_html_report(
if test_results:
rows_part = []
num_fails = 0
- has_test_time = False
+ has_test_time = any(tr.time is not None for tr in test_results)
has_log_urls = False
# Display entires with logs at the top (they correspond to failed tests)
@@ -770,12 +770,12 @@
row.append(f'<td {fail_id}style="{style}">{test_result.status}</td>')
colspan += 1
- row.append("<td>")
- if test_result.time is not None:
- has_test_time = True
- row.append(str(test_result.time))
- row.append("</td>")
+ if has_test_time:
+ if test_result.time is not None:
+ row.append(f"<td>{test_result.time}</td>")
+ else:
+ row.append("<td></td>")
colspan += 1
if test_result.log_urls is not None:
has_log_urls = True

View File

@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="utf-8"?>
<clickhouse>
<merge_tree>
<max_suspicious_broken_parts_bytes>0</max_suspicious_broken_parts_bytes>
</merge_tree>
</clickhouse>

View File

@@ -4,6 +4,7 @@ import logging
import string
import random
from helpers.cluster import ClickHouseCluster
+ from multiprocessing.dummy import Pool
cluster = ClickHouseCluster(__file__)
@@ -18,6 +19,12 @@ def cluster():
stay_alive=True,
with_zookeeper=True,
)
+ cluster.add_instance(
+ "node_restart",
+ main_configs=["config.d/dont_start_broken.xml"],
+ stay_alive=True,
+ with_zookeeper=True,
+ )
logging.info("Starting cluster...")
cluster.start()
@@ -632,6 +639,49 @@ def test_broken_on_start(cluster):
check(node, table_name, 0)
+ def test_disappeared_projection_on_start(cluster):
+ node = cluster.instances["node_restart"]
+ table_name = "test_disapperead_projection"
+ create_table(node, table_name, 1)
+ node.query(f"SYSTEM STOP MERGES {table_name}")
+ insert(node, table_name, 0, 5)
+ insert(node, table_name, 5, 5)
+ insert(node, table_name, 10, 5)
+ insert(node, table_name, 15, 5)
+ assert ["all_0_0_0", "all_1_1_0", "all_2_2_0", "all_3_3_0"] == get_parts(
+ node, table_name
+ )
+ def drop_projection():
+ node.query(
+ f"ALTER TABLE {table_name} DROP PROJECTION proj2",
+ settings={"mutations_sync": "0"},
+ )
+ p = Pool(2)
+ p.apply_async(drop_projection)
+ for i in range(30):
+ create_query = node.query(f"SHOW CREATE TABLE {table_name}")
+ if "proj2" not in create_query:
+ break
+ time.sleep(0.5)
+ assert "proj2" not in create_query
+ # Remove 'proj2' for part all_2_2_0
+ break_projection(node, table_name, "proj2", "all_2_2_0", "part")
+ node.restart_clickhouse()
+ # proj2 is not broken, it doesn't exist, but ok
+ check(node, table_name, 0, expect_broken_part="proj2", do_check_command=0)
def test_mutation_with_broken_projection(cluster):
node = cluster.instances["node"]

View File

@@ -176,7 +176,7 @@ def test_query_is_permanent(transaction, permanent, exclusive_table):
select_handler = node.get_query_request(
f"""
- SELECT sleepEachRow(3) FROM {exclusive_table} SETTINGS function_sleep_max_microseconds_per_block = 0;
+ SELECT sleepEachRow(3) FROM {exclusive_table} SETTINGS function_sleep_max_microseconds_per_block = 0, max_threads=1;
""",
query_id=query_id,
)

View File

@@ -771,7 +771,11 @@ def test_multiple_tables_streaming_sync_distributed(started_cluster, mode):
table_name,
mode,
files_path,
- additional_settings={"keeper_path": keeper_path, "s3queue_buckets": 2},
+ additional_settings={
+ "keeper_path": keeper_path,
+ "s3queue_buckets": 2,
+ **({"s3queue_processing_threads_num": 1} if mode == "ordered" else {}),
+ },
)
for instance in [node, node_2]:
@@ -806,6 +810,10 @@ def test_multiple_tables_streaming_sync_distributed(started_cluster, mode):
list(map(int, l.split())) for l in run_query(node_2, get_query).splitlines()
]
+ logging.debug(
+ f"res1 size: {len(res1)}, res2 size: {len(res2)}, total_rows: {total_rows}"
+ )
assert len(res1) + len(res2) == total_rows
# Checking that all engines have made progress

View File

@@ -0,0 +1,26 @@
<test>
<settings>
<max_insert_threads>4</max_insert_threads>
</settings>
<create_query>
CREATE TABLE t_subcolumns (a Array(UInt64), s Nullable(String), m Map(String, UInt64)) ENGINE = MergeTree ORDER BY tuple()
</create_query>
<fill_query>
INSERT INTO t_subcolumns SELECT range(number % 20), toString(number), mapFromArrays(range(number % 20), range(number % 20)) FROM numbers_mt(50000000)
</fill_query>
<fill_query>
OPTIMIZE TABLE t_subcolumns FINAL
</fill_query>
<query>SELECT count() FROM t_subcolumns WHERE NOT ignore(length(a))</query>
<query>SELECT count() FROM t_subcolumns WHERE notEmpty(a)</query>
<query>SELECT count() FROM t_subcolumns WHERE NOT ignore(length(m))</query>
<query>SELECT count() FROM t_subcolumns WHERE notEmpty(m)</query>
<query>SELECT count() FROM t_subcolumns WHERE isNotNull(s)</query>
<query>SELECT count(s) FROM t_subcolumns</query>
<drop_query>DROP TABLE t_subcolumns</drop_query>
</test>

View File

@@ -1,5 +1,5 @@
#!/usr/bin/env bash
- # Tags: long, no-fasttest, no-debug
+ # Tags: long, no-fasttest, no-debug, no-asan, no-msan, no-tsan
#
# Load all possible .parquet files found in submodules.

View File

@@ -27,7 +27,7 @@ function wait_until()
function get_buffer_delay()
{
local buffer_insert_id=$1 && shift
- query "SYSTEM FLUSH LOGS"
+ $CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS"
query "
WITH
(SELECT event_time_microseconds FROM system.query_log WHERE current_database = '$CLICKHOUSE_DATABASE' AND type = 'QueryStart' AND query_id = '$buffer_insert_id') AS begin_,

View File

@@ -1,10 +1,16 @@
#!/usr/bin/env bash
- # Tags: long
+ # Tags: long, no-parallel
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
+ function query()
+ {
+ # NOTE: database_atomic_wait_for_drop_and_detach_synchronously needed only for local env, CI has it ON
+ ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&database_atomic_wait_for_drop_and_detach_synchronously=1" -d "$*"
+ }
# NOTE: database = $CLICKHOUSE_DATABASE is unwanted
verify_sql="SELECT
(SELECT sumIf(value, metric = 'PartsActive'), sumIf(value, metric = 'PartsOutdated') FROM system.metrics)
@@ -18,13 +24,13 @@ verify()
{
for i in {1..5000}
do
- result=$( $CLICKHOUSE_CLIENT --query="$verify_sql" )
+ result=$( query "$verify_sql" )
[ "$result" = "1" ] && echo "$result" && break
sleep 0.1
if [[ $i -eq 5000 ]]
then
- $CLICKHOUSE_CLIENT "
+ query "
SELECT sumIf(value, metric = 'PartsActive'), sumIf(value, metric = 'PartsOutdated') FROM system.metrics;
SELECT sum(active), sum(NOT active) FROM system.parts;
SELECT sum(active), sum(NOT active) FROM system.projection_parts;
@@ -34,17 +40,17 @@ verify()
done
}
- $CLICKHOUSE_CLIENT --database_atomic_wait_for_drop_and_detach_synchronously=1 --query="DROP TABLE IF EXISTS test_table"
+ query "DROP TABLE IF EXISTS test_table"
- $CLICKHOUSE_CLIENT --query="CREATE TABLE test_table (data Date) ENGINE = MergeTree PARTITION BY toYear(data) ORDER BY data;"
+ query "CREATE TABLE test_table (data Date) ENGINE = MergeTree PARTITION BY toYear(data) ORDER BY data;"
- $CLICKHOUSE_CLIENT --query="INSERT INTO test_table VALUES ('1992-01-01')"
+ query "INSERT INTO test_table VALUES ('1992-01-01')"
verify
- $CLICKHOUSE_CLIENT --query="INSERT INTO test_table VALUES ('1992-01-02')"
+ query "INSERT INTO test_table VALUES ('1992-01-02')"
verify
- $CLICKHOUSE_CLIENT --query="OPTIMIZE TABLE test_table FINAL"
+ query "OPTIMIZE TABLE test_table FINAL"
verify
- $CLICKHOUSE_CLIENT --database_atomic_wait_for_drop_and_detach_synchronously=1 --query="DROP TABLE test_table"
+ query "DROP TABLE test_table"
verify

View File

@@ -17,7 +17,7 @@ used_functions
['repeat']
arraySort(used_data_type_families)
- ['Array','Int32','Nullable','String']
+ ['Int32','Nullable','String']
used_database_engines
['Atomic']

View File

@@ -1,2 +1,3 @@
3
950 990 500 2000
+ [950] [999]

View File

@@ -29,4 +29,6 @@ FROM cluster('test_cluster_two_shards', currentDatabase(), r)
WHERE a = 'x'
settings prefer_localhost_replica=0;
+ SELECT quantilesTimingMerge(0.95)(q), quantilesTimingMerge(toInt64(1))(q) FROM remote('127.0.0.{1,2}', currentDatabase(), r);
DROP TABLE r;

View File

@@ -1,4 +1,6 @@
Code: 159
- 0
+ query_duration 1
+ 0
+ query_duration 1
Code: 159
0

View File

@@ -1,27 +1,23 @@
#!/usr/bin/env bash
- # Tags: no-debug
- # no-debug: Query is canceled by timeout after max_execution_time,
- # but sending an exception to the client may hang
- # for more than MAX_PROCESS_WAIT seconds in a slow debug build,
- # and test will fail.
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
- MAX_PROCESS_WAIT=5
- IS_SANITIZER=$($CLICKHOUSE_CLIENT -q "SELECT count() FROM system.warnings WHERE message like '%built with sanitizer%'")
- if [ "$IS_SANITIZER" -gt 0 ]; then
- # Query may hang for more than 5 seconds, especially in tsan build
- MAX_PROCESS_WAIT=15
+ TIMEOUT=5
+ IS_SANITIZER_OR_DEBUG=$($CLICKHOUSE_CLIENT -q "SELECT count() FROM system.warnings WHERE message like '%built with sanitizer%' or message like '%built in debug mode%'")
+ if [ "$IS_SANITIZER_OR_DEBUG" -gt 0 ]; then
+ # Increase the timeout due to in debug/sanitizers build:
+ # - client is slow
+ # - stacktrace resolving is slow
+ TIMEOUT=15
fi
# TCP CLIENT: As of today (02/12/21) uses PullingAsyncPipelineExecutor
### Should be cancelled after 1 second and return a 159 exception (timeout)
- timeout -s KILL $MAX_PROCESS_WAIT $CLICKHOUSE_CLIENT --max_result_rows 0 --max_result_bytes 0 --max_execution_time 1 -q \
- "SELECT * FROM
+ query_id=$(random_str 12)
+ $CLICKHOUSE_CLIENT --max_result_rows 0 --max_result_bytes 0 --query_id "$query_id" --max_execution_time 1 -q "
+ SELECT * FROM
(
SELECT a.name as n
FROM
@@ -34,28 +30,35 @@ timeout -s KILL $MAX_PROCESS_WAIT $CLICKHOUSE_CLIENT --max_result_rows 0 --max_r
GROUP BY n
)
LIMIT 20
- FORMAT Null" 2>&1 | grep -o "Code: 159" | sort | uniq
+ FORMAT Null
+ " 2>&1 | grep -m1 -o "Code: 159"
+ $CLICKHOUSE_CLIENT -q "system flush logs"
+ ${CLICKHOUSE_CURL} -q -sS "$CLICKHOUSE_URL" -d "select 'query_duration', round(query_duration_ms/1000) from system.query_log where current_database = '$CLICKHOUSE_DATABASE' and query_id = '$query_id' and type != 'QueryStart'"
### Should stop pulling data and return what has been generated already (return code 0)
- timeout -s KILL $MAX_PROCESS_WAIT $CLICKHOUSE_CLIENT --max_result_rows 0 --max_result_bytes 0 -q \
- "SELECT a.name as n
- FROM
- (
- SELECT 'Name' as name, number FROM system.numbers LIMIT 2000000
- ) AS a,
- (
- SELECT 'Name' as name2, number FROM system.numbers LIMIT 2000000
- ) as b
- FORMAT Null
- SETTINGS max_execution_time = 1, timeout_overflow_mode = 'break'
- "
+ query_id=$(random_str 12)
+ $CLICKHOUSE_CLIENT --max_result_rows 0 --max_result_bytes 0 --query_id "$query_id" -q "
+ SELECT a.name as n
+ FROM
+ (
+ SELECT 'Name' as name, number FROM system.numbers LIMIT 2000000
+ ) AS a,
+ (
+ SELECT 'Name' as name2, number FROM system.numbers LIMIT 2000000
+ ) as b
+ FORMAT Null
+ SETTINGS max_execution_time = 1, timeout_overflow_mode = 'break'
+ "
echo $?
+ $CLICKHOUSE_CLIENT -q "system flush logs"
+ ${CLICKHOUSE_CURL} -q -sS "$CLICKHOUSE_URL" -d "select 'query_duration', round(query_duration_ms/1000) from system.query_log where current_database = '$CLICKHOUSE_DATABASE' and query_id = '$query_id' and type != 'QueryStart'"
# HTTP CLIENT: As of today (02/12/21) uses PullingPipelineExecutor
### Should be cancelled after 1 second and return a 159 exception (timeout)
- ${CLICKHOUSE_CURL} -q --max-time $MAX_PROCESS_WAIT -sS "$CLICKHOUSE_URL&max_result_rows=0&max_result_bytes=0&max_execution_time=1" -d \
- "SELECT * FROM
+ ${CLICKHOUSE_CURL} -q --max-time $TIMEOUT -sS "$CLICKHOUSE_URL&max_execution_time=1&max_result_rows=0&max_result_bytes=0" -d "
+ SELECT * FROM
(
SELECT a.name as n
FROM
@@ -68,12 +71,13 @@ ${CLICKHOUSE_CURL} -q --max-time $MAX_PROCESS_WAIT -sS "$CLICKHOUSE_URL&max_resu
GROUP BY n
)
LIMIT 20
- FORMAT Null" 2>&1 | grep -o "Code: 159" | sort | uniq
+ FORMAT Null
+ " 2>&1 | grep -o "Code: 159" | sort | uniq
### Should stop pulling data and return what has been generated already (return code 0)
- ${CLICKHOUSE_CURL} -q --max-time $MAX_PROCESS_WAIT -sS "$CLICKHOUSE_URL&max_result_rows=0&max_result_bytes=0" -d \
- "SELECT a.name as n
+ ${CLICKHOUSE_CURL} -q --max-time $TIMEOUT -sS "$CLICKHOUSE_URL&max_result_rows=0&max_result_bytes=0" -d "
+ SELECT a.name as n
FROM
(
SELECT 'Name' as name, number FROM system.numbers LIMIT 2000000
@@ -83,5 +87,5 @@ ${CLICKHOUSE_CURL} -q --max-time $MAX_PROCESS_WAIT -sS "$CLICKHOUSE_URL&max_resu
) as b
FORMAT Null
SETTINGS max_execution_time = 1, timeout_overflow_mode = 'break'
"
echo $?
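The rewritten test drops the external `timeout -s KILL` wrapper and instead verifies cancellation on the server side: each query is tagged with an explicit query_id, and after `system flush logs` its rounded duration is read back from system.query_log. A condensed sketch of that pattern with a placeholder query (system.numbers is effectively infinite, so max_execution_time fires); it relies on the standard shell_config.sh helpers ($CLICKHOUSE_CLIENT, random_str):

#!/usr/bin/env bash
query_id=$(random_str 12)
$CLICKHOUSE_CLIENT --query_id "$query_id" --max_execution_time 1 -q "
    SELECT count() FROM system.numbers FORMAT Null
" 2>&1 | grep -m1 -o "Code: 159"

$CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS"
# Expect a duration close to max_execution_time (about 1 second), independent of build speed.
$CLICKHOUSE_CLIENT -q "
    SELECT 'query_duration', round(query_duration_ms / 1000)
    FROM system.query_log
    WHERE query_id = '$query_id' AND type != 'QueryStart'
"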

View File

@@ -0,0 +1,4 @@
CREATE TABLE t (p UInt8, x UInt64) Engine = MergeTree PARTITION BY p ORDER BY x;
INSERT INTO t SELECT 0, number FROM numbers(10) SETTINGS max_block_size = 100;
SELECT count() FROM t WHERE p = 0 AND rowNumberInAllBlocks() = 1 SETTINGS allow_experimental_analyzer = 0;
SELECT count() FROM t WHERE p = 0 AND rowNumberInAllBlocks() = 1 SETTINGS allow_experimental_analyzer = 1;

View File

@@ -0,0 +1,6 @@
Expression ((Project names + Projection))
Aggregating
Expression (Before GROUP BY)
ReadFromMerge
Filter (( + ( + )))
ReadFromMergeTree (default.test_03217_merge_replica_1)

View File

@@ -0,0 +1,16 @@
CREATE TABLE test_03217_merge_replica_1(x UInt32)
ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test_03217_merge_replica', 'r1')
ORDER BY x;
CREATE TABLE test_03217_merge_replica_2(x UInt32)
ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test_03217_merge_replica', 'r2')
ORDER BY x;
CREATE TABLE test_03217_all_replicas (x UInt32)
ENGINE = Merge(currentDatabase(), 'test_03217_merge_replica_*');
INSERT INTO test_03217_merge_replica_1 SELECT number AS x FROM numbers(10);
SYSTEM SYNC REPLICA test_03217_merge_replica_2;
-- If the filter on _table is not applied, then the plan will show both replicas
EXPLAIN SELECT _table, count() FROM test_03217_all_replicas WHERE _table = 'test_03217_merge_replica_1' AND x >= 0 GROUP BY _table SETTINGS allow_experimental_analyzer=1;

View File

@@ -0,0 +1,6 @@
information_schema tables
both default test_03217_system_tables_replica_1 r1
both default test_03217_system_tables_replica_2 r2
default test_03217_system_tables_replica_1 r1
1
1

View File

@@ -0,0 +1,30 @@
-- If filtering is not done correctly on databases, then this query report to read 3 rows, which are: `system.tables`, `information_schema.tables` and `INFORMATION_SCHEMA.tables`
SELECT database, table FROM system.tables WHERE database = 'information_schema' AND table = 'tables';
CREATE TABLE test_03217_system_tables_replica_1(x UInt32)
ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test_03217_system_tables_replica', 'r1')
ORDER BY x;
CREATE TABLE test_03217_system_tables_replica_2(x UInt32)
ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test_03217_system_tables_replica', 'r2')
ORDER BY x;
-- Make sure we can read both replicas
-- The replica name might be altered because of `_functional_tests_helper_database_replicated_replace_args_macros`,
-- thus we need to use `left`
SELECT 'both', database, table, left(replica_name, 2) FROM system.replicas WHERE database = currentDatabase();
-- If filtering is not done correctly on database-table column, then this query report to read 2 rows, which are the above tables
SELECT database, table, left(replica_name, 2) FROM system.replicas WHERE database = currentDatabase() AND table = 'test_03217_system_tables_replica_1' AND replica_name LIKE 'r1%';
SYSTEM FLUSH LOGS;
-- argMax is necessary to make the test repeatable
-- StorageSystemTables
SELECT argMax(read_rows, event_time_microseconds) FROM system.query_log WHERE 1
AND current_database = currentDatabase()
AND query LIKE '%SELECT database, table FROM system.tables WHERE database = \'information_schema\' AND table = \'tables\';'
AND type = 'QueryFinish';
-- StorageSystemReplicas
SELECT argMax(read_rows, event_time_microseconds) FROM system.query_log WHERE 1
AND current_database = currentDatabase()
AND query LIKE '%SELECT database, table, left(replica_name, 2) FROM system.replicas WHERE database = currentDatabase() AND table = \'test_03217_system_tables_replica_1\' AND replica_name LIKE \'r1\%\';'
AND type = 'QueryFinish';

View File

@@ -0,0 +1 @@
[(NULL,'11\01111111\011111','1111')] -2147483648 \N

View File

@@ -0,0 +1,23 @@
SET enable_analyzer = 1;
SELECT
materialize([(NULL, '11\01111111\011111', '1111')]) AS t,
(t[1048576]).2,
materialize(-2147483648),
(t[-2147483648]).1
GROUP BY
materialize([(NULL, '1')]),
'',
(materialize((t[1023]).2), (materialize(''), (t[2147483647]).1, materialize(9223372036854775807)), (materialize(''), materialize(NULL, 2147483647, t[65535], 256)), materialize(NULL))
; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH}
SELECT
materialize([(NULL, '11\01111111\011111', '1111')]) AS t,
(t[1048576]).2,
materialize(-2147483648),
(t[-2147483648]).1
GROUP BY
materialize([(NULL, '1')]),
'',
(materialize((t[1023]).2), (materialize(''), (t[2147483647]).1, materialize(9223372036854775807)), (materialize(''), materialize(NULL), materialize(2147483647), materialize(t[65535]), materialize(256)), materialize(NULL))
;