Merge branch 'master' into ssl_keeper

This commit is contained in:
alesapin 2021-04-13 10:34:23 +03:00
commit cbd8a57fda
63 changed files with 775 additions and 97 deletions

View File

@ -14,7 +14,7 @@
#### New Feature
* Extended range of `DateTime64` to support dates from year 1925 to 2283. Improved support of `DateTime` around zero date (`1970-01-01`). [#9404](https://github.com/ClickHouse/ClickHouse/pull/9404) ([alexey-milovidov](https://github.com/alexey-milovidov), [Vasily Nemkov](https://github.com/Enmk)).
* Extended range of `DateTime64` to support dates from year 1925 to 2283. Improved support of `DateTime` around zero date (`1970-01-01`). [#9404](https://github.com/ClickHouse/ClickHouse/pull/9404) ([alexey-milovidov](https://github.com/alexey-milovidov), [Vasily Nemkov](https://github.com/Enmk)). Not every time and date functions are working for extended range of dates.
* Add `prefer_column_name_to_alias` setting to use original column names instead of aliases. it is needed to be more compatible with common databases' aliasing rules. This is for [#9715](https://github.com/ClickHouse/ClickHouse/issues/9715) and [#9887](https://github.com/ClickHouse/ClickHouse/issues/9887). [#22044](https://github.com/ClickHouse/ClickHouse/pull/22044) ([Amos Bird](https://github.com/amosbird)).
* Added functions `dictGetChildren(dictionary, key)`, `dictGetDescendants(dictionary, key, level)`. Function `dictGetChildren` return all children as an array if indexes. It is a inverse transformation for `dictGetHierarchy`. Function `dictGetDescendants` return all descendants as if `dictGetChildren` was applied `level` times recursively. Zero `level` value is equivalent to infinity. Closes [#14656](https://github.com/ClickHouse/ClickHouse/issues/14656). [#22096](https://github.com/ClickHouse/ClickHouse/pull/22096) ([Maksim Kita](https://github.com/kitaisreal)).
* Added `executable_pool` dictionary source. Close [#14528](https://github.com/ClickHouse/ClickHouse/issues/14528). [#21321](https://github.com/ClickHouse/ClickHouse/pull/21321) ([Maksim Kita](https://github.com/kitaisreal)).

View File

@ -58,8 +58,7 @@ ClickHouse artificially executes `INSERT` longer (adds sleep) so that the
## inactive_parts_to_throw_insert {#inactive-parts-to-throw-insert}
If the number of inactive parts in a single partition more than the `inactive_parts_to_throw_insert` value, `INSERT` is interrupted with the `Too many inactive parts (N). Parts cleaning are processing significantly slower than inserts` exception.
If the number of inactive parts in a single partition more than the `inactive_parts_to_throw_insert` value, `INSERT` is interrupted with the "Too many inactive parts (N). Parts cleaning are processing significantly slower than inserts" exception.
Possible values:

View File

@ -1565,6 +1565,17 @@ Possible values:
Default value: 0
## optimize_skip_unused_shards_rewrite_in {#optimize-skip-unused-shardslrewrite-in}
Rewrite IN in query for remote shards to exclude values that does not belong to the shard (requires optimize_skip_unused_shards).
Possible values:
- 0 — Disabled.
- 1 — Enabled.
Default value: 1 (since it requires `optimize_skip_unused_shards` anyway, which `0` by default)
## allow_nondeterministic_optimize_skip_unused_shards {#allow-nondeterministic-optimize-skip-unused-shards}
Allow nondeterministic (like `rand` or `dictGet`, since later has some caveats with updates) functions in sharding key.

View File

@ -22,7 +22,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
Структура таблицы может отличаться от исходной структуры таблицы PostgreSQL:
- Имена столбцов должны быть такими же, как в исходной таблице MySQL, но вы можете использовать только некоторые из этих столбцов и в любом порядке.
- Имена столбцов должны быть такими же, как в исходной таблице PostgreSQL, но вы можете использовать только некоторые из этих столбцов и в любом порядке.
- Типы столбцов могут отличаться от типов в исходной таблице PostgreSQL. ClickHouse пытается [приводить](../../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) values to the ClickHouse data types.
- Настройка `external_table_functions_use_nulls` определяет как обрабатывать Nullable столбцы. По умолчанию 1, если 0 - табличная функция не будет делать nullable столбцы и будет вместо null выставлять значения по умолчанию для скалярного типа. Это также применимо для null значений внутри массивов.

View File

@ -55,6 +55,26 @@ Eсли число кусков в партиции превышает знач
ClickHouse искусственно выполняет `INSERT` дольше (добавляет sleep), чтобы фоновый механизм слияния успевал слиять куски быстрее, чем они добавляются.
## inactive_parts_to_throw_insert {#inactive-parts-to-throw-insert}
Если число неактивных кусков в партиции превышает значение `inactive_parts_to_throw_insert`, `INSERT` прерывается с исключением «Too many inactive parts (N). Parts cleaning are processing significantly slower than inserts».
Возможные значения:
- Положительное целое число.
Значение по умолчанию: 0 (не ограничено).
## inactive_parts_to_delay_insert {#inactive-parts-to-delay-insert}
Если число неактивных кусков в партиции больше или равно значению `inactive_parts_to_delay_insert`, `INSERT` искусственно замедляется. Это полезно, когда сервер не может быстро очистить неактивные куски.
Возможные значения:
- Положительное целое число.
Значение по умолчанию: 0 (не ограничено).
## max_delay_to_insert {#max-delay-to-insert}
Величина в секундах, которая используется для расчета задержки `INSERT`, если число кусков в партиции превышает значение [parts_to_delay_insert](#parts-to-delay-insert).

View File

@ -222,8 +222,8 @@ Names extractPrimaryKeyColumnNames(const ASTPtr & storage_ast)
{
String pk_column = primary_key_expr_list->children[i]->getColumnName();
if (pk_column != sorting_key_column)
throw Exception("Primary key must be a prefix of the sorting key, but in position "
+ toString(i) + " its column is " + pk_column + ", not " + sorting_key_column,
throw Exception("Primary key must be a prefix of the sorting key, but the column in the position "
+ toString(i) + " is " + sorting_key_column +", not " + pk_column,
ErrorCodes::BAD_ARGUMENTS);
if (!primary_key_columns_set.emplace(pk_column).second)

View File

@ -750,6 +750,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
global_context->setClustersConfig(config);
global_context->setMacros(std::make_unique<Macros>(*config, "macros", log));
global_context->setExternalAuthenticatorsConfig(*config);
global_context->setExternalModelsConfig(config);
/// Setup protection to avoid accidental DROP for big tables (that are greater than 50 GB by default)
if (config->has("max_table_size_to_drop"))
@ -1322,7 +1323,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
}
/// try to load dictionaries immediately, throw on error and die
ext::scope_guard dictionaries_xmls, models_xmls;
ext::scope_guard dictionaries_xmls;
try
{
if (!config().getBool("dictionaries_lazy_load", true))
@ -1332,8 +1333,6 @@ int Server::main(const std::vector<std::string> & /*args*/)
}
dictionaries_xmls = global_context->getExternalDictionariesLoader().addConfigRepository(
std::make_unique<ExternalLoaderXMLConfigRepository>(config(), "dictionaries_config"));
models_xmls = global_context->getExternalModelsLoader().addConfigRepository(
std::make_unique<ExternalLoaderXMLConfigRepository>(config(), "models_config"));
}
catch (...)
{

View File

@ -13,17 +13,25 @@ namespace ErrorCodes
void AggregateFunctionCombinatorFactory::registerCombinator(const AggregateFunctionCombinatorPtr & value)
{
if (!dict.emplace(value->getName(), value).second)
throw Exception("AggregateFunctionCombinatorFactory: the name '" + value->getName() + "' is not unique",
ErrorCodes::LOGICAL_ERROR);
CombinatorPair pair{
.name = value->getName(),
.combinator_ptr = value,
};
/// lower_bound() cannot be used since sort order of the dict is by length of the combinator
/// but there are just a few combiners, so not a problem.
if (std::find(dict.begin(), dict.end(), pair) != dict.end())
throw Exception(ErrorCodes::LOGICAL_ERROR, "AggregateFunctionCombinatorFactory: the name '{}' is not unique",
value->getName());
dict.emplace(std::lower_bound(dict.begin(), dict.end(), pair), pair);
}
AggregateFunctionCombinatorPtr AggregateFunctionCombinatorFactory::tryFindSuffix(const std::string & name) const
{
/// O(N) is ok for just a few combinators.
for (const auto & suffix_value : dict)
if (endsWith(name, suffix_value.first))
return suffix_value.second;
if (endsWith(name, suffix_value.name))
return suffix_value.combinator_ptr;
return {};
}

View File

@ -15,7 +15,17 @@ namespace DB
class AggregateFunctionCombinatorFactory final: private boost::noncopyable
{
private:
using Dict = std::unordered_map<std::string, AggregateFunctionCombinatorPtr>;
struct CombinatorPair
{
std::string name;
AggregateFunctionCombinatorPtr combinator_ptr;
bool operator==(const CombinatorPair & rhs) const { return name == rhs.name; }
/// Sort by the length of the combinator name for proper tryFindSuffix()
/// for combiners with common prefix (i.e. "State" and "SimpleState").
bool operator<(const CombinatorPair & rhs) const { return name.length() > rhs.name.length(); }
};
using Dict = std::vector<CombinatorPair>;
Dict dict;
public:

View File

@ -55,7 +55,7 @@ std::vector<Connection *> HedgedConnectionsFactory::getManyConnections(PoolMode
{
size_t min_entries = (settings && settings->skip_unavailable_shards) ? 0 : 1;
size_t max_entries;
size_t max_entries = 1;
switch (pool_mode)
{
case PoolMode::GET_ALL:

View File

@ -1,4 +1,5 @@
#include <Common/Config/AbstractConfigurationComparison.h>
#include <Common/getMultipleKeysFromConfig.h>
#include <unordered_set>
#include <common/StringRef.h>
@ -31,6 +32,23 @@ bool isSameConfiguration(const Poco::Util::AbstractConfiguration & left, const P
return isSameConfiguration(left, key, right, key);
}
bool isSameConfigurationWithMultipleKeys(const Poco::Util::AbstractConfiguration & left, const Poco::Util::AbstractConfiguration & right, const String & root, const String & name)
{
if (&left == &right)
return true;
auto left_multiple_keys = getMultipleKeysFromConfig(left, root, name);
auto right_multiple_keys = getMultipleKeysFromConfig(right, root, name);
if (left_multiple_keys.size() != right_multiple_keys.size())
return false;
for (auto & key : left_multiple_keys)
if (!isSameConfiguration(left, right, concatKeyAndSubKey(root, key)))
return false;
return true;
}
bool isSameConfiguration(const Poco::Util::AbstractConfiguration & left, const String & left_key,
const Poco::Util::AbstractConfiguration & right, const String & right_key)
{

View File

@ -13,6 +13,17 @@ namespace DB
bool isSameConfiguration(const Poco::Util::AbstractConfiguration & left,
const Poco::Util::AbstractConfiguration & right);
/// Config may have multiple keys with one name. For example:
/// <root>
/// <some_key>...</some_key>
/// <some_key>...</some_key>
/// </root>
/// Returns true if the specified subview of the two configurations contains
/// the same keys and values for each key with the given name.
bool isSameConfigurationWithMultipleKeys(const Poco::Util::AbstractConfiguration & left,
const Poco::Util::AbstractConfiguration & right,
const String & root, const String & name);
/// Returns true if the specified subview of the two configurations contains the same keys and values.
bool isSameConfiguration(const Poco::Util::AbstractConfiguration & left,
const Poco::Util::AbstractConfiguration & right,

View File

@ -185,12 +185,20 @@ public:
/// Conversion of infinite values to integer is undefined.
throw Exception("Cannot convert infinite value to integer type", ErrorCodes::CANNOT_CONVERT_TYPE);
}
else if (x > std::numeric_limits<T>::max() || x < std::numeric_limits<T>::lowest())
{
throw Exception("Cannot convert out of range floating point value to integer type", ErrorCodes::CANNOT_CONVERT_TYPE);
}
}
if constexpr (std::is_same_v<Decimal256, T>)
{
return Int256(x);
}
else
{
return T(x);
}
}
T operator() (const UInt128 &) const

View File

@ -78,7 +78,7 @@ class IColumn;
M(UInt64, background_buffer_flush_schedule_pool_size, 16, "Number of threads performing background flush for tables with Buffer engine. Only has meaning at server startup.", 0) \
M(UInt64, background_pool_size, 16, "Number of threads performing background work for tables (for example, merging in merge tree). Only has meaning at server startup.", 0) \
M(UInt64, background_move_pool_size, 8, "Number of threads performing background moves for tables. Only has meaning at server startup.", 0) \
M(UInt64, background_fetches_pool_size, 3, "Number of threads performing background fetches for replicated tables. Only has meaning at server startup.", 0) \
M(UInt64, background_fetches_pool_size, 8, "Number of threads performing background fetches for replicated tables. Only has meaning at server startup.", 0) \
M(UInt64, background_schedule_pool_size, 16, "Number of threads performing background tasks for replicated tables, dns cache updates. Only has meaning at server startup.", 0) \
M(UInt64, background_message_broker_schedule_pool_size, 16, "Number of threads performing background tasks for message streaming. Only has meaning at server startup.", 0) \
M(UInt64, background_distributed_schedule_pool_size, 16, "Number of threads performing background tasks for distributed sends. Only has meaning at server startup.", 0) \
@ -118,6 +118,7 @@ class IColumn;
M(Bool, optimize_distributed_group_by_sharding_key, false, "Optimize GROUP BY sharding_key queries (by avoiding costly aggregation on the initiator server).", 0) \
M(UInt64, optimize_skip_unused_shards_limit, 1000, "Limit for number of sharding key values, turns off optimize_skip_unused_shards if the limit is reached", 0) \
M(Bool, optimize_skip_unused_shards, false, "Assumes that data is distributed by sharding_key. Optimization to skip unused shards if SELECT query filters by sharding_key.", 0) \
M(Bool, optimize_skip_unused_shards_rewrite_in, true, "Rewrite IN in query for remote shards to exclude values that does not belong to the shard (requires optimize_skip_unused_shards)", 0) \
M(Bool, allow_nondeterministic_optimize_skip_unused_shards, false, "Allow non-deterministic functions (includes dictGet) in sharding_key for optimize_skip_unused_shards", 0) \
M(UInt64, force_optimize_skip_unused_shards, 0, "Throw an exception if unused shards cannot be skipped (1 - throw only if the table has the sharding key, 2 - always throw.", 0) \
M(UInt64, optimize_skip_unused_shards_nesting, 0, "Same as optimize_skip_unused_shards, but accept nesting level until which it will work.", 0) \

View File

@ -36,15 +36,15 @@
* int func() {
* #if USE_MULTITARGET_CODE
* if (isArchSupported(TargetArch::AVX2))
* return TargetSpecifc::AVX2::funcImpl();
* return TargetSpecific::AVX2::funcImpl();
* #endif
* return TargetSpecifc::Default::funcImpl();
* return TargetSpecific::Default::funcImpl();
* }
*
* Sometimes code may benefit from compiling with different options.
* For these purposes use DECLARE_MULTITARGET_CODE macros. It will create a copy
* of the code for every supported target and compile it with different options.
* These copies are available via TargetSpecifc namespaces described above.
* These copies are available via TargetSpecific namespaces described above.
*
* Inside every TargetSpecific namespace there is a constexpr variable BuildArch,
* which indicates the target platform for current code.
@ -106,7 +106,7 @@ String toString(TargetArch arch);
/* Clang shows warning when there aren't any objects to apply pragma.
* To prevent this warning we define this function inside every macros with pragmas.
*/
# define DUMMY_FUNCTION_DEFINITION void __dummy_function_definition();
# define DUMMY_FUNCTION_DEFINITION [[maybe_unused]] void __dummy_function_definition();
#else
# define BEGIN_AVX512F_SPECIFIC_CODE \
_Pragma("GCC push_options") \

View File

@ -480,7 +480,7 @@ public:
// since right now LUT does not support Int64-values and not format instructions for subsecond parts,
// treat DatTime64 values just as DateTime values by ignoring fractional and casting to UInt32.
const auto c = DecimalUtils::split(vec[i], scale);
instruction.perform(pos, static_cast<UInt32>(c.whole), time_zone);
instruction.perform(pos, static_cast<Int64>(c.whole), time_zone);
}
}
else

View File

@ -773,7 +773,8 @@ inline ReturnType readDateTimeTextImpl(DateTime64 & datetime64, UInt32 scale, Re
while (!buf.eof() && isNumericASCII(*buf.position()))
++buf.position();
}
else if (scale && (whole >= 1000000000LL * scale))
/// 9908870400 is time_t value for 2184-01-01 UTC (a bit over the last year supported by DateTime64)
else if (whole >= 9908870400LL)
{
/// Unix timestamp with subsecond precision, already scaled to integer.
/// For disambiguation we support only time since 2001-09-09 01:46:40 UTC and less than 30 000 years in future.

View File

@ -259,7 +259,11 @@ void PocoHTTPClient::makeRequestInternal(
String error_message;
Poco::StreamCopier::copyToString(response_body_stream, error_message);
response->SetClientErrorType(Aws::Client::CoreErrors::NETWORK_CONNECTION);
if (Aws::Http::IsRetryableHttpResponseCode(response->GetResponseCode()))
response->SetClientErrorType(Aws::Client::CoreErrors::NETWORK_CONNECTION);
else
response->SetClientErrorType(Aws::Client::CoreErrors::USER_CANCELLED);
response->SetClientErrorMessage(error_message);
if (status_code == 429 || status_code == 503)

View File

@ -480,20 +480,15 @@ void CatBoostLibHolder::initAPI()
std::shared_ptr<CatBoostLibHolder> getCatBoostWrapperHolder(const std::string & lib_path)
{
static std::weak_ptr<CatBoostLibHolder> ptr;
static std::shared_ptr<CatBoostLibHolder> ptr;
static std::mutex mutex;
std::lock_guard lock(mutex);
auto result = ptr.lock();
if (!result || result->getCurrentPath() != lib_path)
{
result = std::make_shared<CatBoostLibHolder>(lib_path);
/// This assignment is not atomic, which prevents from creating lock only inside 'if'.
ptr = result;
}
if (!ptr || ptr->getCurrentPath() != lib_path)
ptr = std::make_shared<CatBoostLibHolder>(lib_path);
return result;
return ptr;
}
}

View File

@ -292,7 +292,7 @@ void Clusters::updateClusters(const Poco::Util::AbstractConfiguration & new_conf
std::lock_guard lock(mutex);
/// If old congig is set, remove deleted clusters from impl, otherwise just clear it.
/// If old config is set, remove deleted clusters from impl, otherwise just clear it.
if (old_config)
{
for (const auto & key : deleted_keys)

View File

@ -55,6 +55,8 @@ public:
static Poco::Timespan saturate(const Poco::Timespan & v, const Poco::Timespan & limit);
public:
using SlotToShard = std::vector<UInt64>;
struct Address
{
/** In configuration file,
@ -232,7 +234,6 @@ public:
bool maybeCrossReplication() const;
private:
using SlotToShard = std::vector<UInt64>;
SlotToShard slot_to_shard;
public:

View File

@ -30,7 +30,7 @@ public:
virtual void createForShard(
const Cluster::ShardInfo & shard_info,
const String & query, const ASTPtr & query_ast,
const ASTPtr & query_ast,
ContextPtr context, const ThrottlerPtr & throttler,
const SelectQueryInfo & query_info,
std::vector<QueryPlanPtr> & res,

View File

@ -115,7 +115,7 @@ String formattedAST(const ASTPtr & ast)
void SelectStreamFactory::createForShard(
const Cluster::ShardInfo & shard_info,
const String &, const ASTPtr & query_ast,
const ASTPtr & query_ast,
ContextPtr context, const ThrottlerPtr & throttler,
const SelectQueryInfo &,
std::vector<QueryPlanPtr> & plans,

View File

@ -34,7 +34,7 @@ public:
void createForShard(
const Cluster::ShardInfo & shard_info,
const String & query, const ASTPtr & query_ast,
const ASTPtr & query_ast,
ContextPtr context, const ThrottlerPtr & throttler,
const SelectQueryInfo & query_info,
std::vector<QueryPlanPtr> & plans,

View File

@ -5,7 +5,7 @@
#include <Interpreters/Cluster.h>
#include <Interpreters/IInterpreter.h>
#include <Interpreters/ProcessList.h>
#include <Parsers/queryToString.h>
#include <Interpreters/OptimizeShardingKeyRewriteInVisitor.h>
#include <Processors/Pipe.h>
#include <Processors/QueryPlan/QueryPlan.h>
#include <Processors/QueryPlan/ReadFromPreparedSource.h>
@ -91,7 +91,10 @@ ContextPtr updateSettingsForCluster(const Cluster & cluster, ContextPtr context,
void executeQuery(
QueryPlan & query_plan,
IStreamFactory & stream_factory, Poco::Logger * log,
const ASTPtr & query_ast, ContextPtr context, const SelectQueryInfo & query_info)
const ASTPtr & query_ast, ContextPtr context, const SelectQueryInfo & query_info,
const ExpressionActionsPtr & sharding_key_expr,
const std::string & sharding_key_column_name,
const ClusterPtr & not_optimized_cluster)
{
assert(log);
@ -104,9 +107,7 @@ void executeQuery(
Pipes remote_pipes;
Pipes delayed_pipes;
const std::string query = queryToString(query_ast);
auto new_context = updateSettingsForCluster(*query_info.cluster, context, settings, log);
auto new_context = updateSettingsForCluster(*query_info.getCluster(), context, settings, log);
new_context->getClientInfo().distributed_depth += 1;
@ -127,9 +128,28 @@ void executeQuery(
else
throttler = user_level_throttler;
for (const auto & shard_info : query_info.cluster->getShardsInfo())
size_t shards = query_info.getCluster()->getShardCount();
for (const auto & shard_info : query_info.getCluster()->getShardsInfo())
{
stream_factory.createForShard(shard_info, query, query_ast,
ASTPtr query_ast_for_shard;
if (query_info.optimized_cluster && settings.optimize_skip_unused_shards_rewrite_in && shards > 1)
{
query_ast_for_shard = query_ast->clone();
OptimizeShardingKeyRewriteInVisitor::Data visitor_data{
sharding_key_expr,
sharding_key_column_name,
shard_info,
not_optimized_cluster->getSlotToShard(),
};
OptimizeShardingKeyRewriteInVisitor visitor(visitor_data);
visitor.visit(query_ast_for_shard);
}
else
query_ast_for_shard = query_ast;
stream_factory.createForShard(shard_info,
query_ast_for_shard,
new_context, throttler, query_info, plans,
remote_pipes, delayed_pipes, log);
}

View File

@ -8,11 +8,15 @@ namespace DB
struct Settings;
class Cluster;
using ClusterPtr = std::shared_ptr<Cluster>;
struct SelectQueryInfo;
class Pipe;
class QueryPlan;
class ExpressionActions;
using ExpressionActionsPtr = std::shared_ptr<ExpressionActions>;
namespace ClusterProxy
{
@ -35,7 +39,10 @@ ContextPtr updateSettingsForCluster(const Cluster & cluster, ContextPtr context,
void executeQuery(
QueryPlan & query_plan,
IStreamFactory & stream_factory, Poco::Logger * log,
const ASTPtr & query_ast, ContextPtr context, const SelectQueryInfo & query_info);
const ASTPtr & query_ast, ContextPtr context, const SelectQueryInfo & query_info,
const ExpressionActionsPtr & sharding_key_expr,
const std::string & sharding_key_column_name,
const ClusterPtr & not_optimized_cluster);
}

View File

@ -29,6 +29,7 @@
#include <Disks/DiskLocal.h>
#include <TableFunctions/TableFunctionFactory.h>
#include <Interpreters/ActionLocksManager.h>
#include <Interpreters/ExternalLoaderXMLConfigRepository.h>
#include <Core/Settings.h>
#include <Core/SettingsQuirks.h>
#include <Access/AccessControlManager.h>
@ -225,7 +226,6 @@ private:
void cleanThread()
{
setThreadName("SessionCleaner");
std::unique_lock lock{mutex};
while (true)
@ -338,6 +338,9 @@ struct ContextSharedPart
mutable std::optional<EmbeddedDictionaries> embedded_dictionaries; /// Metrica's dictionaries. Have lazy initialization.
mutable std::optional<ExternalDictionariesLoader> external_dictionaries_loader;
mutable std::optional<ExternalModelsLoader> external_models_loader;
ConfigurationPtr external_models_config;
ext::scope_guard models_repository_guard;
String default_profile_name; /// Default profile name used for default values.
String system_profile_name; /// Profile used by system processes
String buffer_profile_name; /// Profile used by Buffer engine for flushing to the underlying
@ -445,6 +448,7 @@ struct ContextSharedPart
system_logs.reset();
embedded_dictionaries.reset();
external_dictionaries_loader.reset();
models_repository_guard.reset();
external_models_loader.reset();
buffer_flush_schedule_pool.reset();
schedule_pool.reset();
@ -456,7 +460,6 @@ struct ContextSharedPart
trace_collector.reset();
/// Stop zookeeper connection
zookeeper.reset();
}
bool hasTraceCollector() const
@ -1353,11 +1356,29 @@ const ExternalModelsLoader & Context::getExternalModelsLoader() const
ExternalModelsLoader & Context::getExternalModelsLoader()
{
std::lock_guard lock(shared->external_models_mutex);
return getExternalModelsLoaderUnlocked();
}
ExternalModelsLoader & Context::getExternalModelsLoaderUnlocked()
{
if (!shared->external_models_loader)
shared->external_models_loader.emplace(getGlobalContext());
return *shared->external_models_loader;
}
void Context::setExternalModelsConfig(const ConfigurationPtr & config, const std::string & config_name)
{
std::lock_guard lock(shared->external_models_mutex);
if (shared->external_models_config && isSameConfigurationWithMultipleKeys(*config, *shared->external_models_config, "", config_name))
return;
shared->external_models_config = config;
shared->models_repository_guard .reset();
shared->models_repository_guard = getExternalModelsLoaderUnlocked().addConfigRepository(
std::make_unique<ExternalLoaderXMLConfigRepository>(*config, config_name));
}
EmbeddedDictionaries & Context::getEmbeddedDictionariesImpl(const bool throw_on_error) const
{

View File

@ -499,8 +499,11 @@ public:
EmbeddedDictionaries & getEmbeddedDictionaries();
ExternalDictionariesLoader & getExternalDictionariesLoader();
ExternalModelsLoader & getExternalModelsLoader();
ExternalModelsLoader & getExternalModelsLoaderUnlocked();
void tryCreateEmbeddedDictionaries() const;
void setExternalModelsConfig(const ConfigurationPtr & config, const std::string & config_name = "models_config");
/// I/O formats.
BlockInputStreamPtr getInputFormat(const String & name, ReadBuffer & buf, const Block & sample, UInt64 max_block_size) const;

View File

@ -0,0 +1,110 @@
#include <Interpreters/ExpressionActions.h>
#include <Interpreters/convertFieldToType.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTIdentifier.h>
#include <DataTypes/FieldToDataType.h>
#include <Interpreters/OptimizeShardingKeyRewriteInVisitor.h>
namespace
{
using namespace DB;
Field executeFunctionOnField(
const Field & field, const std::string & name,
const ExpressionActionsPtr & expr,
const std::string & sharding_key_column_name)
{
DataTypePtr type = applyVisitor(FieldToDataType{}, field);
ColumnWithTypeAndName column;
column.column = type->createColumnConst(1, field);
column.name = name;
column.type = type;
Block block{column};
size_t num_rows = 1;
expr->execute(block, num_rows);
ColumnWithTypeAndName & ret = block.getByName(sharding_key_column_name);
return (*ret.column)[0];
}
/// Return true if shard may contain such value (or it is unknown), otherwise false.
bool shardContains(
const Field & sharding_column_value,
const std::string & sharding_column_name,
const ExpressionActionsPtr & expr,
const std::string & sharding_key_column_name,
const Cluster::ShardInfo & shard_info,
const Cluster::SlotToShard & slots)
{
/// NULL is not allowed in sharding key,
/// so it should be safe to assume that shard cannot contain it.
if (sharding_column_value.isNull())
return false;
Field sharding_value = executeFunctionOnField(sharding_column_value, sharding_column_name, expr, sharding_key_column_name);
UInt64 value = sharding_value.get<UInt64>();
const auto shard_num = slots[value % slots.size()] + 1;
return shard_info.shard_num == shard_num;
}
}
namespace DB
{
bool OptimizeShardingKeyRewriteInMatcher::needChildVisit(ASTPtr & /*node*/, const ASTPtr & /*child*/)
{
return true;
}
void OptimizeShardingKeyRewriteInMatcher::visit(ASTPtr & node, Data & data)
{
if (auto * function = node->as<ASTFunction>())
visit(*function, data);
}
void OptimizeShardingKeyRewriteInMatcher::visit(ASTFunction & function, Data & data)
{
if (function.name != "in")
return;
auto * left = function.arguments->children.front().get();
auto * right = function.arguments->children.back().get();
auto * identifier = left->as<ASTIdentifier>();
if (!identifier)
return;
const auto & expr = data.sharding_key_expr;
const auto & sharding_key_column_name = data.sharding_key_column_name;
if (!expr->getRequiredColumnsWithTypes().contains(identifier->name()))
return;
/// NOTE: that we should not take care about empty tuple,
/// since after optimize_skip_unused_shards,
/// at least one element should match each shard.
if (auto * tuple_func = right->as<ASTFunction>(); tuple_func && tuple_func->name == "tuple")
{
auto * tuple_elements = tuple_func->children.front()->as<ASTExpressionList>();
std::erase_if(tuple_elements->children, [&](auto & child)
{
auto * literal = child->template as<ASTLiteral>();
return literal && !shardContains(literal->value, identifier->name(), expr, sharding_key_column_name, data.shard_info, data.slots);
});
}
else if (auto * tuple_literal = right->as<ASTLiteral>();
tuple_literal && tuple_literal->value.getType() == Field::Types::Tuple)
{
auto & tuple = tuple_literal->value.get<Tuple &>();
std::erase_if(tuple, [&](auto & child)
{
return !shardContains(child, identifier->name(), expr, sharding_key_column_name, data.shard_info, data.slots);
});
}
}
}

View File

@ -0,0 +1,41 @@
#pragma once
#include <Interpreters/InDepthNodeVisitor.h>
#include <Interpreters/Cluster.h>
namespace DB
{
class ExpressionActions;
using ExpressionActionsPtr = std::shared_ptr<ExpressionActions>;
class ASTFunction;
/// Rewrite `sharding_key IN (...)` for specific shard,
/// so that it will contain only values that belong to this specific shard.
///
/// See also:
/// - evaluateExpressionOverConstantCondition()
/// - StorageDistributed::createSelector()
/// - createBlockSelector()
struct OptimizeShardingKeyRewriteInMatcher
{
/// Cluster::SlotToShard
using SlotToShard = std::vector<UInt64>;
struct Data
{
const ExpressionActionsPtr & sharding_key_expr;
const std::string & sharding_key_column_name;
const Cluster::ShardInfo & shard_info;
const Cluster::SlotToShard & slots;
};
static bool needChildVisit(ASTPtr & /*node*/, const ASTPtr & /*child*/);
static void visit(ASTPtr & node, Data & data);
static void visit(ASTFunction & function, Data & data);
};
using OptimizeShardingKeyRewriteInVisitor = InDepthNodeVisitor<OptimizeShardingKeyRewriteInMatcher, true>;
}

View File

@ -293,13 +293,11 @@ void removeUnneededColumnsFromSelectClause(const ASTSelectQuery * select_query,
else
{
ASTFunction * func = elem->as<ASTFunction>();
/// Never remove untuple. It's result column may be in required columns.
/// It is not easy to analyze untuple here, because types were not calculated yes.
if (func && func->name == "untuple")
for (const auto & col : required_result_columns)
if (col.rfind("_ut_", 0) == 0)
{
new_elements.push_back(elem);
break;
}
new_elements.push_back(elem);
}
}

View File

@ -118,6 +118,7 @@ SRCS(
OpenTelemetrySpanLog.cpp
OptimizeIfChains.cpp
OptimizeIfWithConstantConditionVisitor.cpp
OptimizeShardingKeyRewriteInVisitor.cpp
PartLog.cpp
PredicateExpressionsOptimizer.cpp
PredicateRewriteVisitor.cpp

View File

@ -78,7 +78,7 @@ void ArrowBlockInputFormat::prepareReader()
{
if (stream)
{
auto stream_reader_status = arrow::ipc::RecordBatchStreamReader::Open(asArrowFile(in));
auto stream_reader_status = arrow::ipc::RecordBatchStreamReader::Open(std::make_unique<ArrowInputStreamFromReadBuffer>(in));
if (!stream_reader_status.ok())
throw Exception(ErrorCodes::UNKNOWN_EXCEPTION,
"Error while opening a table: {}", stream_reader_status.status().ToString());

View File

@ -55,26 +55,23 @@ arrow::Status RandomAccessFileFromSeekableReadBuffer::Close()
arrow::Result<int64_t> RandomAccessFileFromSeekableReadBuffer::Tell() const
{
return arrow::Result<int64_t>(in.getPosition());
return in.getPosition();
}
arrow::Result<int64_t> RandomAccessFileFromSeekableReadBuffer::Read(int64_t nbytes, void * out)
{
int64_t bytes_read = in.readBig(reinterpret_cast<char *>(out), nbytes);
return arrow::Result<int64_t>(bytes_read);
return in.readBig(reinterpret_cast<char *>(out), nbytes);
}
arrow::Result<std::shared_ptr<arrow::Buffer>> RandomAccessFileFromSeekableReadBuffer::Read(int64_t nbytes)
{
auto buffer_status = arrow::AllocateBuffer(nbytes);
ARROW_RETURN_NOT_OK(buffer_status);
ARROW_ASSIGN_OR_RAISE(auto buffer, arrow::AllocateResizableBuffer(nbytes))
ARROW_ASSIGN_OR_RAISE(int64_t bytes_read, Read(nbytes, buffer->mutable_data()))
auto shared_buffer = std::shared_ptr<arrow::Buffer>(std::move(std::move(*buffer_status)));
if (bytes_read < nbytes)
RETURN_NOT_OK(buffer->Resize(bytes_read));
size_t n = in.readBig(reinterpret_cast<char *>(shared_buffer->mutable_data()), nbytes);
auto read_buffer = arrow::SliceBuffer(shared_buffer, 0, n);
return arrow::Result<std::shared_ptr<arrow::Buffer>>(shared_buffer);
return buffer;
}
arrow::Status RandomAccessFileFromSeekableReadBuffer::Seek(int64_t position)
@ -83,6 +80,43 @@ arrow::Status RandomAccessFileFromSeekableReadBuffer::Seek(int64_t position)
return arrow::Status::OK();
}
ArrowInputStreamFromReadBuffer::ArrowInputStreamFromReadBuffer(ReadBuffer & in_) : in(in_), is_open{true}
{
}
arrow::Result<int64_t> ArrowInputStreamFromReadBuffer::Read(int64_t nbytes, void * out)
{
return in.readBig(reinterpret_cast<char *>(out), nbytes);
}
arrow::Result<std::shared_ptr<arrow::Buffer>> ArrowInputStreamFromReadBuffer::Read(int64_t nbytes)
{
ARROW_ASSIGN_OR_RAISE(auto buffer, arrow::AllocateResizableBuffer(nbytes))
ARROW_ASSIGN_OR_RAISE(int64_t bytes_read, Read(nbytes, buffer->mutable_data()))
if (bytes_read < nbytes)
RETURN_NOT_OK(buffer->Resize(bytes_read));
return buffer;
}
arrow::Status ArrowInputStreamFromReadBuffer::Abort()
{
return arrow::Status();
}
arrow::Result<int64_t> ArrowInputStreamFromReadBuffer::Tell() const
{
return in.count();
}
arrow::Status ArrowInputStreamFromReadBuffer::Close()
{
is_open = false;
return arrow::Status();
}
std::shared_ptr<arrow::io::RandomAccessFile> asArrowFile(ReadBuffer & in)
{
if (auto * fd_in = dynamic_cast<ReadBufferFromFileDescriptor *>(&in))

View File

@ -61,6 +61,24 @@ private:
ARROW_DISALLOW_COPY_AND_ASSIGN(RandomAccessFileFromSeekableReadBuffer);
};
class ArrowInputStreamFromReadBuffer : public arrow::io::InputStream
{
public:
explicit ArrowInputStreamFromReadBuffer(ReadBuffer & in);
arrow::Result<int64_t> Read(int64_t nbytes, void* out) override;
arrow::Result<std::shared_ptr<arrow::Buffer>> Read(int64_t nbytes) override;
arrow::Status Abort() override;
arrow::Result<int64_t> Tell() const override;
arrow::Status Close() override;
bool closed() const override { return !is_open; }
private:
ReadBuffer & in;
bool is_open = false;
ARROW_DISALLOW_COPY_AND_ASSIGN(ArrowInputStreamFromReadBuffer);
};
std::shared_ptr<arrow::io::RandomAccessFile> asArrowFile(ReadBuffer & in);
}

View File

@ -319,8 +319,8 @@ void MergeTreeData::checkProperties(
{
const String & pk_column = new_primary_key.column_names[i];
if (pk_column != sorting_key_column)
throw Exception("Primary key must be a prefix of the sorting key, but in position "
+ toString(i) + " its column is " + pk_column + ", not " + sorting_key_column,
throw Exception("Primary key must be a prefix of the sorting key, but the column in the position "
+ toString(i) + " is " + sorting_key_column +", not " + pk_column,
ErrorCodes::BAD_ARGUMENTS);
if (!primary_key_columns_set.emplace(pk_column).second)
@ -2880,7 +2880,7 @@ void MergeTreeData::movePartitionToVolume(const ASTPtr & partition, const String
throw Exception("Volume " + name + " does not exists on policy " + getStoragePolicy()->getName(), ErrorCodes::UNKNOWN_DISK);
if (parts.empty())
throw Exception("Nothing to move", ErrorCodes::NO_SUCH_DATA_PART);
throw Exception("Nothing to move (сheck that the partition exists).", ErrorCodes::NO_SUCH_DATA_PART);
parts.erase(std::remove_if(parts.begin(), parts.end(), [&](auto part_ptr)
{

View File

@ -119,9 +119,13 @@ struct SelectQueryInfo
ASTPtr query;
ASTPtr view_query; /// Optimized VIEW query
/// For optimize_skip_unused_shards.
/// Can be modified in getQueryProcessingStage()
/// Cluster for the query.
ClusterPtr cluster;
/// Optimized cluster for the query.
/// In case of optimize_skip_unused_shards it may differs from original cluster.
///
/// Configured in StorageDistributed::getQueryProcessingStage()
ClusterPtr optimized_cluster;
TreeRewriterResultPtr syntax_analyzer_result;
@ -134,6 +138,8 @@ struct SelectQueryInfo
/// Prepared sets are used for indices by storage engine.
/// Example: x IN (1, 2, 3)
PreparedSets sets;
ClusterPtr getCluster() const { return !optimized_cluster ? cluster : optimized_cluster; }
};
}

View File

@ -478,7 +478,7 @@ QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage(
"Skipping irrelevant shards - the query will be sent to the following shards of the cluster (shard numbers): {}",
makeFormattedListOfShards(optimized_cluster));
cluster = optimized_cluster;
query_info.cluster = cluster;
query_info.optimized_cluster = cluster;
}
else
{
@ -558,7 +558,7 @@ void StorageDistributed::read(
InterpreterSelectQuery(query_info.query, local_context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock();
/// Return directly (with correct header) if no shard to query.
if (query_info.cluster->getShardsInfo().empty())
if (query_info.getCluster()->getShardsInfo().empty())
{
Pipe pipe(std::make_shared<NullSource>(header));
auto read_from_pipe = std::make_unique<ReadFromPreparedSource>(std::move(pipe));
@ -586,7 +586,9 @@ void StorageDistributed::read(
local_context->getExternalTables());
ClusterProxy::executeQuery(query_plan, select_stream_factory, log,
modified_query_ast, local_context, query_info);
modified_query_ast, local_context, query_info,
sharding_key_expr, sharding_key_column_name,
getCluster());
/// This is a bug, it is possible only when there is no shards to query, and this is handled earlier.
if (!query_plan.isInitialized())
@ -952,7 +954,7 @@ ClusterPtr StorageDistributed::getOptimizedCluster(
throw Exception(exception_message.str(), ErrorCodes::UNABLE_TO_SKIP_UNUSED_SHARDS);
}
return cluster;
return {};
}
IColumn::Selector StorageDistributed::createSelector(const ClusterPtr cluster, const ColumnWithTypeAndName & result)

View File

@ -17,8 +17,8 @@ void StorageSystemAggregateFunctionCombinators::fillData(MutableColumns & res_co
const auto & combinators = AggregateFunctionCombinatorFactory::instance().getAllAggregateFunctionCombinators();
for (const auto & pair : combinators)
{
res_columns[0]->insert(pair.first);
res_columns[1]->insert(pair.second->isForInternalUsageOnly());
res_columns[0]->insert(pair.name);
res_columns[1]->insert(pair.combinator_ptr->isForInternalUsageOnly());
}
}

View File

@ -54,6 +54,26 @@ def run_and_check(args, env=None, shell=False, stdout=subprocess.PIPE, stderr=su
raise Exception('Command {} return non-zero code {}: {}'.format(args, res.returncode, res.stderr.decode('utf-8')))
def retry_exception(num, delay, func, exception=Exception, *args, **kwargs):
"""
Retry if `func()` throws, `num` times.
:param func: func to run
:param num: number of retries
:throws StopIteration
"""
i = 0
while i <= num:
try:
func(*args, **kwargs)
time.sleep(delay)
except exception: # pylint: disable=broad-except
i += 1
continue
return
raise StopIteration('Function did not finished successfully')
def subprocess_check_call(args):
# Uncomment for debugging
# print('run:', ' ' . join(args))
@ -632,16 +652,6 @@ class ClickHouseCluster:
if self.is_up:
return
# Just in case kill unstopped containers from previous launch
try:
print("Trying to kill unstopped containers...")
if not subprocess_call(['docker-compose', 'kill']):
subprocess_call(['docker-compose', 'down', '--volumes'])
print("Unstopped containers killed")
except:
pass
try:
if destroy_dirs and p.exists(self.instances_dir):
print(("Removing instances dir %s", self.instances_dir))
@ -651,9 +661,24 @@ class ClickHouseCluster:
print(('Setup directory for instance: {} destroy_dirs: {}'.format(instance.name, destroy_dirs)))
instance.create_dir(destroy_dir=destroy_dirs)
# In case of multiple cluster we should not stop compose services.
if destroy_dirs:
# Just in case kill unstopped containers from previous launch
try:
print("Trying to kill unstopped containers...")
subprocess_call(['docker-compose', 'kill'])
subprocess_call(self.base_cmd + ['down', '--volumes', '--remove-orphans'])
print("Unstopped containers killed")
except:
pass
clickhouse_pull_cmd = self.base_cmd + ['pull']
print(f"Pulling images for {self.base_cmd}")
retry_exception(10, 5, subprocess_check_call, Exception, clickhouse_pull_cmd)
self.docker_client = docker.from_env(version=self.docker_api_version)
common_opts = ['up', '-d', '--force-recreate']
common_opts = ['up', '-d']
if self.with_zookeeper and self.base_zookeeper_cmd:
print('Setup ZooKeeper')
@ -735,7 +760,7 @@ class ClickHouseCluster:
if self.with_redis and self.base_redis_cmd:
print('Setup Redis')
subprocess_check_call(self.base_redis_cmd + ['up', '-d', '--force-recreate'])
subprocess_check_call(self.base_redis_cmd + ['up', '-d'])
time.sleep(10)
if self.with_minio and self.base_minio_cmd:
@ -769,7 +794,7 @@ class ClickHouseCluster:
os.environ.pop('SSL_CERT_FILE')
if self.with_cassandra and self.base_cassandra_cmd:
subprocess_check_call(self.base_cassandra_cmd + ['up', '-d', '--force-recreate'])
subprocess_check_call(self.base_cassandra_cmd + ['up', '-d'])
self.wait_cassandra_to_start()
clickhouse_start_cmd = self.base_cmd + ['up', '-d', '--no-recreate']

View File

@ -0,0 +1,3 @@
<yandex>
<catboost_dynamic_library_path>/etc/clickhouse-server/model/libcatboostmodel.so</catboost_dynamic_library_path>
</yandex>

View File

@ -0,0 +1,2 @@
<yandex>
</yandex>

View File

@ -0,0 +1,8 @@
<models>
<model>
<type>catboost</type>
<name>model1</name>
<path>/etc/clickhouse-server/model/model.bin</path>
<lifetime>0</lifetime>
</model>
</models>

View File

@ -0,0 +1,8 @@
<models>
<model>
<type>catboost</type>
<name>model2</name>
<path>/etc/clickhouse-server/model/model.bin</path>
<lifetime>0</lifetime>
</model>
</models>

View File

@ -0,0 +1,58 @@
import os
import sys
import time
import pytest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__)
node = cluster.add_instance('node', stay_alive=True, main_configs=['config/models_config.xml', 'config/catboost_lib.xml'])
def copy_file_to_container(local_path, dist_path, container_id):
os.system("docker cp {local} {cont_id}:{dist}".format(local=local_path, cont_id=container_id, dist=dist_path))
config = '''<yandex>
<models_config>/etc/clickhouse-server/model/{model_config}</models_config>
</yandex>'''
@pytest.fixture(scope="module")
def started_cluster():
try:
cluster.start()
copy_file_to_container(os.path.join(SCRIPT_DIR, 'model/.'), '/etc/clickhouse-server/model', node.docker_id)
node.restart_clickhouse()
yield cluster
finally:
cluster.shutdown()
def change_config(model_config):
node.replace_config("/etc/clickhouse-server/config.d/models_config.xml", config.format(model_config=model_config))
node.query("SYSTEM RELOAD CONFIG;")
def test(started_cluster):
# Set config with the path to the first model.
change_config("model_config.xml")
node.query("SELECT modelEvaluate('model1', 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);")
# Change path to the second model in config.
change_config("model_config2.xml")
# Check that the new model is loaded.
node.query("SELECT modelEvaluate('model2', 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);")
# Check that the old model was unloaded.
node.query_and_get_error("SELECT modelEvaluate('model1', 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);")

View File

@ -0,0 +1,7 @@
<yandex>
<profiles>
<default>
<background_fetches_pool_size>3</background_fetches_pool_size>
</default>
</profiles>
</yandex>

View File

@ -6,12 +6,14 @@ from helpers.cluster import ClickHouseCluster
from helpers.network import PartitionManager
import random
import string
import os
cluster = ClickHouseCluster(__file__)
node1 = cluster.add_instance('node1', with_zookeeper=True)
node2 = cluster.add_instance('node2', with_zookeeper=True)
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
node1 = cluster.add_instance('node1', user_configs=['configs/custom_settings.xml'], with_zookeeper=True)
node2 = cluster.add_instance('node2', user_configs=['configs/custom_settings.xml'], with_zookeeper=True)
DEFAULT_MAX_THREADS_FOR_FETCH = 3
MAX_THREADS_FOR_FETCH = 3
@pytest.fixture(scope="module")
def started_cluster():
@ -64,11 +66,11 @@ def test_limited_fetches(started_cluster):
time.sleep(0.1)
for concurrently_fetching_parts in fetches_result:
if len(concurrently_fetching_parts) > DEFAULT_MAX_THREADS_FOR_FETCH:
assert False, "Found more than {} concurrently fetching parts: {}".format(DEFAULT_MAX_THREADS_FOR_FETCH, ', '.join(concurrently_fetching_parts))
if len(concurrently_fetching_parts) > MAX_THREADS_FOR_FETCH:
assert False, "Found more than {} concurrently fetching parts: {}".format(MAX_THREADS_FOR_FETCH, ', '.join(concurrently_fetching_parts))
assert max([len(parts) for parts in fetches_result]) == 3, "Strange, but we don't utilize max concurrent threads for fetches"
assert(max(background_fetches_metric)) == 3, "Just checking metric consistent with table"
node1.query("DROP TABLE IF EXISTS t SYNC")
node2.query("DROP TABLE IF EXISTS t SYNC")
node2.query("DROP TABLE IF EXISTS t SYNC")

View File

@ -12,7 +12,7 @@ SETTINGS index_granularity = 8192;
INSERT INTO t0 VALUES (0, 0);
SELECT t0.c1 FROM t0 WHERE NOT (t0.c1 OR (t0.c0 AND -1524532316));
SELECT t0.c1 FROM t0 WHERE NOT (t0.c1 OR (t0.c0 AND -1.0));
SELECT t0.c1 FROM t0 WHERE NOT (t0.c1 OR (t0.c0 AND -1.0)); -- { serverError 70 }
SELECT t0.c1 FROM t0 WHERE NOT (t0.c1 OR (t0.c0 AND inf));
SELECT t0.c1 FROM t0 WHERE NOT (t0.c1 OR (t0.c0 AND nan));

View File

@ -0,0 +1,22 @@
(0, 2)
0 0
0 0
WITH CAST(\'default\', \'String\') AS id_no SELECT one.dummy, ignore(id_no) FROM system.one WHERE dummy IN (0, 2)
WITH CAST(\'default\', \'String\') AS id_no SELECT one.dummy, ignore(id_no) FROM system.one WHERE dummy IN (0, 2)
optimize_skip_unused_shards_rewrite_in(0, 2)
0 0
WITH CAST(\'default\', \'String\') AS id_02 SELECT one.dummy, ignore(id_02) FROM system.one WHERE dummy IN tuple(0)
WITH CAST(\'default\', \'String\') AS id_02 SELECT one.dummy, ignore(id_02) FROM system.one WHERE dummy IN tuple(2)
optimize_skip_unused_shards_rewrite_in(2,)
WITH CAST(\'default\', \'String\') AS id_2 SELECT one.dummy, ignore(id_2) FROM system.one WHERE dummy IN tuple(2)
optimize_skip_unused_shards_rewrite_in(0,)
0 0
WITH CAST(\'default\', \'String\') AS id_0 SELECT one.dummy, ignore(id_0) FROM system.one WHERE dummy IN tuple(0)
errors
others
0
0
0
optimize_skip_unused_shards_limit
0
0

View File

@ -0,0 +1,127 @@
-- NOTE: this test cannot use 'current_database = currentDatabase()',
-- because it does not propagated via remote queries,
-- hence it uses 'with (select currentDatabase()) as X'
-- (with subquery to expand it on the initiator).
drop table if exists dist_01756;
drop table if exists dist_01756_str;
drop table if exists data_01756_str;
-- SELECT
-- intHash64(0) % 2,
-- intHash64(2) % 2
-- ┌─modulo(intHash64(0), 2)─┬─modulo(intHash64(2), 2)─┐
-- │ 0 │ 1 │
-- └─────────────────────────┴─────────────────────────┘
create table dist_01756 as system.one engine=Distributed(test_cluster_two_shards, system, one, intHash64(dummy));
-- separate log entry for localhost queries
set prefer_localhost_replica=0;
set force_optimize_skip_unused_shards=2;
set optimize_skip_unused_shards=1;
set optimize_skip_unused_shards_rewrite_in=0;
set log_queries=1;
--
-- w/o optimize_skip_unused_shards_rewrite_in=1
--
select '(0, 2)';
with (select currentDatabase()) as id_no select *, ignore(id_no) from dist_01756 where dummy in (0, 2);
system flush logs;
select query from system.query_log where
event_date = today() and
event_time > now() - interval 1 hour and
not is_initial_query and
query not like '%system.query_log%' and
query like concat('WITH%', currentDatabase(), '%AS id_no %') and
type = 'QueryFinish'
order by query;
--
-- w/ optimize_skip_unused_shards_rewrite_in=1
--
set optimize_skip_unused_shards_rewrite_in=1;
-- detailed coverage for realistic examples
select 'optimize_skip_unused_shards_rewrite_in(0, 2)';
with (select currentDatabase()) as id_02 select *, ignore(id_02) from dist_01756 where dummy in (0, 2);
system flush logs;
select query from system.query_log where
event_date = today() and
event_time > now() - interval 1 hour and
not is_initial_query and
query not like '%system.query_log%' and
query like concat('WITH%', currentDatabase(), '%AS id_02 %') and
type = 'QueryFinish'
order by query;
select 'optimize_skip_unused_shards_rewrite_in(2,)';
with (select currentDatabase()) as id_2 select *, ignore(id_2) from dist_01756 where dummy in (2,);
system flush logs;
select query from system.query_log where
event_date = today() and
event_time > now() - interval 1 hour and
not is_initial_query and
query not like '%system.query_log%' and
query like concat('WITH%', currentDatabase(), '%AS id_2 %') and
type = 'QueryFinish'
order by query;
select 'optimize_skip_unused_shards_rewrite_in(0,)';
with (select currentDatabase()) as id_0 select *, ignore(id_0) from dist_01756 where dummy in (0,);
system flush logs;
select query from system.query_log where
event_date = today() and
event_time > now() - interval 1 hour and
not is_initial_query and
query not like '%system.query_log%' and
query like concat('WITH%', currentDatabase(), '%AS id_0 %') and
type = 'QueryFinish'
order by query;
--
-- errors
--
select 'errors';
-- not tuple
select * from dist_01756 where dummy in (0); -- { serverError 507 }
-- optimize_skip_unused_shards does not support non-constants
select * from dist_01756 where dummy in (select * from system.one); -- { serverError 507 }
select * from dist_01756 where dummy in (toUInt8(0)); -- { serverError 507 }
-- wrong type
select * from dist_01756 where dummy in ('0'); -- { serverError 507 }
-- NOT IN does not supported
select * from dist_01756 where dummy not in (0, 2); -- { serverError 507 }
--
-- others
--
select 'others';
select * from dist_01756 where dummy not in (2, 3) and dummy in (0, 2);
select * from dist_01756 where dummy in tuple(0, 2);
select * from dist_01756 where dummy in tuple(0);
select * from dist_01756 where dummy in tuple(2);
-- Identifier is NULL
select (2 IN (2,)), * from dist_01756 where dummy in (0, 2) format Null;
-- Literal is NULL
select (dummy IN (toUInt8(2),)), * from dist_01756 where dummy in (0, 2) format Null;
-- different type
create table data_01756_str (key String) engine=Memory();
create table dist_01756_str as data_01756_str engine=Distributed(test_cluster_two_shards, currentDatabase(), data_01756_str, cityHash64(key));
select * from dist_01756_str where key in ('0', '2');
select * from dist_01756_str where key in ('0', Null); -- { serverError 507 }
select * from dist_01756_str where key in (0, 2); -- { serverError 53 }
select * from dist_01756_str where key in (0, Null); -- { serverError 53 }
-- optimize_skip_unused_shards_limit
select 'optimize_skip_unused_shards_limit';
select * from dist_01756 where dummy in (0, 2) settings optimize_skip_unused_shards_limit=1; -- { serverError 507 }
select * from dist_01756 where dummy in (0, 2) settings optimize_skip_unused_shards_limit=1, force_optimize_skip_unused_shards=0;
drop table dist_01756;
drop table dist_01756_str;
drop table data_01756_str;

View File

@ -0,0 +1,27 @@
-- { echo }
SELECT formatDateTime(toDateTime64('1935-12-12 12:12:12', 0, 'Europe/Moscow'), '%C');
19
SELECT formatDateTime(toDateTime64('1969-12-12 12:12:12', 0, 'Europe/Moscow'), '%C');
19
SELECT formatDateTime(toDateTime64('1989-12-12 12:12:12', 0, 'Europe/Moscow'), '%C');
19
SELECT formatDateTime(toDateTime64('2019-09-16 19:20:12', 0, 'Europe/Moscow'), '%C');
20
SELECT formatDateTime(toDateTime64('2105-12-12 12:12:12', 0, 'Europe/Moscow'), '%C');
21
SELECT formatDateTime(toDateTime64('2205-12-12 12:12:12', 0, 'Europe/Moscow'), '%C');
22
-- non-zero scale
SELECT formatDateTime(toDateTime64('1935-12-12 12:12:12', 6, 'Europe/Moscow'), '%C');
19
SELECT formatDateTime(toDateTime64('1969-12-12 12:12:12', 6, 'Europe/Moscow'), '%C');
19
SELECT formatDateTime(toDateTime64('1989-12-12 12:12:12', 6, 'Europe/Moscow'), '%C');
19
SELECT formatDateTime(toDateTime64('2019-09-16 19:20:12', 0, 'Europe/Moscow'), '%C');
20
SELECT formatDateTime(toDateTime64('2105-12-12 12:12:12', 6, 'Europe/Moscow'), '%C');
21
SELECT formatDateTime(toDateTime64('2205-01-12 12:12:12', 6, 'Europe/Moscow'), '%C');
22

View File

@ -0,0 +1,16 @@
-- { echo }
SELECT formatDateTime(toDateTime64('1935-12-12 12:12:12', 0, 'Europe/Moscow'), '%C');
SELECT formatDateTime(toDateTime64('1969-12-12 12:12:12', 0, 'Europe/Moscow'), '%C');
SELECT formatDateTime(toDateTime64('1989-12-12 12:12:12', 0, 'Europe/Moscow'), '%C');
SELECT formatDateTime(toDateTime64('2019-09-16 19:20:12', 0, 'Europe/Moscow'), '%C');
SELECT formatDateTime(toDateTime64('2105-12-12 12:12:12', 0, 'Europe/Moscow'), '%C');
SELECT formatDateTime(toDateTime64('2205-12-12 12:12:12', 0, 'Europe/Moscow'), '%C');
-- non-zero scale
SELECT formatDateTime(toDateTime64('1935-12-12 12:12:12', 6, 'Europe/Moscow'), '%C');
SELECT formatDateTime(toDateTime64('1969-12-12 12:12:12', 6, 'Europe/Moscow'), '%C');
SELECT formatDateTime(toDateTime64('1989-12-12 12:12:12', 6, 'Europe/Moscow'), '%C');
SELECT formatDateTime(toDateTime64('2019-09-16 19:20:12', 0, 'Europe/Moscow'), '%C');
SELECT formatDateTime(toDateTime64('2105-12-12 12:12:12', 6, 'Europe/Moscow'), '%C');
SELECT formatDateTime(toDateTime64('2205-01-12 12:12:12', 6, 'Europe/Moscow'), '%C');

View File

@ -0,0 +1,10 @@
-- { echo }
SELECT toDateTime64('2205-12-12 12:12:12', 0, 'UTC');
2205-12-12 12:12:12
SELECT toDateTime64('2205-12-12 12:12:12', 0, 'Europe/Moscow');
2205-12-12 12:12:12
SELECT toDateTime64('2205-12-12 12:12:12', 6, 'Europe/Moscow');
2205-12-12 12:12:12.000000
SELECT toDateTime64('2205-12-12 12:12:12', 6, 'Europe/Moscow');
2205-12-12 12:12:12.000000

View File

@ -0,0 +1,7 @@
-- { echo }
SELECT toDateTime64('2205-12-12 12:12:12', 0, 'UTC');
SELECT toDateTime64('2205-12-12 12:12:12', 0, 'Europe/Moscow');
SELECT toDateTime64('2205-12-12 12:12:12', 6, 'Europe/Moscow');
SELECT toDateTime64('2205-12-12 12:12:12', 6, 'Europe/Moscow');

View File

@ -0,0 +1,2 @@
(0.5,'92233720368547758.07',NULL) 1.00 256 \N \N
\N

View File

@ -0,0 +1,3 @@
SELECT (0.5, '92233720368547758.07', NULL), '', '1.00', untuple(('256', NULL)), NULL FROM (SELECT untuple(((NULL, untuple((('0.0000000100', (65536, NULL, (65535, 9223372036854775807), '25.7', (0.00009999999747378752, '10.25', 1048577), 65536)), '0.0000001024', '65537', NULL))), untuple((9223372036854775807, -inf, 0.5)), NULL, -9223372036854775808)), 257, 7, ('0.0001048575', (1024, NULL, (7, 3), (untuple(tuple(-NULL)), NULL, '0.0001048577', NULL), 0)), 0, (0, 0.9998999834060669, '65537'), untuple(tuple('10.25')));
SELECT NULL FROM (SELECT untuple((NULL, dummy)));

View File

@ -0,0 +1,2 @@
SELECT uniqUpTo(1e100)(number) FROM numbers(5); -- { serverError 70 }
SELECT uniqUpTo(-1e100)(number) FROM numbers(5); -- { serverError 70 }

View File

@ -1,7 +1,10 @@
v21.4.3.21-stable 2021-04-12
v21.3.6.55-lts 2021-04-12
v21.3.5.42-lts 2021-04-07
v21.3.4.25-lts 2021-03-28
v21.3.3.14-lts 2021-03-19
v21.3.2.5-lts 2021-03-12
v21.2.9.41-stable 2021-04-12
v21.2.8.31-stable 2021-04-07
v21.2.7.11-stable 2021-03-28
v21.2.6.1-stable 2021-03-15

1 v21.3.5.42-lts v21.4.3.21-stable 2021-04-07 2021-04-12
1 v21.4.3.21-stable 2021-04-12
2 v21.3.6.55-lts 2021-04-12
3 v21.3.5.42-lts v21.3.5.42-lts 2021-04-07 2021-04-07
4 v21.3.4.25-lts v21.3.4.25-lts 2021-03-28 2021-03-28
5 v21.3.3.14-lts v21.3.3.14-lts 2021-03-19 2021-03-19
6 v21.3.2.5-lts v21.3.2.5-lts 2021-03-12 2021-03-12
7 v21.2.9.41-stable 2021-04-12
8 v21.2.8.31-stable v21.2.8.31-stable 2021-04-07 2021-04-07
9 v21.2.7.11-stable v21.2.7.11-stable 2021-03-28 2021-03-28
10 v21.2.6.1-stable v21.2.6.1-stable 2021-03-15 2021-03-15