Add support of any partition key. [#CLICKHOUSE-3606]
This commit is contained in:
parent 585b80acf5
commit fbe4066c15
@@ -46,6 +46,7 @@
 #include <Parsers/queryToString.h>
 #include <Parsers/ASTDropQuery.h>
 #include <Parsers/ASTLiteral.h>
+#include <Parsers/ASTExpressionList.h>
 #include <Databases/DatabaseMemory.h>
 #include <DataStreams/RemoteBlockInputStream.h>
 #include <DataStreams/SquashingBlockInputStream.h>
@@ -88,8 +89,20 @@ static ConfigurationPtr getConfigurationFromXMLString(const std::string & xml_da
 namespace
 {

 using DatabaseAndTableName = std::pair<String, String>;

+String getDatabaseDotTable(const String & database, const String & table)
+{
+    return backQuoteIfNeed(database) + "." + backQuoteIfNeed(table);
+}
+
+String getDatabaseDotTable(const DatabaseAndTableName & db_and_table)
+{
+    return getDatabaseDotTable(db_and_table.first, db_and_table.second);
+}
+
+
 enum class TaskState
 {
@@ -138,35 +151,24 @@ struct TaskStateWithOwner

 /// Hierarchical description of the tasks
-struct TaskPartition;
+struct ShardPartition;
 struct TaskShard;
 struct TaskTable;
 struct TaskCluster;
 struct ClusterPartition;

-using TasksPartition = std::map<String, TaskPartition>;
+using TasksPartition = std::map<String, ShardPartition>;
 using ShardInfo = Cluster::ShardInfo;
 using TaskShardPtr = std::shared_ptr<TaskShard>;
 using TasksShard = std::vector<TaskShardPtr>;
 using TasksTable = std::list<TaskTable>;
 using ClusterPartitions = std::map<String, ClusterPartition>;

-/// Contains all cluster shards (sorted by neighborhood) containing a partition
-struct ClusterPartition
-{
-    TasksShard shards; /// having that partition
-
-    Stopwatch watch;
-    UInt64 bytes_copied = 0;
-    UInt64 rows_copied = 0;
-
-    size_t total_tries = 0;
-};
-
-
-struct TaskPartition
+/// Just a destination partition of a shard
+struct ShardPartition
 {
-    TaskPartition(TaskShard & parent, const String & name_) : task_shard(parent), name(name_) {}
+    ShardPartition(TaskShard & parent, const String & name_quoted_) : task_shard(parent), name(name_quoted_) {}

     String getPartitionPath() const;
     String getCommonPartitionIsDirtyPath() const;
@@ -204,11 +206,39 @@ struct TaskShard
     UInt32 numberInCluster() const { return info.shard_num; }
     UInt32 indexInCluster() const { return info.shard_num - 1; }

-    TasksPartition partitions;
+    String getDescription() const;
+
+    /// Used to sort clusters by their proximity
+    ShardPriority priority;
+
+    /// Column with unique destination partitions (computed from engine_push_partition_key expr.) in the shard
+    ColumnWithTypeAndName partition_key_column;
+
+    /// There is a task for each destination partition
+    TasksPartition partition_tasks;
+
+    /// Last CREATE TABLE query of the table of the shard
+    ASTPtr current_pull_table_create_query;
+
+    /// Internal distributed tables
+    DatabaseAndTableName table_read_shard;
+    DatabaseAndTableName table_split_shard;
 };


+/// Contains all cluster shards that contain a partition (and sorted by the proximity)
+struct ClusterPartition
+{
+    TasksShard shards; /// having that partition
+
+    Stopwatch watch;
+    UInt64 bytes_copied = 0;
+    UInt64 rows_copied = 0;
+
+    size_t total_tries = 0;
+};
+
+
 struct TaskTable
 {
     TaskTable(TaskCluster & parent, const Poco::Util::AbstractConfiguration & config, const String & prefix,
@@ -235,6 +265,7 @@ struct TaskTable
     /// Storage of destination table
     String engine_push_str;
     ASTPtr engine_push_ast;
+    ASTPtr engine_push_partition_key_ast;

     /// Local Distributed table used to split data
     DatabaseAndTableName table_split;
@@ -252,7 +283,8 @@ struct TaskTable

     /// Filter partitions that should be copied
     bool has_enabled_partitions = false;
-    NameSet enabled_partitions;
+    Strings enabled_partitions;
+    NameSet enabled_partitions_set;

     /// Prioritized list of shards
     TasksShard all_shards;
@@ -277,6 +309,7 @@ struct TaskTable
     void initShards(RandomEngine && random_engine);
 };


 struct TaskCluster
 {
     TaskCluster(const String & task_zookeeper_path_, const String & default_local_database_)
@@ -284,6 +317,7 @@ struct TaskCluster

     void loadTasks(const Poco::Util::AbstractConfiguration & config, const String & base_key = "");

     /// Set (or update) settings and the max_workers param
     void reloadSettings(const Poco::Util::AbstractConfiguration & config, const String & base_key = "");

     /// Base node for all tasks. Its structure:
@@ -315,17 +349,6 @@ struct TaskCluster
 };


-String getDatabaseDotTable(const String & database, const String & table)
-{
-    return backQuoteIfNeed(database) + "." + backQuoteIfNeed(table);
-}
-
-String getDatabaseDotTable(const DatabaseAndTableName & db_and_table)
-{
-    return getDatabaseDotTable(db_and_table.first, db_and_table.second);
-}
-
-
 /// Atomically checks that the is_dirty node does not exist and performs the remaining ops
 /// Returns the relative index of the failed operation in the second field (the passed op has index 0)
 static void checkNoNodeAndCommit(
@@ -381,42 +404,43 @@ Block getBlockWithAllStreamData(const BlockInputStreamPtr & stream)
     return squashStreamIntoOneBlock(stream)->read();
 }

-// Path getters
+/// Path getters

 String TaskTable::getPartitionPath(const String & partition_name) const
 {
-    return task_cluster.task_zookeeper_path             // root
-           + "/tables/" + table_id                      // tables/dst_cluster.merge.hits
-           + "/" + partition_name;                      // 201701
+    return task_cluster.task_zookeeper_path             // root
+           + "/tables/" + table_id                      // tables/dst_cluster.merge.hits
+           + "/" + escapeForFileName(partition_name);   // 201701
 }

-String TaskPartition::getPartitionPath() const
+String ShardPartition::getPartitionPath() const
 {
     return task_shard.task_table.getPartitionPath(name);
 }

-String TaskPartition::getShardStatusPath() const
+String ShardPartition::getShardStatusPath() const
 {
     // /root/table_test.hits/201701/1
     return getPartitionPath() + "/shards/" + toString(task_shard.numberInCluster());
 }

-String TaskPartition::getPartitionShardsPath() const
+String ShardPartition::getPartitionShardsPath() const
 {
     return getPartitionPath() + "/shards";
 }

-String TaskPartition::getPartitionActiveWorkersPath() const
+String ShardPartition::getPartitionActiveWorkersPath() const
 {
     return getPartitionPath() + "/partition_active_workers";
 }

-String TaskPartition::getActiveWorkerPath() const
+String ShardPartition::getActiveWorkerPath() const
 {
     return getPartitionActiveWorkersPath() + "/" + toString(task_shard.numberInCluster());
 }

-String TaskPartition::getCommonPartitionIsDirtyPath() const
+String ShardPartition::getCommonPartitionIsDirtyPath() const
 {
     return getPartitionPath() + "/is_dirty";
 }
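Aside: with an arbitrary partition key, the partition name written into the ZooKeeper path is a quoted text value and may contain characters that are unsafe in a node name, which is why the path getter now wraps it in escapeForFileName(). A minimal standalone sketch of percent-escaping in that spirit (the exact scheme escapeForFileName uses may differ; this is illustrative only):

#include <cctype>
#include <cstdio>
#include <iostream>
#include <string>

// Illustrative stand-in for escapeForFileName(): keep [A-Za-z0-9_],
// percent-encode everything else so the result is safe as a ZK node name.
std::string escapeForPath(const std::string & s)
{
    std::string out;
    for (unsigned char c : s)
    {
        if (std::isalnum(c) || c == '_')
            out += static_cast<char>(c);
        else
        {
            char buf[4];
            std::snprintf(buf, sizeof(buf), "%%%02X", c);
            out += buf;
        }
    }
    return out;
}

int main()
{
    // A quoted Date partition value, as produced for PARTITION BY toMonday(date):
    std::cout << escapeForPath("'2018-02-19'") << "\n";  // %272018%2D02%2D19%27
}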
@@ -426,6 +450,58 @@ String TaskTable::getPartitionIsDirtyPath(const String & partition_name) const
     return getPartitionPath(partition_name) + "/is_dirty";
 }

+String DB::TaskShard::getDescription() const
+{
+    return "№" + toString(numberInCluster())
+           + " of pull table " + getDatabaseDotTable(task_table.table_pull)
+           + " of cluster " + task_table.cluster_pull_name;
+}
+
+
+static bool isExtendedDefinitionStorage(const ASTPtr & storage_ast)
+{
+    const ASTStorage & storage = typeid_cast<const ASTStorage &>(*storage_ast);
+    return storage.partition_by || storage.order_by || storage.sample_by;
+}
+
+static ASTPtr extractPartitionKey(const ASTPtr & storage_ast)
+{
+    String storage_str = queryToString(storage_ast);
+
+    const ASTStorage & storage = typeid_cast<const ASTStorage &>(*storage_ast);
+    const ASTFunction & engine = typeid_cast<const ASTFunction &>(*storage.engine);
+
+    if (!endsWith(engine.name, "MergeTree"))
+    {
+        throw Exception("Unsupported engine was specified in " + storage_str + ", only *MergeTree engines are supported",
+                        ErrorCodes::BAD_ARGUMENTS);
+    }
+
+    ASTPtr arguments_ast = engine.arguments->clone();
+    ASTs & arguments = typeid_cast<ASTExpressionList &>(*arguments_ast).children;
+
+    if (isExtendedDefinitionStorage(storage_ast))
+    {
+        if (storage.partition_by)
+            return storage.partition_by->clone();
+
+        static const char * all = "all";
+        return std::make_shared<ASTLiteral>(StringRange(all, all + strlen(all)), Field(all, strlen(all)));
+    }
+    else
+    {
+        bool is_replicated = startsWith(engine.name, "Replicated");
+        size_t min_args = is_replicated ? 3 : 1;
+
+        if (arguments.size() < min_args)
+            throw Exception("Expected at least " + toString(min_args) + " arguments in " + storage_str, ErrorCodes::BAD_ARGUMENTS);
+
+        ASTPtr & month_arg = is_replicated ? arguments[2] : arguments[1];
+        return makeASTFunction("toYYYYMM", month_arg->clone());
+    }
+}
+

 TaskTable::TaskTable(TaskCluster & parent, const Poco::Util::AbstractConfiguration & config, const String & prefix_,
                      const String & table_key)
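Aside: extractPartitionKey() is the heart of the feature. Extended-syntax *MergeTree definitions yield their PARTITION BY expression directly; old-syntax definitions get a synthesized toYYYYMM(date_column) key taken from the positional engine arguments. A standalone sketch of that dispatch (simplified types and illustrative names, not ClickHouse internals; it mirrors the argument indices used in the code above):

#include <iostream>
#include <optional>
#include <stdexcept>
#include <string>
#include <vector>

struct StorageDef
{
    std::string engine_name;                  // e.g. "ReplicatedMergeTree"
    std::vector<std::string> engine_args;     // positional engine arguments
    std::optional<std::string> partition_by;  // set only for extended syntax
};

std::string extractPartitionKeySketch(const StorageDef & storage)
{
    const std::string suffix = "MergeTree";
    if (storage.engine_name.size() < suffix.size()
        || storage.engine_name.compare(storage.engine_name.size() - suffix.size(), suffix.size(), suffix) != 0)
        throw std::runtime_error("only *MergeTree engines are supported");

    // Extended syntax: the key is the PARTITION BY expression itself.
    if (storage.partition_by)
        return *storage.partition_by;

    // Old syntax: Replicated* engines carry (zk_path, replica_name) first, so the
    // date column sits further right; the indices mirror the diff above.
    bool is_replicated = storage.engine_name.rfind("Replicated", 0) == 0;
    size_t date_arg = is_replicated ? 2 : 1;
    if (storage.engine_args.size() <= date_arg)
        throw std::runtime_error("too few engine arguments");
    return "toYYYYMM(" + storage.engine_args[date_arg] + ")";
}

int main()
{
    StorageDef extended{"MergeTree", {}, "d % 3"};
    StorageDef old_replicated{"ReplicatedMergeTree", {"'/zk/path'", "'{replica}'", "EventDate"}, std::nullopt};

    std::cout << extractPartitionKeySketch(extended) << "\n";        // d % 3
    std::cout << extractPartitionKeySketch(old_replicated) << "\n";  // toYYYYMM(EventDate)
}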
@@ -453,6 +529,7 @@ TaskTable::TaskTable(TaskCluster & parent, const Poco::Util::AbstractConfigurati
     {
         ParserStorage parser_storage;
         engine_push_ast = parseQuery(parser_storage, engine_push_str);
+        engine_push_partition_key_ast = extractPartitionKey(engine_push_ast);
     }

     sharding_key_str = config.getString(table_prefix + "sharding_key");
@@ -482,13 +559,12 @@ TaskTable::TaskTable(TaskCluster & parent, const Poco::Util::AbstractConfigurati
         Strings keys;
         config.keys(enabled_partitions_prefix, keys);

-        Strings partitions;
         if (keys.empty())
         {
             /// Parse the list of partitions from a space-separated string
             String partitions_str = config.getString(table_prefix + "enabled_partitions");
             boost::trim_if(partitions_str, isWhitespaceASCII);
-            boost::split(partitions, partitions_str, isWhitespaceASCII, boost::token_compress_on);
+            boost::split(enabled_partitions, partitions_str, isWhitespaceASCII, boost::token_compress_on);
         }
         else
         {
@@ -498,13 +574,12 @@ TaskTable::TaskTable(TaskCluster & parent, const Poco::Util::AbstractConfigurati
                 if (!startsWith(key, "partition"))
                     throw Exception("Unknown key " + key + " in " + enabled_partitions_prefix, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG);

-                partitions.emplace_back(config.getString(enabled_partitions_prefix + "." + key));
+                enabled_partitions.emplace_back(config.getString(enabled_partitions_prefix + "." + key));
             }
         }

-        std::copy(partitions.begin(), partitions.end(), std::inserter(enabled_partitions, enabled_partitions.begin()));
+        std::copy(enabled_partitions.begin(), enabled_partitions.end(), std::inserter(enabled_partitions_set, enabled_partitions_set.begin()));
     }
 }
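Aside: the switch from a single NameSet to Strings enabled_partitions plus NameSet enabled_partitions_set is deliberate. The vector preserves the order the user wrote in <enabled_partitions/> (which now drives processing order), while the set keeps membership tests cheap. A small sketch of the idea, assuming a space-separated config value as above (illustrative standard-library types in place of Strings/NameSet):

#include <iostream>
#include <set>
#include <sstream>
#include <string>
#include <vector>

int main()
{
    std::string partitions_str = "201703 201701 201702";  // user-specified order

    std::vector<std::string> enabled_partitions;  // preserves the processing order
    std::istringstream ss(partitions_str);
    for (std::string p; ss >> p;)
        enabled_partitions.push_back(p);

    std::set<std::string> enabled_partitions_set(enabled_partitions.begin(), enabled_partitions.end());

    std::cout << "first to process: " << enabled_partitions.front() << "\n";           // 201703
    std::cout << "201702 enabled: " << enabled_partitions_set.count("201702") << "\n"; // 1
}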
@@ -670,9 +745,11 @@ public:

         LOG_DEBUG(log, "Loaded " << task_cluster->table_tasks.size() << " table tasks");

-        /// Compute set of partitions, set of partitions aren't changed
+        /// Compute the set of partitions; assume the set of partitions isn't changed during processing
         for (auto & task_table : task_cluster->table_tasks)
         {
+            LOG_DEBUG(log, "Set up table task " << task_table.table_id);
+
             for (const TaskShardPtr & task_shard : task_table.all_shards)
             {
                 if (task_shard->info.pool == nullptr)
@@ -681,36 +758,72 @@ public:
                                     ErrorCodes::LOGICAL_ERROR);
                 }

-                LOG_DEBUG(log, "Set up table task " << task_table.table_id << " (pull from "
-                               << "cluster " << task_table.cluster_pull_name
-                               << ", table " << getDatabaseDotTable(task_table.table_pull)
-                               << ", shard " << task_shard->info.shard_num << ")");
-
-                LOG_DEBUG(log, "There are "
-                    << task_table.all_shards.size() << " shards, "
-                    << task_table.local_shards.size() << " of them are remote ones");
-
-                auto connection_entry = task_shard->info.pool->get(&task_cluster->settings_pull);
-                LOG_DEBUG(log, "Will get meta information for shard " << task_shard->numberInCluster()
-                               << " from replica " << connection_entry->getDescription());
-
-                Strings partitions = getRemotePartitions(task_table.table_pull, *connection_entry, &task_cluster->settings_pull);
-                for (const String & partition_name : partitions)
-                {
-                    /// Do not process partition if it is not in enabled_partitions list
-                    if (task_table.has_enabled_partitions && !task_table.enabled_partitions.count(partition_name))
-                    {
-                        LOG_DEBUG(log, "Will skip partition " << partition_name);
-                        continue;
-                    }
-
-                    task_shard->partitions.emplace(partition_name, TaskPartition(*task_shard, partition_name));
-                }
-
-                LOG_DEBUG(log, "Will fetch " << task_shard->partitions.size() << " partitions");
+                LOG_DEBUG(log, "Set up shard " << task_shard->getDescription());
+                LOG_DEBUG(log, "There are " << task_table.all_shards.size() << " shards, " << task_table.local_shards.size() << " of them are local ones");
+
+                auto existing_partitions_names = getShardPartitions(*task_shard);
+                Strings filtered_partitions_names;
+
+                /// Check that the user specified correct partition names
+                auto check_partition_format = [&] (const String & partition_text_quoted)
+                {
+                    const DataTypePtr & type = task_shard->partition_key_column.type;
+                    MutableColumnPtr column_dummy = type->createColumn();
+                    ReadBufferFromString rb(partition_text_quoted);
+
+                    try
+                    {
+                        type->deserializeTextQuoted(*column_dummy, rb);
+                    }
+                    catch (Exception & e)
+                    {
+                        throw Exception("Partition " + partition_text_quoted + " has incorrect format. " + e.displayText(), ErrorCodes::BAD_ARGUMENTS);
+                    }
+                };
+
+                if (task_table.has_enabled_partitions)
+                {
+                    /// Process partitions in the order specified by <enabled_partitions/>
+                    for (const String & partition_name : task_table.enabled_partitions)
+                    {
+                        check_partition_format(partition_name);
+                        auto it = existing_partitions_names.find(partition_name);
+
+                        /// Do not process partition if it is not in enabled_partitions list
+                        if (it == existing_partitions_names.end())
+                        {
+                            LOG_WARNING(log, "There is no enabled " << partition_name << " specified in enabled_partitions in shard "
+                                             << task_shard->getDescription());
+                            continue;
+                        }
+
+                        filtered_partitions_names.emplace_back(*it);
+                    }
+
+                    for (const String & partition_name : existing_partitions_names)
+                    {
+                        if (!task_table.enabled_partitions_set.count(partition_name))
+                        {
+                            LOG_DEBUG(log, "Partition " << partition_name << " will not be processed, since it is not in "
+                                           << "enabled_partitions of " << task_table.table_id);
+                        }
+                    }
+                }
+                else
+                {
+                    for (const String & partition_name : existing_partitions_names)
+                        filtered_partitions_names.emplace_back(partition_name);
+                }
+
+                for (const String & partition_name : filtered_partitions_names)
+                {
+                    task_shard->partition_tasks.emplace(partition_name, ShardPartition(*task_shard, partition_name));
+
+                    ClusterPartition & cluster_partition = task_table.cluster_partitions[partition_name];
+                    cluster_partition.shards.emplace_back(task_shard);
+                }
+
+                LOG_DEBUG(log, "Will copy " << task_shard->partition_tasks.size() << " partitions from shard " << task_shard->getDescription());
             }
         }
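Aside: check_partition_format validates user input early. Every value from <enabled_partitions/> must deserialize under the shard's partition key type, so the copier fails with BAD_ARGUMENTS instead of silently matching nothing. A standalone sketch of the same round-trip idea, modelling the key type as a numeric parser (illustrative only, not the IDataType API):

#include <charconv>
#include <iostream>
#include <stdexcept>
#include <string>

// Model of a numeric partition key type (e.g. the value of d % 3):
// the whole string must parse, or the partition name is malformed.
void checkPartitionFormat(const std::string & partition_text_quoted)
{
    unsigned value = 0;
    const char * begin = partition_text_quoted.data();
    const char * end = begin + partition_text_quoted.size();
    auto [ptr, ec] = std::from_chars(begin, end, value);
    if (ec != std::errc() || ptr != end)
        throw std::runtime_error("Partition " + partition_text_quoted + " has incorrect format");
}

int main()
{
    checkPartitionFormat("2");  // ok for PARTITION BY d % 3

    try
    {
        checkPartitionFormat("'2018-02-19'");  // a Date literal does not fit a numeric key
    }
    catch (const std::exception & e)
    {
        std::cout << e.what() << "\n";
    }
}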
@@ -786,11 +899,11 @@ public:
         /// NOTE: shards are sorted by "distance" to current host
         for (const TaskShardPtr & shard : shards_with_partition)
         {
-            auto it_shard_partition = shard->partitions.find(partition_name);
-            if (it_shard_partition == shard->partitions.end())
+            auto it_shard_partition = shard->partition_tasks.find(partition_name);
+            if (it_shard_partition == shard->partition_tasks.end())
                 throw Exception("There is no such partition in a shard. This is a bug.", ErrorCodes::LOGICAL_ERROR);

-            TaskPartition & task_shard_partition = it_shard_partition->second;
+            ShardPartition & task_shard_partition = it_shard_partition->second;
             if (processPartitionTask(task_shard_partition))
                 ++num_successful_shards;
         }
@@ -905,7 +1018,7 @@ public:
         Strings status_paths;
         for (auto & shard : shards_with_partition)
         {
-            TaskPartition & task_shard_partition = shard->partitions.find(partition_name)->second;
+            ShardPartition & task_shard_partition = shard->partition_tasks.find(partition_name)->second;
             status_paths.emplace_back(task_shard_partition.getShardStatusPath());
         }
@@ -998,6 +1111,7 @@ protected:
         }
     }

+    /// Removes MATERIALIZED and ALIAS columns from a create table query
    static ASTPtr removeAliasColumnsFromCreateQuery(const ASTPtr & query_ast)
    {
        const ASTs & column_asts = typeid_cast<ASTCreateQuery &>(*query_ast).columns->children;
@@ -1025,6 +1139,7 @@ protected:
        return new_query_ast;
    }

+    /// Replaces ENGINE and table name in a create query
    std::shared_ptr<ASTCreateQuery> rewriteCreateQueryStorage(const ASTPtr & create_query_ast, const DatabaseAndTableName & new_table, const ASTPtr & new_storage_ast)
    {
        ASTCreateQuery & create = typeid_cast<ASTCreateQuery &>(*create_query_ast);
@@ -1043,7 +1158,7 @@ protected:
        return res;
    }

-    bool tryDropPartition(TaskPartition & task_partition, const zkutil::ZooKeeperPtr & zookeeper)
+    bool tryDropPartition(ShardPartition & task_partition, const zkutil::ZooKeeperPtr & zookeeper)
    {
        if (is_safe_mode)
            throw Exception("DROP PARTITION is prohibited in safe mode", ErrorCodes::NOT_IMPLEMENTED);
@@ -1117,7 +1232,7 @@ protected:
    }


-    bool processPartitionTask(TaskPartition & task_partition)
+    bool processPartitionTask(ShardPartition & task_partition)
    {
        bool res;
@@ -1144,7 +1259,7 @@ protected:
        return res;
    }

-    bool processPartitionTaskImpl(TaskPartition & task_partition)
+    bool processPartitionTaskImpl(ShardPartition & task_partition)
    {
        TaskShard & task_shard = task_partition.task_shard;
        TaskTable & task_table = task_shard.task_table;
@@ -1172,7 +1287,7 @@ protected:
        {
            String query;
            query += "SELECT " + fields + " FROM " + getDatabaseDotTable(from_table);
-           query += " WHERE (_part LIKE '" + task_partition.name + "%')";
+           query += " WHERE (" + queryToString(task_table.engine_push_partition_key_ast) + " = " + task_partition.name + ")";
            if (!task_table.where_condition_str.empty())
                query += " AND (" + task_table.where_condition_str + ")";
            if (!limit.empty())
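Aside: this predicate change is what removes the monthly-partition assumption. Instead of matching data parts by the _part name prefix, the copier compares the destination partition key expression against the quoted partition value, so any key works as long as the value is written the way system.parts quotes it. A sketch of the queries this produces (table name and expressions are illustrative):

#include <iostream>
#include <string>

std::string buildSelect(const std::string & partition_key_expr, const std::string & partition_quoted)
{
    std::string query = "SELECT * FROM db.table";
    query += " WHERE (" + partition_key_expr + " = " + partition_quoted + ")";
    return query;
}

int main()
{
    // Weekly key with a quoted Date value:
    std::cout << buildSelect("toMonday(date)", "'2018-02-19'") << "\n";
    // Classic monthly key with a numeric value:
    std::cout << buildSelect("toYYYYMM(EventDate)", "201701") << "\n";
}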
@@ -1245,45 +1360,13 @@ protected:

        zookeeper->createAncestors(current_task_status_path);

-        /// We need to update table definitions for each part, it could be changed after ALTER
-        ASTPtr query_create_pull_table;
-        {
-            /// Fetch and parse (possibly) new definition
-            auto connection_entry = task_shard.info.pool->get(&task_cluster->settings_pull);
-            String create_query_pull_str = getRemoteCreateTable(task_table.table_pull, *connection_entry, &task_cluster->settings_pull);
-
-            ParserCreateQuery parser_create_query;
-            query_create_pull_table = parseQuery(parser_create_query, create_query_pull_str);
-        }
-
-        /// Create local Distributed tables:
-        /// a table fetching data from current shard and a table inserting data to the whole destination cluster
-        DatabaseAndTableName table_shard(working_database_name, ".read_shard." + task_table.table_id);
-        DatabaseAndTableName table_split(working_database_name, ".split." + task_table.table_id);
-        {
-            /// Create special cluster with single shard
-            String shard_read_cluster_name = ".read_shard." + task_table.cluster_pull_name;
-            ClusterPtr cluster_pull_current_shard = task_table.cluster_pull->getClusterWithSingleShard(task_shard.indexInCluster());
-            context.setCluster(shard_read_cluster_name, cluster_pull_current_shard);
-
-            auto storage_shard_ast = createASTStorageDistributed(shard_read_cluster_name, task_table.table_pull.first, task_table.table_pull.second);
-            const auto & storage_split_ast = task_table.engine_split_ast;
-
-            auto create_query_ast = removeAliasColumnsFromCreateQuery(query_create_pull_table);
-            auto create_table_pull_ast = rewriteCreateQueryStorage(create_query_ast, table_shard, storage_shard_ast);
-            auto create_table_split_ast = rewriteCreateQueryStorage(create_query_ast, table_split, storage_split_ast);
-
-            //LOG_DEBUG(log, "Create shard reading table. Query: " << queryToString(create_table_pull_ast));
-            dropAndCreateLocalTable(create_table_pull_ast);
-
-            //LOG_DEBUG(log, "Create split table. Query: " << queryToString(create_table_split_ast));
-            dropAndCreateLocalTable(create_table_split_ast);
-        }
+        /// We need to update table definitions for each partition, it could be changed after ALTER
+        createShardInternalTables(task_shard);

        /// Check that the destination partition is empty if we are the first worker
+        /// NOTE: this check is incorrect if pull and push tables have different partition keys!
        {
-            ASTPtr query_select_ast = get_select_query(table_split, "count()");
+            ASTPtr query_select_ast = get_select_query(task_shard.table_split_shard, "count()");
            UInt64 count;
            {
                Context local_context = context;
@@ -1292,9 +1375,8 @@ protected:
                local_context.getSettingsRef().skip_unavailable_shards = true;

                InterpreterSelectQuery interpreter(query_select_ast, local_context);
-               BlockIO io = interpreter.execute();
-
-               Block block = getBlockWithAllStreamData(io.in);
+               Block block = getBlockWithAllStreamData(interpreter.execute().in);
                count = (block) ? block.safeGetByPosition(0).column->getUInt(0) : 0;
            }
@@ -1337,15 +1419,15 @@ protected:

        /// Try to create the table (if not exists) on each shard
        {
-           auto create_query_push_ast = rewriteCreateQueryStorage(query_create_pull_table, task_table.table_push, task_table.engine_push_ast);
+           auto create_query_push_ast = rewriteCreateQueryStorage(task_shard.current_pull_table_create_query, task_table.table_push, task_table.engine_push_ast);
            typeid_cast<ASTCreateQuery &>(*create_query_push_ast).if_not_exists = true;
            String query = queryToString(create_query_push_ast);

-           LOG_DEBUG(log, "Create remote push tables. Query: " << query);
+           LOG_DEBUG(log, "Create destination tables. Query: " << query);
            size_t shards = executeQueryOnCluster(task_table.cluster_push, query, create_query_push_ast, &task_cluster->settings_push,
                                                  PoolMode::GET_MANY);
-           LOG_DEBUG(log, "Remote push tables have been created on " << shards << " shards of "
-                          << task_table.cluster_push->getShardCount());
+           LOG_DEBUG(log, "Destination tables " << getDatabaseDotTable(task_table.table_push) << " have been created on " << shards
+                          << " shards of " << task_table.cluster_push->getShardCount());
        }

        /// Do the copying
@@ -1359,14 +1441,14 @@ protected:
        }

        // Select all fields
-       ASTPtr query_select_ast = get_select_query(table_shard, "*", inject_fault ? "1" : "");
+       ASTPtr query_select_ast = get_select_query(task_shard.table_read_shard, "*", inject_fault ? "1" : "");

        LOG_DEBUG(log, "Executing SELECT query: " << queryToString(query_select_ast));

        ASTPtr query_insert_ast;
        {
            String query;
-           query += "INSERT INTO " + getDatabaseDotTable(table_split) + " VALUES ";
+           query += "INSERT INTO " + getDatabaseDotTable(task_shard.table_split_shard) + " VALUES ";

            ParserQuery p_query(query.data() + query.size());
            query_insert_ast = parseQuery(p_query, query);
@@ -1521,35 +1603,90 @@ protected:
        return typeid_cast<const ColumnString &>(*block.safeGetByPosition(0).column).getDataAt(0).toString();
    }

-    Strings getRemotePartitions(const DatabaseAndTableName & table, Connection & connection, const Settings * settings = nullptr)
-    {
-        Block block;
-        {
-            WriteBufferFromOwnString wb;
-            wb << "SELECT DISTINCT partition FROM system.parts WHERE"
-               << " database = " << DB::quote << table.first
-               << " AND table = " << DB::quote << table.second;
-
-            block = getBlockWithAllStreamData(std::make_shared<RemoteBlockInputStream>(
-                connection, wb.str(), Block{{ ColumnString::create(), std::make_shared<DataTypeString>(), "partition" }}, context, settings));
-        }
-
-        Strings res;
-        if (block)
-        {
-            auto & partition_col = typeid_cast<const ColumnString &>(*block.getByName("partition").column);
-            for (size_t i = 0; i < partition_col.size(); ++i)
-                res.push_back(partition_col.getDataAt(i).toString());
-        }
-        else
-        {
-            if (!existsRemoteTable(table, connection))
-            {
-                throw Exception("Table " + getDatabaseDotTable(table) + " does not exist on server "
-                                + connection.getDescription(), ErrorCodes::UNKNOWN_TABLE);
-            }
-        }
+    ASTPtr getCreateTableForPullShard(TaskShard & task_shard)
+    {
+        /// Fetch and parse the (possibly) new definition
+        auto connection_entry = task_shard.info.pool->get(&task_cluster->settings_pull);
+        String create_query_pull_str = getRemoteCreateTable(task_shard.task_table.table_pull, *connection_entry,
+                                                            &task_cluster->settings_pull);
+
+        ParserCreateQuery parser_create_query;
+        return parseQuery(parser_create_query, create_query_pull_str);
+    }
+
+    void createShardInternalTables(TaskShard & task_shard)
+    {
+        TaskTable & task_table = task_shard.task_table;
+
+        /// We need to update table definitions for each part, it could be changed after ALTER
+        task_shard.current_pull_table_create_query = getCreateTableForPullShard(task_shard);
+
+        /// Create local Distributed tables:
+        /// a table fetching data from the current shard and a table inserting data to the whole destination cluster
+        String read_shard_prefix = ".read_shard_" + toString(task_shard.indexInCluster()) + ".";
+        String split_shard_prefix = ".split.";
+        task_shard.table_read_shard = DatabaseAndTableName(working_database_name, read_shard_prefix + task_table.table_id);
+        task_shard.table_split_shard = DatabaseAndTableName(working_database_name, split_shard_prefix + task_table.table_id);
+
+        /// Create a special cluster with a single shard
+        String shard_read_cluster_name = read_shard_prefix + task_table.cluster_pull_name;
+        ClusterPtr cluster_pull_current_shard = task_table.cluster_pull->getClusterWithSingleShard(task_shard.indexInCluster());
+        context.setCluster(shard_read_cluster_name, cluster_pull_current_shard);
+
+        auto storage_shard_ast = createASTStorageDistributed(shard_read_cluster_name, task_table.table_pull.first, task_table.table_pull.second);
+        const auto & storage_split_ast = task_table.engine_split_ast;
+
+        auto create_query_ast = removeAliasColumnsFromCreateQuery(task_shard.current_pull_table_create_query);
+        auto create_table_pull_ast = rewriteCreateQueryStorage(create_query_ast, task_shard.table_read_shard, storage_shard_ast);
+        auto create_table_split_ast = rewriteCreateQueryStorage(create_query_ast, task_shard.table_split_shard, storage_split_ast);
+
+        //LOG_DEBUG(log, "Create shard reading table. Query: " << queryToString(create_table_pull_ast));
+        dropAndCreateLocalTable(create_table_pull_ast);
+
+        //LOG_DEBUG(log, "Create split table. Query: " << queryToString(create_table_split_ast));
+        dropAndCreateLocalTable(create_table_split_ast);
+    }
+
+
+    std::set<String> getShardPartitions(TaskShard & task_shard)
+    {
+        createShardInternalTables(task_shard);
+
+        TaskTable & task_table = task_shard.task_table;
+
+        String query;
+        {
+            WriteBufferFromOwnString wb;
+            wb << "SELECT DISTINCT " << queryToString(task_table.engine_push_partition_key_ast) << " AS partition FROM"
+               << " " << getDatabaseDotTable(task_shard.table_read_shard) << " ORDER BY partition DESC";
+            query = wb.str();
+        }
+
+        LOG_DEBUG(log, "Computing destination partition set, executing query: " << query);
+
+        ParserQuery parser_query(query.data() + query.size());
+        ASTPtr query_ast = parseQuery(parser_query, query);
+
+        Context local_context = context;
+        InterpreterSelectQuery interp(query_ast, local_context);
+        Block block = getBlockWithAllStreamData(interp.execute().in);
+
+        std::set<String> res;
+        if (block)
+        {
+            ColumnWithTypeAndName & column = block.getByPosition(0);
+            task_shard.partition_key_column = column;
+
+            for (size_t i = 0; i < column.column->size(); ++i)
+            {
+                WriteBufferFromOwnString wb;
+                column.type->serializeTextQuoted(*column.column, i, wb);
+                res.emplace(wb.str());
+            }
+        }
+
+        LOG_DEBUG(log, "There are " << res.size() << " destination partitions in shard " << task_shard.getDescription());

        return res;
    }
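Aside: getShardPartitions replaces the old system.parts lookup. It evaluates the destination partition key over the shard's data (through the per-shard read table) and collects the distinct values in their quoted text form, one copy task per value. A standalone sketch of that computation, with the rows and the key as illustrative stand-ins:

#include <iostream>
#include <set>
#include <string>
#include <vector>

int main()
{
    std::vector<int> shard_rows = {0, 1, 2, 3, 4, 5};  // values of column d on one shard
    auto partition_key = [](int d) { return d % 3; };  // destination PARTITION BY d % 3

    // SELECT DISTINCT <key> ... collapsed into a set of quoted text values.
    std::set<std::string> partitions;
    for (int d : shard_rows)
        partitions.emplace(std::to_string(partition_key(d)));

    for (const auto & p : partitions)
        std::cout << p << "\n";  // 0, 1, 2: one task per destination partition
}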
@@ -1610,7 +1747,7 @@ protected:
        Settings current_settings = settings ? *settings : task_cluster->settings_common;
        current_settings.max_parallel_replicas = num_remote_replicas ? num_remote_replicas : 1;

-       std::vector<IConnectionPool::Entry> connections = shard.pool->getMany(&current_settings, pool_mode);
+       auto connections = shard.pool->getMany(&current_settings, pool_mode);

        for (auto & connection : connections)
        {
@@ -1619,7 +1756,8 @@ protected:

            try
            {
-               RemoteBlockInputStream stream(*connection, query, context, &current_settings);
+               /// CREATE TABLE and DROP PARTITION return an empty block
+               RemoteBlockInputStream stream(*connection, query, Block(), context, &current_settings);
                NullBlockOutputStream output;
                copyData(stream, output);
@@ -1780,7 +1918,7 @@ void ClusterCopierApp::setupLogging()
    }

    Poco::AutoPtr<Poco::PatternFormatter> formatter(new Poco::PatternFormatter);
-   formatter->setProperty("pattern", "%L%Y-%m-%d %H:%M:%S.%i [ %I ] <%p> %s: %t");
+   formatter->setProperty("pattern", "%L%Y-%m-%d %H:%M:%S.%i <%p> %s: %t");
    Poco::AutoPtr<Poco::FormattingChannel> formatting_channel(new Poco::FormattingChannel(formatter));
    formatting_channel->setChannel(split_channel);
    split_channel->open();
@@ -1840,7 +1978,7 @@ int ClusterCopierApp::main(const std::vector<std::string> &)
    }
    catch (...)
    {
-       std::cerr << DB::getCurrentExceptionMessage(true) << "\n";
+       tryLogCurrentException(&Poco::Logger::root(), __PRETTY_FUNCTION__);
        auto code = getCurrentExceptionCode();

        return (code) ? code : -1;
@@ -99,9 +99,8 @@

        NOTE: If the first worker starts to insert data and detects that the destination partition is not empty, then the partition will
        be dropped and refilled; take it into account if you already have some data in the destination tables. You could directly
-       specify partitions that should be copied in <enabled_partitions/>.
-
-       NOTE: Currently the partition keys of the source and destination tables should be the same.
+       specify partitions that should be copied in <enabled_partitions/>; they should be in the quoted format, like the partition column
+       of the system.parts table.
        -->
        <engine>ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/hits2/{shard}/hits2', '{replica}', EventDate, (CounterID, EventDate), 8192)</engine>
@@ -30,7 +30,7 @@
    <enabled_partitions> 0 1 2</enabled_partitions>

    <!-- Engine of destination tables -->
-   <engine>ENGINE=ReplicatedMergeTree('/clickhouse/tables/cluster{cluster}/{shard}', '{replica}') PARTITION BY d % 3 ORDER BY d SETTINGS index_granularity = 16</engine>
+   <engine>ENGINE=ReplicatedMergeTree('/clickhouse/tables/cluster{cluster}/{shard}/hits', '{replica}') PARTITION BY d % 3 ORDER BY d SETTINGS index_granularity = 16</engine>

    <!-- Which sharding key to use while copying -->
    <sharding_key>d + 1</sharding_key>
@@ -0,0 +1,89 @@
+<?xml version="1.0"?>
+<yandex>
+    <!-- How many simultaneous workers are possible -->
+    <max_workers>4</max_workers>
+
+    <!-- Common settings for pull and push operations -->
+    <settings>
+        <connect_timeout>1</connect_timeout>
+    </settings>
+
+    <!-- Tasks -->
+    <tables>
+        <AB>
+            <cluster_pull>cluster0</cluster_pull>
+            <database_pull>default</database_pull>
+            <table_pull>a</table_pull>
+
+            <cluster_push>cluster1</cluster_push>
+            <database_push>default</database_push>
+            <table_push>b</table_push>
+
+            <!--
+            <enabled_partitions>
+                <partition></partition>
+            </enabled_partitions>
+            -->
+
+            <!-- Engine of destination tables -->
+            <engine>ENGINE=ReplicatedMergeTree('/clickhouse/tables/cluster{cluster}/{shard}/b', '{replica}') PARTITION BY toMonday(date) ORDER BY d</engine>
+
+            <!-- Which sharding key to use while copying -->
+            <sharding_key>d + 1</sharding_key>
+
+            <!-- Optional expression that filters the copied data -->
+            <!-- <where_condition></where_condition> -->
+        </AB>
+    </tables>
+
+    <!-- Configuration of clusters -->
+    <remote_servers>
+        <cluster0>
+            <shard>
+                <internal_replication>true</internal_replication>
+                <replica>
+                    <host>s0_0_0</host>
+                    <port>9000</port>
+                </replica>
+                <replica>
+                    <host>s0_0_1</host>
+                    <port>9000</port>
+                </replica>
+            </shard>
+            <shard>
+                <internal_replication>true</internal_replication>
+                <replica>
+                    <host>s0_1_0</host>
+                    <port>9000</port>
+                </replica>
+            </shard>
+        </cluster0>
+
+        <cluster1>
+            <shard>
+                <internal_replication>true</internal_replication>
+                <replica>
+                    <host>s1_0_0</host>
+                    <port>9000</port>
+                </replica>
+                <replica>
+                    <host>s1_0_1</host>
+                    <port>9000</port>
+                </replica>
+            </shard>
+            <shard>
+                <internal_replication>true</internal_replication>
+                <replica>
+                    <host>s1_1_0</host>
+                    <port>9000</port>
+                </replica>
+                <replica>
+                    <!-- Dead replica -->
+                    <host>255.255.255.255</host>
+                    <port>9000</port>
+                </replica>
+            </shard>
+        </cluster1>
+    </remote_servers>
+
+</yandex>
@@ -69,27 +69,86 @@ def started_cluster():
        cluster.shutdown()


-def _test_copying(cmd_options):
-    instance = cluster.instances['s0_0_0']
-
-    ddl_check_query(instance, "DROP TABLE IF EXISTS hits ON CLUSTER cluster0")
-    ddl_check_query(instance, "DROP TABLE IF EXISTS hits ON CLUSTER cluster1")
-    ddl_check_query(instance, "DROP TABLE IF EXISTS hits_all ON CLUSTER cluster0")
-    ddl_check_query(instance, "DROP TABLE IF EXISTS hits_all ON CLUSTER cluster1")
-
-    ddl_check_query(instance, "CREATE TABLE hits ON CLUSTER cluster0 (d UInt64, d1 UInt64 MATERIALIZED d+1) ENGINE=ReplicatedMergeTree('/clickhouse/tables/cluster_{cluster}/{shard}', '{replica}') PARTITION BY d % 3 ORDER BY d SETTINGS index_granularity = 16")
-    ddl_check_query(instance, "CREATE TABLE hits_all ON CLUSTER cluster0 (d UInt64) ENGINE=Distributed(cluster0, default, hits, d)")
-    ddl_check_query(instance, "CREATE TABLE hits_all ON CLUSTER cluster1 (d UInt64) ENGINE=Distributed(cluster1, default, hits, d + 1)")
-    instance.query("INSERT INTO hits_all SELECT * FROM system.numbers LIMIT 1002")
+class Task1:
+
+    def __init__(self, cluster):
+        self.cluster = cluster
+        self.zk_task_path = "/clickhouse-copier/task_simple"
+        self.copier_task_config = open(os.path.join(CURRENT_TEST_DIR, 'task0_description.xml'), 'r').read()
+
+    def start(self):
+        instance = cluster.instances['s0_0_0']
+
+        for cluster_num in ["0", "1"]:
+            ddl_check_query(instance, "DROP DATABASE IF EXISTS default ON CLUSTER cluster{}".format(cluster_num))
+            ddl_check_query(instance, "CREATE DATABASE IF NOT EXISTS default ON CLUSTER cluster{}".format(cluster_num))
+
+        ddl_check_query(instance, "CREATE TABLE hits ON CLUSTER cluster0 (d UInt64, d1 UInt64 MATERIALIZED d+1) ENGINE=ReplicatedMergeTree('/clickhouse/tables/cluster_{cluster}/{shard}/hits', '{replica}') PARTITION BY d % 3 ORDER BY d SETTINGS index_granularity = 16")
+        ddl_check_query(instance, "CREATE TABLE hits_all ON CLUSTER cluster0 (d UInt64) ENGINE=Distributed(cluster0, default, hits, d)")
+        ddl_check_query(instance, "CREATE TABLE hits_all ON CLUSTER cluster1 (d UInt64) ENGINE=Distributed(cluster1, default, hits, d + 1)")
+        instance.query("INSERT INTO hits_all SELECT * FROM system.numbers LIMIT 1002", settings={"insert_distributed_sync": 1})
+
+    def check(self):
+        assert TSV(self.cluster.instances['s0_0_0'].query("SELECT count() FROM hits_all")) == TSV("1002\n")
+        assert TSV(self.cluster.instances['s1_0_0'].query("SELECT count() FROM hits_all")) == TSV("1002\n")
+
+        assert TSV(self.cluster.instances['s1_0_0'].query("SELECT DISTINCT d % 2 FROM hits")) == TSV("1\n")
+        assert TSV(self.cluster.instances['s1_1_0'].query("SELECT DISTINCT d % 2 FROM hits")) == TSV("0\n")
+
+        instance = self.cluster.instances['s0_0_0']
+        ddl_check_query(instance, "DROP TABLE hits_all ON CLUSTER cluster0")
+        ddl_check_query(instance, "DROP TABLE hits_all ON CLUSTER cluster1")
+        ddl_check_query(instance, "DROP TABLE hits ON CLUSTER cluster0")
+        ddl_check_query(instance, "DROP TABLE hits ON CLUSTER cluster1")
+
+
+class Task2:
+
+    def __init__(self, cluster):
+        self.cluster = cluster
+        self.zk_task_path = "/clickhouse-copier/task_month_to_week_partition"
+        self.copier_task_config = open(os.path.join(CURRENT_TEST_DIR, 'task_month_to_week_description.xml'), 'r').read()
+
+    def start(self):
+        instance = cluster.instances['s0_0_0']
+
+        for cluster_num in ["0", "1"]:
+            ddl_check_query(instance, "DROP DATABASE IF EXISTS default ON CLUSTER cluster{}".format(cluster_num))
+            ddl_check_query(instance, "CREATE DATABASE IF NOT EXISTS default ON CLUSTER cluster{}".format(cluster_num))
+
+        ddl_check_query(instance, "CREATE TABLE a ON CLUSTER cluster0 (date Date, d UInt64, d1 UInt64 ALIAS d+1) ENGINE=ReplicatedMergeTree('/clickhouse/tables/cluster_{cluster}/{shard}/a', '{replica}', date, intHash64(d), (date, intHash64(d)), 8192)")
+        ddl_check_query(instance, "CREATE TABLE a_all ON CLUSTER cluster0 (date Date, d UInt64) ENGINE=Distributed(cluster0, default, a, d)")
+
+        instance.query("INSERT INTO a_all SELECT toDate(17581 + number) AS date, number AS d FROM system.numbers LIMIT 85", settings={"insert_distributed_sync": 1})
+
+    def check(self):
+        assert TSV(self.cluster.instances['s0_0_0'].query("SELECT count() FROM cluster(cluster0, default, a)")) == TSV("85\n")
+        assert TSV(self.cluster.instances['s1_0_0'].query("SELECT count(), uniqExact(date) FROM cluster(cluster1, default, b)")) == TSV("85\t85\n")
+
+        assert TSV(self.cluster.instances['s1_0_0'].query("SELECT DISTINCT d % 2 FROM b")) == TSV("1\n")
+        assert TSV(self.cluster.instances['s1_1_0'].query("SELECT DISTINCT d % 2 FROM b")) == TSV("0\n")
+
+        assert TSV(self.cluster.instances['s1_0_0'].query("SELECT uniqExact(partition) IN (12, 13) FROM system.parts WHERE active AND database='default' AND table='b'")) == TSV("1\n")
+        assert TSV(self.cluster.instances['s1_1_0'].query("SELECT uniqExact(partition) IN (12, 13) FROM system.parts WHERE active AND database='default' AND table='b'")) == TSV("1\n")
+
+        instance = cluster.instances['s0_0_0']
+        ddl_check_query(instance, "DROP TABLE a ON CLUSTER cluster0")
+        ddl_check_query(instance, "DROP TABLE b ON CLUSTER cluster1")
+
+
+def execute_task(task, cmd_options):
+    task.start()

    zk = cluster.get_kazoo_client('zoo1')
    print "Use ZooKeeper server: {}:{}".format(zk.hosts[0][0], zk.hosts[0][1])

-    zk_task_path = "/clickhouse-copier/task_simple"
+    zk_task_path = task.zk_task_path
    zk.ensure_path(zk_task_path)

-    copier_task_config = open(os.path.join(CURRENT_TEST_DIR, 'task0_description.xml'), 'r').read()
-    zk.create(zk_task_path + "/description", copier_task_config)
+    zk.create(zk_task_path + "/description", task.copier_task_config)

    # Run cluster-copier processes on each node
    docker_api = docker.from_env().api
@@ -97,7 +156,7 @@ def _test_copying(cmd_options):

    cmd = ['/usr/bin/clickhouse', 'copier',
           '--config', '/etc/clickhouse-server/config-preprocessed.xml',
-          '--task-path', '/clickhouse-copier/task_simple',
+          '--task-path', zk_task_path,
           '--base-dir', '/var/log/clickhouse-server/copier']
    cmd += cmd_options
@@ -119,27 +178,26 @@ def _test_copying(cmd_options):

    assert res['ExitCode'] == 0, "Instance: {} ({}). Info: {}".format(instance.name, instance.ip_address, repr(res))

-    assert TSV(cluster.instances['s0_0_0'].query("SELECT count() FROM hits_all")) == TSV("1002\n")
-    assert TSV(cluster.instances['s1_0_0'].query("SELECT count() FROM hits_all")) == TSV("1002\n")
-
-    assert TSV(cluster.instances['s1_0_0'].query("SELECT DISTINCT d % 2 FROM hits")) == TSV("1\n")
-    assert TSV(cluster.instances['s1_1_0'].query("SELECT DISTINCT d % 2 FROM hits")) == TSV("0\n")
-
-    zk.delete(zk_task_path, recursive=True)
-    ddl_check_query(instance, "DROP TABLE hits_all ON CLUSTER cluster0")
-    ddl_check_query(instance, "DROP TABLE hits_all ON CLUSTER cluster1")
-    ddl_check_query(instance, "DROP TABLE hits ON CLUSTER cluster0")
-    ddl_check_query(instance, "DROP TABLE hits ON CLUSTER cluster1")
+    try:
+        task.check()
+    finally:
+        zk.delete(zk_task_path, recursive=True)


-def test_copy_simple(started_cluster):
-    _test_copying([])
+def test_copy1_simple(started_cluster):
+    execute_task(Task1(started_cluster), [])


-def test_copy_with_recovering(started_cluster):
-    _test_copying(['--copy-fault-probability', str(COPYING_FAIL_PROBABILITY)])
+def test_copy1_with_recovering(started_cluster):
+    execute_task(Task1(started_cluster), ['--copy-fault-probability', str(COPYING_FAIL_PROBABILITY)])
+
+
+def test_copy_month_to_week_partition(started_cluster):
+    execute_task(Task2(started_cluster), [])
+
+
+def test_copy_month_to_week_partition_with_recovering(started_cluster):
+    execute_task(Task2(started_cluster), ['--copy-fault-probability', str(0.1)])


 if __name__ == '__main__':
     with contextmanager(started_cluster)() as cluster:
         for name, instance in cluster.instances.items():