mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-25 17:12:03 +00:00
just comments
This commit is contained in:
parent
1c41d73d9b
commit
b481682a1e
File diff suppressed because it is too large
Load Diff
@ -71,11 +71,9 @@ private:
|
||||
|
||||
void mainImpl();
|
||||
|
||||
void setupLogging();
|
||||
|
||||
std::string config_xml_path;
|
||||
std::string task_path;
|
||||
std::string log_level = "debug";
|
||||
std::string log_level = "trace";
|
||||
bool is_safe_mode = false;
|
||||
double copy_fault_probability = 0;
|
||||
bool is_help = false;
|
||||
|
@ -1,8 +1,746 @@
|
||||
//
|
||||
// Created by jakalletti on 2/7/20.
|
||||
//
|
||||
#pragma once
|
||||
|
||||
#ifndef CLICKHOUSE_INTERNALS_H
|
||||
#define CLICKHOUSE_INTERNALS_H
|
||||
#include <chrono>
|
||||
#include <optional>
|
||||
#include <Poco/Util/XMLConfiguration.h>
|
||||
#include <Poco/Logger.h>
|
||||
#include <Poco/ConsoleChannel.h>
|
||||
#include <Poco/FormattingChannel.h>
|
||||
#include <Poco/PatternFormatter.h>
|
||||
#include <Poco/UUIDGenerator.h>
|
||||
#include <Poco/File.h>
|
||||
#include <Poco/Process.h>
|
||||
#include <Poco/FileChannel.h>
|
||||
#include <Poco/SplitterChannel.h>
|
||||
#include <Poco/Util/HelpFormatter.h>
|
||||
#include <boost/algorithm/string.hpp>
|
||||
#include <pcg_random.hpp>
|
||||
#include <common/logger_useful.h>
|
||||
#include <Common/ThreadPool.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/ZooKeeper/ZooKeeper.h>
|
||||
#include <Common/ZooKeeper/KeeperException.h>
|
||||
#include <Common/getFQDNOrHostName.h>
|
||||
#include <Common/isLocalAddress.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <Common/ClickHouseRevision.h>
|
||||
#include <Common/formatReadable.h>
|
||||
#include <Common/DNSResolver.h>
|
||||
#include <Common/CurrentThread.h>
|
||||
#include <Common/escapeForFileName.h>
|
||||
#include <Common/getNumberOfPhysicalCPUCores.h>
|
||||
#include <Common/ThreadStatus.h>
|
||||
#include <Client/Connection.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Interpreters/Cluster.h>
|
||||
#include <Interpreters/InterpreterFactory.h>
|
||||
#include <Interpreters/InterpreterExistsQuery.h>
|
||||
#include <Interpreters/InterpreterShowCreateQuery.h>
|
||||
#include <Interpreters/InterpreterDropQuery.h>
|
||||
#include <Interpreters/InterpreterCreateQuery.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <Parsers/ParserCreateQuery.h>
|
||||
#include <Parsers/parseQuery.h>
|
||||
#include <Parsers/ParserQuery.h>
|
||||
#include <Parsers/ASTCreateQuery.h>
|
||||
#include <Parsers/queryToString.h>
|
||||
#include <Parsers/ASTDropQuery.h>
|
||||
#include <Parsers/ASTLiteral.h>
|
||||
#include <Parsers/ASTExpressionList.h>
|
||||
#include <Formats/FormatSettings.h>
|
||||
#include <DataStreams/RemoteBlockInputStream.h>
|
||||
#include <DataStreams/SquashingBlockInputStream.h>
|
||||
#include <DataStreams/AsynchronousBlockInputStream.h>
|
||||
#include <DataStreams/copyData.h>
|
||||
#include <DataStreams/NullBlockOutputStream.h>
|
||||
#include <IO/ConnectionTimeouts.h>
|
||||
#include <IO/Operators.h>
|
||||
#include <IO/ReadBufferFromString.h>
|
||||
#include <IO/ReadBufferFromFile.h>
|
||||
#include <Functions/registerFunctions.h>
|
||||
#include <TableFunctions/registerTableFunctions.h>
|
||||
#include <AggregateFunctions/registerAggregateFunctions.h>
|
||||
#include <Storages/registerStorages.h>
|
||||
#include <Storages/StorageDistributed.h>
|
||||
#include <Dictionaries/registerDictionaries.h>
|
||||
#include <Disks/registerDisks.h>
|
||||
#include <Databases/DatabaseMemory.h>
|
||||
#include <Common/StatusFile.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int NO_ZOOKEEPER;
|
||||
extern const int BAD_ARGUMENTS;
|
||||
extern const int UNKNOWN_TABLE;
|
||||
extern const int UNFINISHED;
|
||||
extern const int UNKNOWN_ELEMENT_IN_CONFIG;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
using DatabaseAndTableName = std::pair<String, String>;
|
||||
|
||||
String getQuotedTable(const String & database, const String & table)
|
||||
{
|
||||
if (database.empty())
|
||||
{
|
||||
return backQuoteIfNeed(table);
|
||||
}
|
||||
|
||||
return backQuoteIfNeed(database) + "." + backQuoteIfNeed(table);
|
||||
}
|
||||
|
||||
String getQuotedTable(const DatabaseAndTableName & db_and_table)
|
||||
{
|
||||
return getQuotedTable(db_and_table.first, db_and_table.second);
|
||||
}
|
||||
|
||||
|
||||
enum class TaskState
|
||||
{
|
||||
Started = 0,
|
||||
Finished,
|
||||
Unknown
|
||||
};
|
||||
|
||||
/// Used to mark status of shard partition tasks
|
||||
struct TaskStateWithOwner
|
||||
{
|
||||
TaskStateWithOwner() = default;
|
||||
TaskStateWithOwner(TaskState state_, const String & owner_) : state(state_), owner(owner_) {}
|
||||
|
||||
TaskState state{TaskState::Unknown};
|
||||
String owner;
|
||||
|
||||
static String getData(TaskState state, const String & owner)
|
||||
{
|
||||
return TaskStateWithOwner(state, owner).toString();
|
||||
}
|
||||
|
||||
String toString()
|
||||
{
|
||||
WriteBufferFromOwnString wb;
|
||||
wb << static_cast<UInt32>(state) << "\n" << escape << owner;
|
||||
return wb.str();
|
||||
}
|
||||
|
||||
static TaskStateWithOwner fromString(const String & data)
|
||||
{
|
||||
ReadBufferFromString rb(data);
|
||||
TaskStateWithOwner res;
|
||||
UInt32 state;
|
||||
|
||||
rb >> state >> "\n" >> escape >> res.owner;
|
||||
|
||||
if (state >= static_cast<int>(TaskState::Unknown))
|
||||
throw Exception("Unknown state " + data, ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
res.state = static_cast<TaskState>(state);
|
||||
return res;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
/// Hierarchical description of the tasks
|
||||
struct ShardPartition;
|
||||
struct TaskShard;
|
||||
struct TaskTable;
|
||||
struct TaskCluster;
|
||||
struct ClusterPartition;
|
||||
|
||||
using TasksPartition = std::map<String, ShardPartition, std::greater<>>;
|
||||
using ShardInfo = Cluster::ShardInfo;
|
||||
using TaskShardPtr = std::shared_ptr<TaskShard>;
|
||||
using TasksShard = std::vector<TaskShardPtr>;
|
||||
using TasksTable = std::list<TaskTable>;
|
||||
using ClusterPartitions = std::map<String, ClusterPartition, std::greater<>>;
|
||||
|
||||
|
||||
/// Just destination partition of a shard
|
||||
/// I don't know what this comment means.
|
||||
/// In short, when we discovered what shards contain currently processing partition,
|
||||
/// This class describes a partition (name) that is stored on the shard (parent).
|
||||
struct ShardPartition
|
||||
{
|
||||
ShardPartition(TaskShard & parent, String name_quoted_) : task_shard(parent), name(std::move(name_quoted_)) {}
|
||||
|
||||
String getPartitionPath() const;
|
||||
String getPartitionCleanStartPath() const;
|
||||
String getCommonPartitionIsDirtyPath() const;
|
||||
String getCommonPartitionIsCleanedPath() const;
|
||||
String getPartitionActiveWorkersPath() const;
|
||||
String getActiveWorkerPath() const;
|
||||
String getPartitionShardsPath() const;
|
||||
String getShardStatusPath() const;
|
||||
|
||||
TaskShard & task_shard;
|
||||
String name;
|
||||
};
|
||||
|
||||
|
||||
struct ShardPriority
|
||||
{
|
||||
UInt8 is_remote = 1;
|
||||
size_t hostname_difference = 0;
|
||||
UInt8 random = 0;
|
||||
|
||||
static bool greaterPriority(const ShardPriority & current, const ShardPriority & other)
|
||||
{
|
||||
return std::forward_as_tuple(current.is_remote, current.hostname_difference, current.random)
|
||||
< std::forward_as_tuple(other.is_remote, other.hostname_difference, other.random);
|
||||
}
|
||||
};
|
||||
|
||||
/// Tables has many shards and table's partiton can be stored on different shards.
|
||||
/// When we copy partition we have to discover it's shards (shards which store this partition)
|
||||
/// For simplier retrieval of which partitions are stored in particular shard we created TaskShard.
|
||||
struct TaskShard
|
||||
{
|
||||
TaskShard(TaskTable & parent, const ShardInfo & info_) : task_table(parent), info(info_) {}
|
||||
|
||||
TaskTable & task_table;
|
||||
|
||||
ShardInfo info;
|
||||
UInt32 numberInCluster() const { return info.shard_num; }
|
||||
UInt32 indexInCluster() const { return info.shard_num - 1; }
|
||||
|
||||
String getDescription() const;
|
||||
String getHostNameExample() const;
|
||||
|
||||
/// Used to sort clusters by their proximity
|
||||
ShardPriority priority;
|
||||
|
||||
/// Column with unique destination partitions (computed from engine_push_partition_key expr.) in the shard
|
||||
ColumnWithTypeAndName partition_key_column;
|
||||
|
||||
/// There is a task for each destination partition
|
||||
TasksPartition partition_tasks;
|
||||
|
||||
/// Which partitions have been checked for existence
|
||||
/// If some partition from this lists is exists, it is in partition_tasks
|
||||
std::set<String> checked_partitions;
|
||||
|
||||
/// Last CREATE TABLE query of the table of the shard
|
||||
ASTPtr current_pull_table_create_query;
|
||||
|
||||
/// Internal distributed tables
|
||||
DatabaseAndTableName table_read_shard;
|
||||
DatabaseAndTableName table_split_shard;
|
||||
};
|
||||
|
||||
|
||||
/// Contains info about all shards that contain a partition
|
||||
struct ClusterPartition
|
||||
{
|
||||
double elapsed_time_seconds = 0;
|
||||
UInt64 bytes_copied = 0;
|
||||
UInt64 rows_copied = 0;
|
||||
UInt64 blocks_copied = 0;
|
||||
|
||||
UInt64 total_tries = 0;
|
||||
};
|
||||
|
||||
|
||||
struct TaskTable
|
||||
{
|
||||
TaskTable(TaskCluster & parent, const Poco::Util::AbstractConfiguration & config, const String & prefix,
|
||||
const String & table_key);
|
||||
|
||||
TaskCluster & task_cluster;
|
||||
|
||||
String getPartitionPath(const String & partition_name) const;
|
||||
[[maybe_unused]] String getPartitionPathWithPieceNumber(const String & partition_name, size_t current_piece_number) const;
|
||||
String getPartitionIsDirtyPath(const String & partition_name) const;
|
||||
String getPartitionIsCleanedPath(const String & partition_name) const;
|
||||
String getPartitionTaskStatusPath(const String & partition_name) const;
|
||||
|
||||
/// Partitions will be splitted into number-of-splits pieces.
|
||||
/// Each piece will be copied independently. (10 by default)
|
||||
size_t number_of_splits;
|
||||
|
||||
String name_in_config;
|
||||
|
||||
/// Used as task ID
|
||||
String table_id;
|
||||
|
||||
/// Source cluster and table
|
||||
String cluster_pull_name;
|
||||
DatabaseAndTableName table_pull;
|
||||
|
||||
/// Destination cluster and table
|
||||
String cluster_push_name;
|
||||
DatabaseAndTableName table_push;
|
||||
|
||||
/// Storage of destination table
|
||||
String engine_push_str;
|
||||
ASTPtr engine_push_ast;
|
||||
ASTPtr engine_push_partition_key_ast;
|
||||
|
||||
/// A Distributed table definition used to split data
|
||||
String sharding_key_str;
|
||||
ASTPtr sharding_key_ast;
|
||||
ASTPtr engine_split_ast;
|
||||
|
||||
/// Additional WHERE expression to filter input data
|
||||
String where_condition_str;
|
||||
ASTPtr where_condition_ast;
|
||||
|
||||
/// Resolved clusters
|
||||
ClusterPtr cluster_pull;
|
||||
ClusterPtr cluster_push;
|
||||
|
||||
/// Filter partitions that should be copied
|
||||
bool has_enabled_partitions = false;
|
||||
Strings enabled_partitions;
|
||||
NameSet enabled_partitions_set;
|
||||
|
||||
/// Prioritized list of shards
|
||||
/// all_shards contains information about all shards in the table.
|
||||
/// So we have to check whether particular shard have current partiton or not while processing.
|
||||
TasksShard all_shards;
|
||||
TasksShard local_shards;
|
||||
|
||||
/// All partitions of the current table.
|
||||
ClusterPartitions cluster_partitions;
|
||||
NameSet finished_cluster_partitions;
|
||||
|
||||
/// Parition names to process in user-specified order
|
||||
Strings ordered_partition_names;
|
||||
|
||||
ClusterPartition & getClusterPartition(const String & partition_name)
|
||||
{
|
||||
auto it = cluster_partitions.find(partition_name);
|
||||
if (it == cluster_partitions.end())
|
||||
throw Exception("There are no cluster partition " + partition_name + " in " + table_id, ErrorCodes::LOGICAL_ERROR);
|
||||
return it->second;
|
||||
}
|
||||
|
||||
Stopwatch watch;
|
||||
UInt64 bytes_copied = 0;
|
||||
UInt64 rows_copied = 0;
|
||||
|
||||
template <typename RandomEngine>
|
||||
void initShards(RandomEngine && random_engine);
|
||||
};
|
||||
|
||||
|
||||
struct TaskCluster
|
||||
{
|
||||
TaskCluster(const String & task_zookeeper_path_, const String & default_local_database_)
|
||||
: task_zookeeper_path(task_zookeeper_path_), default_local_database(default_local_database_) {}
|
||||
|
||||
void loadTasks(const Poco::Util::AbstractConfiguration & config, const String & base_key = "");
|
||||
|
||||
/// Set (or update) settings and max_workers param
|
||||
void reloadSettings(const Poco::Util::AbstractConfiguration & config, const String & base_key = "");
|
||||
|
||||
/// Base node for all tasks. Its structure:
|
||||
/// workers/ - directory with active workers (amount of them is less or equal max_workers)
|
||||
/// description - node with task configuration
|
||||
/// table_table1/ - directories with per-partition copying status
|
||||
String task_zookeeper_path;
|
||||
|
||||
/// Database used to create temporary Distributed tables
|
||||
String default_local_database;
|
||||
|
||||
/// Limits number of simultaneous workers
|
||||
UInt64 max_workers = 0;
|
||||
|
||||
/// Base settings for pull and push
|
||||
Settings settings_common;
|
||||
/// Settings used to fetch data
|
||||
Settings settings_pull;
|
||||
/// Settings used to insert data
|
||||
Settings settings_push;
|
||||
|
||||
String clusters_prefix;
|
||||
|
||||
/// Subtasks
|
||||
TasksTable table_tasks;
|
||||
|
||||
std::random_device random_device;
|
||||
pcg64 random_engine;
|
||||
};
|
||||
|
||||
|
||||
|
||||
struct MultiTransactionInfo
|
||||
{
|
||||
int32_t code;
|
||||
Coordination::Requests requests;
|
||||
Coordination::Responses responses;
|
||||
};
|
||||
|
||||
// Creates AST representing 'ENGINE = Distributed(cluster, db, table, [sharding_key])
|
||||
std::shared_ptr<ASTStorage> createASTStorageDistributed(
|
||||
const String & cluster_name, const String & database, const String & table, const ASTPtr & sharding_key_ast = nullptr)
|
||||
{
|
||||
auto args = std::make_shared<ASTExpressionList>();
|
||||
args->children.emplace_back(std::make_shared<ASTLiteral>(cluster_name));
|
||||
args->children.emplace_back(std::make_shared<ASTIdentifier>(database));
|
||||
args->children.emplace_back(std::make_shared<ASTIdentifier>(table));
|
||||
if (sharding_key_ast)
|
||||
args->children.emplace_back(sharding_key_ast);
|
||||
|
||||
auto engine = std::make_shared<ASTFunction>();
|
||||
engine->name = "Distributed";
|
||||
engine->arguments = args;
|
||||
|
||||
auto storage = std::make_shared<ASTStorage>();
|
||||
storage->set(storage->engine, engine);
|
||||
|
||||
return storage;
|
||||
}
|
||||
|
||||
|
||||
BlockInputStreamPtr squashStreamIntoOneBlock(const BlockInputStreamPtr & stream)
|
||||
{
|
||||
return std::make_shared<SquashingBlockInputStream>(
|
||||
stream,
|
||||
std::numeric_limits<size_t>::max(),
|
||||
std::numeric_limits<size_t>::max());
|
||||
}
|
||||
|
||||
Block getBlockWithAllStreamData(const BlockInputStreamPtr & stream)
|
||||
{
|
||||
return squashStreamIntoOneBlock(stream)->read();
|
||||
}
|
||||
|
||||
|
||||
/// Path getters
|
||||
|
||||
String TaskTable::getPartitionPath(const String & partition_name) const
|
||||
{
|
||||
return task_cluster.task_zookeeper_path // root
|
||||
+ "/tables/" + table_id // tables/dst_cluster.merge.hits
|
||||
+ "/" + escapeForFileName(partition_name); // 201701
|
||||
}
|
||||
|
||||
String TaskTable::getPartitionPathWithPieceNumber(const String & partition_name, size_t current_piece_number) const
|
||||
{
|
||||
return getPartitionPath(partition_name) + "/" + std::to_string(current_piece_number); // 1...number_of_splits
|
||||
}
|
||||
|
||||
String ShardPartition::getPartitionCleanStartPath() const
|
||||
{
|
||||
return getPartitionPath() + "/clean_start";
|
||||
}
|
||||
|
||||
String ShardPartition::getPartitionPath() const
|
||||
{
|
||||
return task_shard.task_table.getPartitionPath(name);
|
||||
}
|
||||
|
||||
String ShardPartition::getShardStatusPath() const
|
||||
{
|
||||
// schema: /<root...>/tables/<table>/<partition>/shards/<shard>
|
||||
// e.g. /root/table_test.hits/201701/shards/1
|
||||
return getPartitionShardsPath() + "/" + toString(task_shard.numberInCluster());
|
||||
}
|
||||
|
||||
String ShardPartition::getPartitionShardsPath() const
|
||||
{
|
||||
return getPartitionPath() + "/shards";
|
||||
}
|
||||
|
||||
String ShardPartition::getPartitionActiveWorkersPath() const
|
||||
{
|
||||
return getPartitionPath() + "/partition_active_workers";
|
||||
}
|
||||
|
||||
String ShardPartition::getActiveWorkerPath() const
|
||||
{
|
||||
return getPartitionActiveWorkersPath() + "/" + toString(task_shard.numberInCluster());
|
||||
}
|
||||
|
||||
String ShardPartition::getCommonPartitionIsDirtyPath() const
|
||||
{
|
||||
return getPartitionPath() + "/is_dirty";
|
||||
}
|
||||
|
||||
String ShardPartition::getCommonPartitionIsCleanedPath() const
|
||||
{
|
||||
return getCommonPartitionIsDirtyPath() + "/cleaned";
|
||||
}
|
||||
|
||||
String TaskTable::getPartitionIsDirtyPath(const String & partition_name) const
|
||||
{
|
||||
return getPartitionPath(partition_name) + "/is_dirty";
|
||||
}
|
||||
|
||||
String TaskTable::getPartitionIsCleanedPath(const String & partition_name) const
|
||||
{
|
||||
return getPartitionIsDirtyPath(partition_name) + "/cleaned";
|
||||
}
|
||||
|
||||
String TaskTable::getPartitionTaskStatusPath(const String & partition_name) const
|
||||
{
|
||||
return getPartitionPath(partition_name) + "/shards";
|
||||
}
|
||||
|
||||
String TaskShard::getDescription() const
|
||||
{
|
||||
std::stringstream ss;
|
||||
ss << "N" << numberInCluster()
|
||||
<< " (having a replica " << getHostNameExample()
|
||||
<< ", pull table " + getQuotedTable(task_table.table_pull)
|
||||
<< " of cluster " + task_table.cluster_pull_name << ")";
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
String TaskShard::getHostNameExample() const
|
||||
{
|
||||
auto & replicas = task_table.cluster_pull->getShardsAddresses().at(indexInCluster());
|
||||
return replicas.at(0).readableString();
|
||||
}
|
||||
|
||||
|
||||
static bool isExtendedDefinitionStorage(const ASTPtr & storage_ast)
|
||||
{
|
||||
const auto & storage = storage_ast->as<ASTStorage &>();
|
||||
return storage.partition_by || storage.order_by || storage.sample_by;
|
||||
}
|
||||
|
||||
static ASTPtr extractPartitionKey(const ASTPtr & storage_ast)
|
||||
{
|
||||
String storage_str = queryToString(storage_ast);
|
||||
|
||||
const auto & storage = storage_ast->as<ASTStorage &>();
|
||||
const auto & engine = storage.engine->as<ASTFunction &>();
|
||||
|
||||
if (!endsWith(engine.name, "MergeTree"))
|
||||
{
|
||||
throw Exception("Unsupported engine was specified in " + storage_str + ", only *MergeTree engines are supported",
|
||||
ErrorCodes::BAD_ARGUMENTS);
|
||||
}
|
||||
|
||||
if (isExtendedDefinitionStorage(storage_ast))
|
||||
{
|
||||
if (storage.partition_by)
|
||||
return storage.partition_by->clone();
|
||||
|
||||
static const char * all = "all";
|
||||
return std::make_shared<ASTLiteral>(Field(all, strlen(all)));
|
||||
}
|
||||
else
|
||||
{
|
||||
bool is_replicated = startsWith(engine.name, "Replicated");
|
||||
size_t min_args = is_replicated ? 3 : 1;
|
||||
|
||||
if (!engine.arguments)
|
||||
throw Exception("Expected arguments in " + storage_str, ErrorCodes::BAD_ARGUMENTS);
|
||||
|
||||
ASTPtr arguments_ast = engine.arguments->clone();
|
||||
ASTs & arguments = arguments_ast->children;
|
||||
|
||||
if (arguments.size() < min_args)
|
||||
throw Exception("Expected at least " + toString(min_args) + " arguments in " + storage_str, ErrorCodes::BAD_ARGUMENTS);
|
||||
|
||||
ASTPtr & month_arg = is_replicated ? arguments[2] : arguments[1];
|
||||
return makeASTFunction("toYYYYMM", month_arg->clone());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
TaskTable::TaskTable(TaskCluster & parent, const Poco::Util::AbstractConfiguration & config, const String & prefix_,
|
||||
const String & table_key)
|
||||
: task_cluster(parent)
|
||||
{
|
||||
String table_prefix = prefix_ + "." + table_key + ".";
|
||||
|
||||
name_in_config = table_key;
|
||||
|
||||
number_of_splits = config.getUInt64("number_of_splits", 10);
|
||||
|
||||
cluster_pull_name = config.getString(table_prefix + "cluster_pull");
|
||||
cluster_push_name = config.getString(table_prefix + "cluster_push");
|
||||
|
||||
table_pull.first = config.getString(table_prefix + "database_pull");
|
||||
table_pull.second = config.getString(table_prefix + "table_pull");
|
||||
|
||||
table_push.first = config.getString(table_prefix + "database_push");
|
||||
table_push.second = config.getString(table_prefix + "table_push");
|
||||
|
||||
/// Used as node name in ZooKeeper
|
||||
table_id = escapeForFileName(cluster_push_name)
|
||||
+ "." + escapeForFileName(table_push.first)
|
||||
+ "." + escapeForFileName(table_push.second);
|
||||
|
||||
engine_push_str = config.getString(table_prefix + "engine");
|
||||
{
|
||||
ParserStorage parser_storage;
|
||||
engine_push_ast = parseQuery(parser_storage, engine_push_str, 0);
|
||||
engine_push_partition_key_ast = extractPartitionKey(engine_push_ast);
|
||||
}
|
||||
|
||||
sharding_key_str = config.getString(table_prefix + "sharding_key");
|
||||
{
|
||||
ParserExpressionWithOptionalAlias parser_expression(false);
|
||||
sharding_key_ast = parseQuery(parser_expression, sharding_key_str, 0);
|
||||
engine_split_ast = createASTStorageDistributed(cluster_push_name, table_push.first, table_push.second, sharding_key_ast);
|
||||
}
|
||||
|
||||
where_condition_str = config.getString(table_prefix + "where_condition", "");
|
||||
if (!where_condition_str.empty())
|
||||
{
|
||||
ParserExpressionWithOptionalAlias parser_expression(false);
|
||||
where_condition_ast = parseQuery(parser_expression, where_condition_str, 0);
|
||||
|
||||
// Will use canonical expression form
|
||||
where_condition_str = queryToString(where_condition_ast);
|
||||
}
|
||||
|
||||
String enabled_partitions_prefix = table_prefix + "enabled_partitions";
|
||||
has_enabled_partitions = config.has(enabled_partitions_prefix);
|
||||
|
||||
if (has_enabled_partitions)
|
||||
{
|
||||
Strings keys;
|
||||
config.keys(enabled_partitions_prefix, keys);
|
||||
|
||||
if (keys.empty())
|
||||
{
|
||||
/// Parse list of partition from space-separated string
|
||||
String partitions_str = config.getString(table_prefix + "enabled_partitions");
|
||||
boost::trim_if(partitions_str, isWhitespaceASCII);
|
||||
boost::split(enabled_partitions, partitions_str, isWhitespaceASCII, boost::token_compress_on);
|
||||
}
|
||||
else
|
||||
{
|
||||
/// Parse sequence of <partition>...</partition>
|
||||
for (const String & key : keys)
|
||||
{
|
||||
if (!startsWith(key, "partition"))
|
||||
throw Exception("Unknown key " + key + " in " + enabled_partitions_prefix, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG);
|
||||
|
||||
enabled_partitions.emplace_back(config.getString(enabled_partitions_prefix + "." + key));
|
||||
}
|
||||
}
|
||||
|
||||
std::copy(enabled_partitions.begin(), enabled_partitions.end(), std::inserter(enabled_partitions_set, enabled_partitions_set.begin()));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static ShardPriority getReplicasPriority(const Cluster::Addresses & replicas, const std::string & local_hostname, UInt8 random)
|
||||
{
|
||||
ShardPriority res;
|
||||
|
||||
if (replicas.empty())
|
||||
return res;
|
||||
|
||||
res.is_remote = 1;
|
||||
for (auto & replica : replicas)
|
||||
{
|
||||
if (isLocalAddress(DNSResolver::instance().resolveHost(replica.host_name)))
|
||||
{
|
||||
res.is_remote = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
res.hostname_difference = std::numeric_limits<size_t>::max();
|
||||
for (auto & replica : replicas)
|
||||
{
|
||||
size_t difference = getHostNameDifference(local_hostname, replica.host_name);
|
||||
res.hostname_difference = std::min(difference, res.hostname_difference);
|
||||
}
|
||||
|
||||
res.random = random;
|
||||
return res;
|
||||
}
|
||||
|
||||
template<typename RandomEngine>
|
||||
void TaskTable::initShards(RandomEngine && random_engine)
|
||||
{
|
||||
const String & fqdn_name = getFQDNOrHostName();
|
||||
std::uniform_int_distribution<UInt8> get_urand(0, std::numeric_limits<UInt8>::max());
|
||||
|
||||
// Compute the priority
|
||||
for (auto & shard_info : cluster_pull->getShardsInfo())
|
||||
{
|
||||
TaskShardPtr task_shard = std::make_shared<TaskShard>(*this, shard_info);
|
||||
const auto & replicas = cluster_pull->getShardsAddresses().at(task_shard->indexInCluster());
|
||||
task_shard->priority = getReplicasPriority(replicas, fqdn_name, get_urand(random_engine));
|
||||
|
||||
all_shards.emplace_back(task_shard);
|
||||
}
|
||||
|
||||
// Sort by priority
|
||||
std::sort(all_shards.begin(), all_shards.end(),
|
||||
[] (const TaskShardPtr & lhs, const TaskShardPtr & rhs)
|
||||
{
|
||||
return ShardPriority::greaterPriority(lhs->priority, rhs->priority);
|
||||
});
|
||||
|
||||
// Cut local shards
|
||||
auto it_first_remote = std::lower_bound(all_shards.begin(), all_shards.end(), 1,
|
||||
[] (const TaskShardPtr & lhs, UInt8 is_remote)
|
||||
{
|
||||
return lhs->priority.is_remote < is_remote;
|
||||
});
|
||||
|
||||
local_shards.assign(all_shards.begin(), it_first_remote);
|
||||
}
|
||||
|
||||
|
||||
void TaskCluster::loadTasks(const Poco::Util::AbstractConfiguration & config, const String & base_key)
|
||||
{
|
||||
String prefix = base_key.empty() ? "" : base_key + ".";
|
||||
|
||||
clusters_prefix = prefix + "remote_servers";
|
||||
if (!config.has(clusters_prefix))
|
||||
throw Exception("You should specify list of clusters in " + clusters_prefix, ErrorCodes::BAD_ARGUMENTS);
|
||||
|
||||
Poco::Util::AbstractConfiguration::Keys tables_keys;
|
||||
config.keys(prefix + "tables", tables_keys);
|
||||
|
||||
for (const auto & table_key : tables_keys)
|
||||
{
|
||||
table_tasks.emplace_back(*this, config, prefix + "tables", table_key);
|
||||
}
|
||||
}
|
||||
|
||||
void TaskCluster::reloadSettings(const Poco::Util::AbstractConfiguration & config, const String & base_key)
|
||||
{
|
||||
String prefix = base_key.empty() ? "" : base_key + ".";
|
||||
|
||||
max_workers = config.getUInt64(prefix + "max_workers");
|
||||
|
||||
settings_common = Settings();
|
||||
if (config.has(prefix + "settings"))
|
||||
settings_common.loadSettingsFromConfig(prefix + "settings", config);
|
||||
|
||||
settings_pull = settings_common;
|
||||
if (config.has(prefix + "settings_pull"))
|
||||
settings_pull.loadSettingsFromConfig(prefix + "settings_pull", config);
|
||||
|
||||
settings_push = settings_common;
|
||||
if (config.has(prefix + "settings_push"))
|
||||
settings_push.loadSettingsFromConfig(prefix + "settings_push", config);
|
||||
|
||||
auto set_default_value = [] (auto && setting, auto && default_value)
|
||||
{
|
||||
setting = setting.changed ? setting.value : default_value;
|
||||
};
|
||||
|
||||
/// Override important settings
|
||||
settings_pull.readonly = 1;
|
||||
settings_push.insert_distributed_sync = 1;
|
||||
set_default_value(settings_pull.load_balancing, LoadBalancing::NEAREST_HOSTNAME);
|
||||
set_default_value(settings_pull.max_threads, 1);
|
||||
set_default_value(settings_pull.max_block_size, 8192UL);
|
||||
set_default_value(settings_pull.preferred_block_size_bytes, 0);
|
||||
set_default_value(settings_push.insert_distributed_timeout, 0);
|
||||
}
|
||||
|
||||
|
||||
} // end of an anonymous namespace
|
||||
}
|
||||
|
||||
#endif //CLICKHOUSE_INTERNALS_H
|
||||
|
Loading…
Reference in New Issue
Block a user