#include #include #include #include #include #include #include #include #include #include #include #include #include #include namespace DB { namespace { template inline std::string queryToString(const ASTPtr & query) { const auto & query_ast = typeid_cast(*query); std::ostringstream s; formatAST(query_ast, s, 0, false, true); return s.str(); } /// select and insert query have different types for database and table, hence two specializations template struct rewrite_traits; template <> struct rewrite_traits { using type = ASTPtr; }; template <> struct rewrite_traits { using type = const std::string &; }; template typename rewrite_traits::type rewrite(const std::string & name, const ASTIdentifier::Kind kind) = delete; /// select query has database and table names as AST pointers template <> inline ASTPtr rewrite(const std::string & name, const ASTIdentifier::Kind kind) { return new ASTIdentifier{{}, name, kind}; } /// insert query has database and table names as bare strings template <> inline const std::string & rewrite(const std::string & name, ASTIdentifier::Kind) { return name; } /// Создает копию запроса, меняет имена базы данных и таблицы. template inline ASTPtr rewriteQuery(const ASTPtr & query, const std::string & database, const std::string & table) { /// Создаем копию запроса. auto modified_query_ast = query->clone(); /// Меняем имена таблицы и базы данных auto & modified_query = typeid_cast(*modified_query_ast); modified_query.database = rewrite(database, ASTIdentifier::Database); modified_query.table = rewrite(table, ASTIdentifier::Table); /// copy elision and RVO will work as intended, but let's be more explicit return std::move(modified_query_ast); } } StorageDistributed::StorageDistributed( const std::string & name_, NamesAndTypesListPtr columns_, const String & remote_database_, const String & remote_table_, Cluster & cluster_, const Context & context_, const ASTPtr & sharding_key_, const String & data_path_) : name(name_), columns(columns_), remote_database(remote_database_), remote_table(remote_table_), context(context_), cluster(cluster_), sharding_key_expr(sharding_key_ ? ExpressionAnalyzer(sharding_key_, context, *columns).getActions(false) : nullptr), sharding_key_column_name(sharding_key_ ? sharding_key_->getColumnName() : String{}), write_enabled(cluster.getLocalNodesNum() + cluster.pools.size() < 2 || sharding_key_), path(data_path_ + escapeForFileName(name) + '/') { std::cout << "table `" << name << "` in " << path << std::endl; createDirectoryMonitors(); } StoragePtr StorageDistributed::create( const std::string & name_, NamesAndTypesListPtr columns_, const String & remote_database_, const String & remote_table_, const String & cluster_name, Context & context_, const ASTPtr & sharding_key_, const String & data_path_) { context_.initClusters(); return (new StorageDistributed{ name_, columns_, remote_database_, remote_table_, context_.getCluster(cluster_name), context_, sharding_key_, data_path_ })->thisPtr(); } StoragePtr StorageDistributed::create( const std::string & name_, NamesAndTypesListPtr columns_, const String & remote_database_, const String & remote_table_, SharedPtr & owned_cluster_, Context & context_) { auto res = new StorageDistributed{ name_, columns_, remote_database_, remote_table_, *owned_cluster_, context_ }; /// Захватываем владение объектом-кластером. res->owned_cluster = owned_cluster_; return res->thisPtr(); } BlockInputStreams StorageDistributed::read( const Names & column_names, ASTPtr query, const Settings & settings, QueryProcessingStage::Enum & processed_stage, size_t max_block_size, unsigned threads) { Settings new_settings = settings; new_settings.queue_max_wait_ms = Cluster::saturate(new_settings.queue_max_wait_ms, settings.limits.max_execution_time); size_t result_size = cluster.pools.size() + cluster.getLocalNodesNum(); processed_stage = result_size == 1 ? QueryProcessingStage::Complete : QueryProcessingStage::WithMergeableState; BlockInputStreams res; const auto & modified_query_ast = rewriteQuery( query, remote_database, remote_table ); const auto & modified_query = queryToString(modified_query_ast); /// Цикл по шардам. for (auto & conn_pool : cluster.pools) res.emplace_back(new RemoteBlockInputStream{ conn_pool, modified_query, &new_settings, external_tables, processed_stage }); /// Добавляем запросы к локальному ClickHouse. if (cluster.getLocalNodesNum() > 0) { DB::Context new_context = context; new_context.setSettings(new_settings); for (auto & it : external_tables) if (!new_context.tryGetExternalTable(it.first)) new_context.addExternalTable(it.first, it.second); for(size_t i = 0; i < cluster.getLocalNodesNum(); ++i) { InterpreterSelectQuery interpreter(modified_query_ast, new_context, processed_stage); res.push_back(interpreter.execute()); } } external_tables.clear(); return res; } BlockOutputStreamPtr StorageDistributed::write(ASTPtr query) { if (!write_enabled) throw Exception{ "Method write is not supported by storage " + getName() + " with no sharding key provided", ErrorCodes::NOT_IMPLEMENTED }; return new DistributedBlockOutputStream{ *this, this->cluster, queryToString(rewriteQuery( query, remote_database, remote_table )) }; } void StorageDistributed::alter(const AlterCommands & params, const String & database_name, const String & table_name, Context & context) { auto lock = lockStructureForAlter(); params.apply(*columns); InterpreterAlterQuery::updateMetadata(database_name, table_name, *columns, context); } void StorageDistributed::shutdown() { quit.store(true, std::memory_order_relaxed); for (auto & name_thread_pair : directory_monitor_threads) name_thread_pair.second.join(); } NameAndTypePair StorageDistributed::getColumn(const String & column_name) const { auto type = VirtualColumnFactory::tryGetType(column_name); if (type) return NameAndTypePair(column_name, type); return getRealColumn(column_name); } bool StorageDistributed::hasColumn(const String & column_name) const { return VirtualColumnFactory::hasColumn(column_name) || hasRealColumn(column_name); } void StorageDistributed::createDirectoryMonitors() { Poco::File(path).createDirectory(); Poco::DirectoryIterator end; for (Poco::DirectoryIterator it(path); it != end; ++it) if (it->isDirectory()) createDirectoryMonitor(it.name()); } void StorageDistributed::createDirectoryMonitor(const std::string & name) { if (directory_monitor_threads.count(name)) return; directory_monitor_threads.emplace( name, std::thread{ &StorageDistributed::directoryMonitorFunc, this, name } ); } void StorageDistributed::directoryMonitorFunc(const std::string & name) { const auto & path = this->path + name + '/'; std::cout << "created monitor for directory " << path << std::endl; auto is_local = false; ConnectionPools pools; for (auto it = boost::make_split_iterator(name, boost::first_finder(",")); it != decltype(it){}; ++it) { const auto & address = boost::copy_range(*it); const auto user_pw_end = strchr(address.data(), '@'); const auto colon = strchr(address.data(), ':'); if (!user_pw_end || !colon) throw Exception{"Shard address '" + address + "' does not match to 'user[:password]@host:port' pattern"}; const auto has_pw = colon < user_pw_end; const auto host_end = has_pw ? strchr(user_pw_end + 1, ':') : colon; if (!host_end) throw Exception{"Shard address '" + address + "' does not contain port"}; const auto user = unescapeForFileName({address.data(), has_pw ? colon : user_pw_end}); const auto password = has_pw ? unescapeForFileName({colon + 1, user_pw_end}) : std::string{}; const auto host = unescapeForFileName({user_pw_end + 1, host_end}); const auto port = DB::parse(host_end + 1); std::cout << "\taddress " << host << " port " << port << " user " << user << " password " << password << std::endl; if (Cluster::addressIsLocal({host, port})) { is_local = true; break; } pools.emplace_back(new ConnectionPool{ 1, host, port, "", user, password, context.getDataTypeFactory(), getName() + '_' + name }); } std::cout << "local? " << std::boolalpha << is_local << std::endl; const auto pool = is_local ? (pools.size() == 1 ? pools[0] : new ConnectionPoolWithFailover(pools, DB::LoadBalancing::RANDOM) ) : nullptr; while (!quit.load(std::memory_order_relaxed)) { } std::cout << "exiting monitor for directory " << path << std::endl; } }