2024-01-02 17:50:06 +00:00
|
|
|
#include <Storages/IStorageCluster.h>
|
2023-04-21 17:24:37 +00:00
|
|
|
|
2024-01-02 17:50:06 +00:00
|
|
|
#include <Common/Exception.h>
|
|
|
|
#include <Core/QueryProcessingStage.h>
|
2023-04-21 17:24:37 +00:00
|
|
|
#include <DataTypes/DataTypeString.h>
|
|
|
|
#include <IO/ConnectionTimeouts.h>
|
|
|
|
#include <Interpreters/Context.h>
|
|
|
|
#include <Interpreters/getHeaderForProcessingStage.h>
|
|
|
|
#include <Interpreters/SelectQueryOptions.h>
|
|
|
|
#include <Interpreters/InterpreterSelectQuery.h>
|
|
|
|
#include <Interpreters/AddDefaultDatabaseVisitor.h>
|
|
|
|
#include <Interpreters/TranslateQualifiedNamesVisitor.h>
|
|
|
|
#include <Interpreters/InterpreterSelectQueryAnalyzer.h>
|
2024-01-02 17:50:06 +00:00
|
|
|
#include <Parsers/queryToString.h>
|
|
|
|
#include <Processors/Sources/NullSource.h>
|
|
|
|
#include <Processors/Sources/RemoteSource.h>
|
|
|
|
#include <Processors/QueryPlan/SourceStepWithFilter.h>
|
2023-04-21 17:24:37 +00:00
|
|
|
#include <QueryPipeline/narrowPipe.h>
|
|
|
|
#include <QueryPipeline/Pipe.h>
|
|
|
|
#include <QueryPipeline/RemoteQueryExecutor.h>
|
2024-01-02 17:50:06 +00:00
|
|
|
#include <QueryPipeline/QueryPipelineBuilder.h>
|
2023-04-21 17:24:37 +00:00
|
|
|
#include <Storages/IStorage.h>
|
|
|
|
#include <Storages/SelectQueryInfo.h>
|
|
|
|
#include <Storages/StorageDictionary.h>
|
|
|
|
|
|
|
|
#include <memory>
|
|
|
|
#include <string>
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
|
|
|
/// Constructs the storage: forwards `table_id_` to the IStorage base and remembers
/// the logger and the name of the cluster the storage reads from.
IStorageCluster::IStorageCluster(
    const String & cluster_name_,
    const StorageID & table_id_,
    LoggerPtr log_)
    : IStorage(table_id_)
    /// `log_` is taken by value, so hand it over instead of making one more copy.
    , log(std::move(log_))
    , cluster_name(cluster_name_)
{
}
|
|
|
|
|
2024-01-02 17:14:16 +00:00
|
|
|
class ReadFromCluster : public SourceStepWithFilter
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
std::string getName() const override { return "ReadFromCluster"; }
|
|
|
|
void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override;
|
|
|
|
void applyFilters() override;
|
|
|
|
|
|
|
|
ReadFromCluster(
|
|
|
|
Block sample_block,
|
|
|
|
std::shared_ptr<IStorageCluster> storage_,
|
|
|
|
ASTPtr query_to_send_,
|
|
|
|
QueryProcessingStage::Enum processed_stage_,
|
|
|
|
ClusterPtr cluster_,
|
2024-01-23 17:04:50 +00:00
|
|
|
LoggerPtr log_,
|
2024-01-02 17:14:16 +00:00
|
|
|
ContextPtr context_)
|
|
|
|
: SourceStepWithFilter(DataStream{.header = std::move(sample_block)})
|
|
|
|
, storage(std::move(storage_))
|
|
|
|
, query_to_send(std::move(query_to_send_))
|
|
|
|
, processed_stage(processed_stage_)
|
|
|
|
, cluster(std::move(cluster_))
|
|
|
|
, log(log_)
|
|
|
|
, context(std::move(context_))
|
|
|
|
{
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
|
|
|
std::shared_ptr<IStorageCluster> storage;
|
|
|
|
ASTPtr query_to_send;
|
|
|
|
QueryProcessingStage::Enum processed_stage;
|
|
|
|
ClusterPtr cluster;
|
2024-01-23 17:04:50 +00:00
|
|
|
LoggerPtr log;
|
2024-01-02 17:14:16 +00:00
|
|
|
ContextPtr context;
|
|
|
|
|
|
|
|
std::optional<RemoteQueryExecutor::Extension> extension;
|
|
|
|
|
|
|
|
void createExtension(const ActionsDAG::Node * predicate);
|
|
|
|
ContextPtr updateSettings(const Settings & settings);
|
|
|
|
};
|
|
|
|
|
|
|
|
void ReadFromCluster::applyFilters()
|
|
|
|
{
|
2024-01-25 15:22:49 +00:00
|
|
|
auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes);
|
2024-01-02 17:14:16 +00:00
|
|
|
const ActionsDAG::Node * predicate = nullptr;
|
|
|
|
if (filter_actions_dag)
|
|
|
|
predicate = filter_actions_dag->getOutputs().at(0);
|
|
|
|
|
|
|
|
createExtension(predicate);
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Creates the task iterator extension on the first call; later calls are no-ops,
/// so an extension created with a predicate is never overwritten.
void ReadFromCluster::createExtension(const ActionsDAG::Node * predicate)
{
    if (!extension)
        extension = storage->getTaskIteratorExtension(predicate, context);
}
|
2023-04-21 17:24:37 +00:00
|
|
|
|
|
|
|
/// The code executes on initiator
|
2024-01-02 17:14:16 +00:00
|
|
|
void IStorageCluster::read(
|
|
|
|
QueryPlan & query_plan,
|
2023-04-21 17:24:37 +00:00
|
|
|
const Names & column_names,
|
|
|
|
const StorageSnapshotPtr & storage_snapshot,
|
|
|
|
SelectQueryInfo & query_info,
|
|
|
|
ContextPtr context,
|
|
|
|
QueryProcessingStage::Enum processed_stage,
|
|
|
|
size_t /*max_block_size*/,
|
|
|
|
size_t /*num_streams*/)
|
|
|
|
{
|
2024-01-02 17:14:16 +00:00
|
|
|
storage_snapshot->check(column_names);
|
2023-04-21 17:24:37 +00:00
|
|
|
|
2024-01-02 17:14:16 +00:00
|
|
|
updateBeforeRead(context);
|
2023-04-21 17:24:37 +00:00
|
|
|
auto cluster = getCluster(context);
|
|
|
|
|
|
|
|
/// Calculate the header. This is significant, because some columns could be thrown away in some cases like query with count(*)
|
|
|
|
|
|
|
|
Block sample_block;
|
|
|
|
ASTPtr query_to_send = query_info.query;
|
|
|
|
|
|
|
|
if (context->getSettingsRef().allow_experimental_analyzer)
|
|
|
|
{
|
|
|
|
sample_block = InterpreterSelectQueryAnalyzer::getSampleBlock(query_info.query, context, SelectQueryOptions(processed_stage));
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
auto interpreter = InterpreterSelectQuery(query_info.query, context, SelectQueryOptions(processed_stage).analyze());
|
|
|
|
sample_block = interpreter.getSampleBlock();
|
|
|
|
query_to_send = interpreter.getQueryInfo().query->clone();
|
|
|
|
}
|
|
|
|
|
2024-01-22 22:55:50 +00:00
|
|
|
updateQueryToSendIfNeeded(query_to_send, storage_snapshot, context);
|
2023-04-21 17:24:37 +00:00
|
|
|
|
|
|
|
RestoreQualifiedNamesVisitor::Data data;
|
|
|
|
data.distributed_table = DatabaseAndTableWithAlias(*getTableExpression(query_info.query->as<ASTSelectQuery &>(), 0));
|
|
|
|
data.remote_table.database = context->getCurrentDatabase();
|
|
|
|
data.remote_table.table = getName();
|
|
|
|
RestoreQualifiedNamesVisitor(data).visit(query_to_send);
|
|
|
|
AddDefaultDatabaseVisitor visitor(context, context->getCurrentDatabase(),
|
|
|
|
/* only_replace_current_database_function_= */false,
|
|
|
|
/* only_replace_in_join_= */true);
|
|
|
|
visitor.visit(query_to_send);
|
|
|
|
|
2024-01-02 17:14:16 +00:00
|
|
|
auto this_ptr = std::static_pointer_cast<IStorageCluster>(shared_from_this());
|
|
|
|
|
|
|
|
auto reading = std::make_unique<ReadFromCluster>(
|
|
|
|
sample_block,
|
|
|
|
std::move(this_ptr),
|
|
|
|
std::move(query_to_send),
|
|
|
|
processed_stage,
|
|
|
|
cluster,
|
|
|
|
log,
|
|
|
|
context);
|
|
|
|
|
|
|
|
query_plan.addStep(std::move(reading));
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Creates one RemoteSource per connection obtained from the pools of all shards of `cluster`,
/// unites them into a single pipe and initializes `pipeline` with it.
/// If no connection was obtained, the pipeline consists of a NullSource with the output header.
void ReadFromCluster::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &)
{
    /// Does nothing if the extension has already been created (e.g. by applyFilters()).
    createExtension(nullptr);

    const Scalars & scalars = context->hasQueryContext() ? context->getQueryContext()->getScalars() : Scalars{};
    const bool add_agg_info = processed_stage == QueryProcessingStage::WithMergeableState;

    Pipes sources;

    /// The remote queries run with a copy of the context whose settings were adjusted by updateSettings().
    auto remote_context = updateSettings(context->getSettingsRef());
    const auto & remote_settings = remote_context->getSettingsRef();
    auto connection_timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(remote_settings);

    for (const auto & shard_info : cluster->getShardsInfo())
    {
        auto entries = shard_info.pool->getMany(connection_timeouts, remote_settings, PoolMode::GET_MANY);

        for (auto & entry : entries)
        {
            /// Every connection gets its own executor; all of them share the same extension.
            auto executor = std::make_shared<RemoteQueryExecutor>(
                std::vector<IConnectionPool::Entry>{entry},
                queryToString(query_to_send),
                getOutputStream().header,
                remote_context,
                /*throttler=*/nullptr,
                scalars,
                Tables(),
                processed_stage,
                extension);

            executor->setLogger(log);

            auto source = std::make_shared<RemoteSource>(executor, add_agg_info, false, false);
            sources.emplace_back(std::move(source));
        }
    }

    auto united = Pipe::unitePipes(std::move(sources));
    if (united.empty())
        united = Pipe(std::make_shared<NullSource>(getOutputStream().header));

    /// Remember the processors of the pipe in the step before the pipe is handed over to the pipeline.
    for (const auto & processor : united.getProcessors())
        processors.emplace_back(processor);

    pipeline.init(std::move(united));
}
|
|
|
|
|
|
|
|
/// Chooses the stage up to which this storage processes the query:
/// WithMergeableState for an initial query that is requested to reach at least that stage,
/// FetchColumns in every other case.
QueryProcessingStage::Enum IStorageCluster::getQueryProcessingStage(
    ContextPtr context, QueryProcessingStage::Enum to_stage, const StorageSnapshotPtr &, SelectQueryInfo &) const
{
    const bool is_initial_query = context->getClientInfo().query_kind == ClientInfo::QueryKind::INITIAL_QUERY;

    /// Initiator executes query on remote node.
    if (is_initial_query && to_stage >= QueryProcessingStage::Enum::WithMergeableState)
        return QueryProcessingStage::Enum::WithMergeableState;

    /// Follower just reads the data.
    return QueryProcessingStage::Enum::FetchColumns;
}
|
|
|
|
|
2024-01-02 17:14:16 +00:00
|
|
|
/// Returns a copy of the step's context whose settings are `settings`
/// with `skip_unavailable_shards` forced to true.
ContextPtr ReadFromCluster::updateSettings(const Settings & settings)
{
    auto context_copy = Context::createCopy(context);

    /// Cluster table functions should always skip unavailable shards.
    Settings patched_settings = settings;
    patched_settings.skip_unavailable_shards = true;

    context_copy->setSettings(patched_settings);
    return context_copy;
}
|
2023-04-21 17:24:37 +00:00
|
|
|
|
|
|
|
/// Looks up the cluster named `cluster_name` in `context` and returns
/// the result of getClusterWithReplicasAsShards() for the context's settings.
ClusterPtr IStorageCluster::getCluster(ContextPtr context) const
{
    auto configured_cluster = context->getCluster(cluster_name);
    const auto & settings = context->getSettingsRef();
    return configured_cluster->getClusterWithReplicasAsShards(settings);
}
|
|
|
|
|
|
|
|
}
|