2018-12-19 12:38:13 +00:00
|
|
|
#include <Storages/StorageDistributed.h>
|
|
|
|
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Databases/IDatabase.h>
|
2021-04-04 10:27:45 +00:00
|
|
|
|
2021-01-09 12:26:37 +00:00
|
|
|
#include <Disks/IDisk.h>
|
2017-04-01 09:19:00 +00:00
|
|
|
|
2021-10-15 20:18:20 +00:00
|
|
|
#include <QueryPipeline/RemoteQueryExecutor.h>
|
2021-04-04 10:27:45 +00:00
|
|
|
|
2018-07-05 20:38:05 +00:00
|
|
|
#include <DataTypes/DataTypeFactory.h>
|
2020-11-20 17:23:53 +00:00
|
|
|
#include <DataTypes/DataTypeUUID.h>
|
2018-12-19 12:38:13 +00:00
|
|
|
#include <DataTypes/DataTypesNumber.h>
|
2021-07-12 14:54:02 +00:00
|
|
|
#include <DataTypes/ObjectUtils.h>
|
2022-02-15 23:14:09 +00:00
|
|
|
#include <DataTypes/NestedUtils.h>
|
2018-07-05 20:38:05 +00:00
|
|
|
|
2021-07-26 16:48:25 +00:00
|
|
|
#include <Storages/Distributed/DistributedSink.h>
|
2017-12-30 00:36:06 +00:00
|
|
|
#include <Storages/StorageFactory.h>
|
2018-12-25 23:14:39 +00:00
|
|
|
#include <Storages/AlterCommands.h>
|
2021-07-12 14:54:02 +00:00
|
|
|
#include <Storages/getStructureOfRemoteTable.h>
|
2022-06-23 20:04:06 +00:00
|
|
|
#include <Storages/checkAndGetLiteralArgument.h>
|
2017-04-01 09:19:00 +00:00
|
|
|
|
2020-04-10 09:24:16 +00:00
|
|
|
#include <Columns/ColumnConst.h>
|
|
|
|
|
2018-01-22 15:56:30 +00:00
|
|
|
#include <Common/Macros.h>
|
2021-01-27 18:43:41 +00:00
|
|
|
#include <Common/ProfileEvents.h>
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Common/escapeForFileName.h>
|
2017-07-13 20:58:19 +00:00
|
|
|
#include <Common/typeid_cast.h>
|
2020-04-16 21:54:43 +00:00
|
|
|
#include <Common/quoteString.h>
|
2020-12-23 16:04:05 +00:00
|
|
|
#include <Common/randomSeed.h>
|
2021-01-26 18:45:37 +00:00
|
|
|
#include <Common/formatReadable.h>
|
2017-04-01 09:19:00 +00:00
|
|
|
|
2018-12-19 12:38:13 +00:00
|
|
|
#include <Parsers/ASTExpressionList.h>
|
2021-11-26 17:21:54 +00:00
|
|
|
#include <Parsers/ASTFunction.h>
|
2018-12-19 12:38:13 +00:00
|
|
|
#include <Parsers/ASTIdentifier.h>
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Parsers/ASTInsertQuery.h>
|
2018-12-19 12:38:13 +00:00
|
|
|
#include <Parsers/ASTLiteral.h>
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Parsers/ASTSelectQuery.h>
|
2021-11-26 18:27:16 +00:00
|
|
|
#include <Parsers/ASTSelectWithUnionQuery.h>
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Parsers/parseQuery.h>
|
2021-12-09 21:55:14 +00:00
|
|
|
#include <Parsers/IAST.h>
|
2021-03-04 17:38:12 +00:00
|
|
|
|
2022-12-23 17:45:28 +00:00
|
|
|
#include <Analyzer/TableNode.h>
|
|
|
|
|
|
|
|
#include <Planner/Planner.h>
|
|
|
|
#include <Planner/Utils.h>
|
|
|
|
|
2018-12-19 12:38:13 +00:00
|
|
|
#include <Interpreters/ClusterProxy/SelectStreamFactory.h>
|
|
|
|
#include <Interpreters/ClusterProxy/executeQuery.h>
|
2020-06-13 16:31:28 +00:00
|
|
|
#include <Interpreters/Cluster.h>
|
2023-01-24 10:46:47 +00:00
|
|
|
#include <Interpreters/DatabaseAndTableWithAlias.h>
|
2018-12-19 12:38:13 +00:00
|
|
|
#include <Interpreters/ExpressionAnalyzer.h>
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Interpreters/InterpreterDescribeQuery.h>
|
2018-12-19 12:38:13 +00:00
|
|
|
#include <Interpreters/InterpreterSelectQuery.h>
|
2022-01-10 19:01:41 +00:00
|
|
|
#include <Interpreters/InterpreterInsertQuery.h>
|
2021-04-04 10:27:45 +00:00
|
|
|
#include <Interpreters/JoinedTables.h>
|
2019-07-26 17:43:42 +00:00
|
|
|
#include <Interpreters/TranslateQualifiedNamesVisitor.h>
|
2022-01-13 17:23:44 +00:00
|
|
|
#include <Interpreters/AddDefaultDatabaseVisitor.h>
|
2020-07-22 17:13:05 +00:00
|
|
|
#include <Interpreters/TreeRewriter.h>
|
2020-05-20 20:16:32 +00:00
|
|
|
#include <Interpreters/Context.h>
|
2018-12-19 12:38:13 +00:00
|
|
|
#include <Interpreters/createBlockSelector.h>
|
2017-12-30 00:36:06 +00:00
|
|
|
#include <Interpreters/evaluateConstantExpression.h>
|
|
|
|
#include <Interpreters/getClusterName.h>
|
2020-04-01 14:21:37 +00:00
|
|
|
#include <Interpreters/getTableExpressions.h>
|
2022-02-15 23:14:09 +00:00
|
|
|
#include <Interpreters/RequiredSourceColumnsVisitor.h>
|
2023-01-19 10:26:38 +00:00
|
|
|
#include <Interpreters/getCustomKeyFilterForParallelReplicas.h>
|
|
|
|
|
2020-06-16 18:49:04 +00:00
|
|
|
#include <Functions/IFunction.h>
|
2022-02-18 14:42:48 +00:00
|
|
|
#include <TableFunctions/TableFunctionView.h>
|
|
|
|
#include <TableFunctions/TableFunctionFactory.h>
|
2017-04-01 09:19:00 +00:00
|
|
|
|
2023-01-09 12:30:32 +00:00
|
|
|
#include <Storages/IStorageCluster.h>
|
|
|
|
|
2022-01-10 19:01:41 +00:00
|
|
|
#include <Processors/Executors/PushingPipelineExecutor.h>
|
2021-09-08 18:29:38 +00:00
|
|
|
#include <Processors/QueryPlan/QueryPlan.h>
|
2021-04-04 10:27:45 +00:00
|
|
|
#include <Processors/QueryPlan/BuildQueryPipelineSettings.h>
|
|
|
|
#include <Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h>
|
2021-03-09 19:00:38 +00:00
|
|
|
#include <Processors/QueryPlan/ReadFromPreparedSource.h>
|
2022-12-23 17:45:28 +00:00
|
|
|
#include <Processors/QueryPlan/ExpressionStep.h>
|
2021-03-09 19:00:38 +00:00
|
|
|
#include <Processors/Sources/NullSource.h>
|
2021-07-14 13:17:30 +00:00
|
|
|
#include <Processors/Sources/RemoteSource.h>
|
2021-07-23 14:25:35 +00:00
|
|
|
#include <Processors/Sinks/EmptySink.h>
|
2021-03-09 19:00:38 +00:00
|
|
|
|
2020-06-13 16:31:28 +00:00
|
|
|
#include <Core/Settings.h>
|
2023-01-24 10:46:47 +00:00
|
|
|
#include <Core/SettingsEnums.h>
|
2012-05-21 20:38:34 +00:00
|
|
|
|
2018-06-05 19:46:49 +00:00
|
|
|
#include <IO/ReadHelpers.h>
|
2020-11-09 19:07:38 +00:00
|
|
|
#include <IO/WriteBufferFromString.h>
|
|
|
|
#include <IO/Operators.h>
|
2021-04-04 10:27:45 +00:00
|
|
|
#include <IO/ConnectionTimeoutsContext.h>
|
2018-06-05 19:46:49 +00:00
|
|
|
|
2015-02-10 21:10:58 +00:00
|
|
|
#include <memory>
|
2019-07-31 22:37:41 +00:00
|
|
|
#include <filesystem>
|
2020-04-22 21:44:22 +00:00
|
|
|
#include <optional>
|
2020-09-18 19:25:56 +00:00
|
|
|
#include <cassert>
|
2017-05-10 06:39:37 +00:00
|
|
|
|
2016-12-12 03:33:34 +00:00
|
|
|
|
2021-04-27 00:05:43 +00:00
|
|
|
namespace fs = std::filesystem;
|
|
|
|
|
2020-01-23 17:48:05 +00:00
|
|
|
namespace
|
|
|
|
{
|
2020-03-09 01:03:43 +00:00
|
|
|
const UInt64 FORCE_OPTIMIZE_SKIP_UNUSED_SHARDS_HAS_SHARDING_KEY = 1;
|
|
|
|
const UInt64 FORCE_OPTIMIZE_SKIP_UNUSED_SHARDS_ALWAYS = 2;
|
2020-08-15 13:25:30 +00:00
|
|
|
|
|
|
|
const UInt64 DISTRIBUTED_GROUP_BY_NO_MERGE_AFTER_AGGREGATION = 2;
|
2021-04-04 10:27:45 +00:00
|
|
|
|
|
|
|
const UInt64 PARALLEL_DISTRIBUTED_INSERT_SELECT_ALL = 2;
|
2020-01-23 17:48:05 +00:00
|
|
|
}
|
|
|
|
|
2021-01-27 18:43:41 +00:00
|
|
|
namespace ProfileEvents
|
|
|
|
{
|
|
|
|
extern const Event DistributedRejectedInserts;
|
|
|
|
extern const Event DistributedDelayedInserts;
|
|
|
|
extern const Event DistributedDelayedInsertsMilliseconds;
|
|
|
|
}
|
|
|
|
|
2012-05-21 20:38:34 +00:00
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2016-01-11 21:46:36 +00:00
|
|
|
namespace ErrorCodes
|
|
|
|
{
|
2021-03-09 19:00:38 +00:00
|
|
|
extern const int LOGICAL_ERROR;
|
2020-02-25 18:02:41 +00:00
|
|
|
extern const int NOT_IMPLEMENTED;
|
2016-01-11 21:46:36 +00:00
|
|
|
extern const int STORAGE_REQUIRES_PARAMETER;
|
2017-11-03 19:53:10 +00:00
|
|
|
extern const int BAD_ARGUMENTS;
|
2017-12-30 00:36:06 +00:00
|
|
|
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
|
|
|
extern const int INCORRECT_NUMBER_OF_COLUMNS;
|
2018-03-16 02:08:31 +00:00
|
|
|
extern const int INFINITE_LOOP;
|
2021-12-09 10:39:28 +00:00
|
|
|
extern const int ILLEGAL_FINAL;
|
2018-06-05 19:46:49 +00:00
|
|
|
extern const int TYPE_MISMATCH;
|
2018-12-19 12:38:13 +00:00
|
|
|
extern const int TOO_MANY_ROWS;
|
2020-01-23 17:48:05 +00:00
|
|
|
extern const int UNABLE_TO_SKIP_UNUSED_SHARDS;
|
2021-02-02 02:25:19 +00:00
|
|
|
extern const int INVALID_SHARD_ID;
|
2021-02-28 05:24:39 +00:00
|
|
|
extern const int ALTER_OF_COLUMN_IS_FORBIDDEN;
|
2021-01-26 18:45:37 +00:00
|
|
|
extern const int DISTRIBUTED_TOO_MANY_PENDING_BYTES;
|
2021-01-27 18:43:41 +00:00
|
|
|
extern const int ARGUMENT_OUT_OF_BOUND;
|
2022-03-08 14:24:39 +00:00
|
|
|
extern const int TOO_LARGE_DISTRIBUTED_DEPTH;
|
2022-12-23 17:45:28 +00:00
|
|
|
extern const int UNSUPPORTED_METHOD;
|
2016-01-11 21:46:36 +00:00
|
|
|
}
|
|
|
|
|
2019-04-08 05:13:16 +00:00
|
|
|
namespace ActionLocks
|
|
|
|
{
|
2019-04-22 15:11:16 +00:00
|
|
|
extern const StorageActionBlockType DistributedSend;
|
2019-04-08 05:13:16 +00:00
|
|
|
}
|
2016-01-11 21:46:36 +00:00
|
|
|
|
2014-08-21 12:07:29 +00:00
|
|
|
namespace
|
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2018-07-25 12:31:47 +00:00
|
|
|
/// select query has database, table and table function names as AST pointers
|
|
|
|
/// Creates a copy of query, changes database, table and table function names.
|
2022-01-13 17:23:44 +00:00
|
|
|
ASTPtr rewriteSelectQuery(
|
|
|
|
ContextPtr context,
|
|
|
|
const ASTPtr & query,
|
|
|
|
const std::string & remote_database,
|
|
|
|
const std::string & remote_table,
|
|
|
|
ASTPtr table_function_ptr = nullptr)
|
2017-05-10 06:39:37 +00:00
|
|
|
{
|
2018-02-25 00:50:53 +00:00
|
|
|
auto modified_query_ast = query->clone();
|
2019-07-26 17:43:42 +00:00
|
|
|
|
|
|
|
ASTSelectQuery & select_query = modified_query_ast->as<ASTSelectQuery &>();
|
2020-09-16 09:57:26 +00:00
|
|
|
|
|
|
|
// Get rid of the settings clause so we don't send them to remote. Thus newly non-important
|
|
|
|
// settings won't break any remote parser. It's also more reasonable since the query settings
|
|
|
|
// are written into the query context and will be sent by the query pipeline.
|
|
|
|
select_query.setExpression(ASTSelectQuery::Expression::SETTINGS, {});
|
|
|
|
|
2020-04-01 14:21:37 +00:00
|
|
|
if (table_function_ptr)
|
|
|
|
select_query.addTableFunction(table_function_ptr);
|
|
|
|
else
|
2022-01-13 17:23:44 +00:00
|
|
|
select_query.replaceDatabaseAndTable(remote_database, remote_table);
|
2019-07-26 17:43:42 +00:00
|
|
|
|
2020-04-01 14:21:37 +00:00
|
|
|
/// Restore long column names (cause our short names are ambiguous).
|
|
|
|
/// TODO: aliased table functions & CREATE TABLE AS table function cases
|
|
|
|
if (!table_function_ptr)
|
2019-07-26 17:43:42 +00:00
|
|
|
{
|
|
|
|
RestoreQualifiedNamesVisitor::Data data;
|
2020-04-01 14:21:37 +00:00
|
|
|
data.distributed_table = DatabaseAndTableWithAlias(*getTableExpression(query->as<ASTSelectQuery &>(), 0));
|
2022-01-13 17:23:44 +00:00
|
|
|
data.remote_table.database = remote_database;
|
|
|
|
data.remote_table.table = remote_table;
|
2020-04-01 14:21:37 +00:00
|
|
|
RestoreQualifiedNamesVisitor(data).visit(modified_query_ast);
|
2019-07-26 17:43:42 +00:00
|
|
|
}
|
|
|
|
|
2022-01-13 17:23:44 +00:00
|
|
|
/// To make local JOIN works, default database should be added to table names.
|
|
|
|
/// But only for JOIN section, since the following should work using default_database:
|
|
|
|
/// - SELECT * FROM d WHERE value IN (SELECT l.value FROM l) ORDER BY value
|
|
|
|
/// (see 01487_distributed_in_not_default_db)
|
|
|
|
AddDefaultDatabaseVisitor visitor(context, context->getCurrentDatabase(),
|
|
|
|
/* only_replace_current_database_function_= */false,
|
|
|
|
/* only_replace_in_join_= */true);
|
|
|
|
visitor.visit(modified_query_ast);
|
|
|
|
|
2017-05-10 06:39:37 +00:00
|
|
|
return modified_query_ast;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Calculate maximum number in file names in directory and all subdirectories.
|
|
|
|
/// To ensure global order of data blocks yet to be sent across server restarts.
|
2019-01-04 12:10:00 +00:00
|
|
|
UInt64 getMaximumFileNumber(const std::string & dir_path)
|
2017-05-10 06:39:37 +00:00
|
|
|
{
|
|
|
|
UInt64 res = 0;
|
|
|
|
|
2019-07-31 22:37:41 +00:00
|
|
|
std::filesystem::recursive_directory_iterator begin(dir_path);
|
|
|
|
std::filesystem::recursive_directory_iterator end;
|
2017-05-10 06:39:37 +00:00
|
|
|
for (auto it = begin; it != end; ++it)
|
2014-08-19 08:04:13 +00:00
|
|
|
{
|
2019-01-04 12:10:00 +00:00
|
|
|
const auto & file_path = it->path();
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2019-07-31 22:37:41 +00:00
|
|
|
if (!std::filesystem::is_regular_file(*it) || !endsWith(file_path.filename().string(), ".bin"))
|
2017-05-10 06:39:37 +00:00
|
|
|
continue;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2017-05-10 06:39:37 +00:00
|
|
|
UInt64 num = 0;
|
|
|
|
try
|
|
|
|
{
|
2019-01-04 12:10:00 +00:00
|
|
|
num = parse<UInt64>(file_path.filename().stem().string());
|
2017-05-10 06:39:37 +00:00
|
|
|
}
|
|
|
|
catch (Exception & e)
|
|
|
|
{
|
2019-01-04 12:10:00 +00:00
|
|
|
e.addMessage("Unexpected file name " + file_path.filename().string() + " found at " + file_path.parent_path().string() + ", should have numeric base name.");
|
2017-05-10 06:39:37 +00:00
|
|
|
throw;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (num > res)
|
|
|
|
res = num;
|
2014-08-13 12:52:30 +00:00
|
|
|
}
|
2017-05-10 06:39:37 +00:00
|
|
|
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
2019-08-19 20:28:24 +00:00
|
|
|
std::string makeFormattedListOfShards(const ClusterPtr & cluster)
|
|
|
|
{
|
2020-11-09 19:07:38 +00:00
|
|
|
WriteBufferFromOwnString buf;
|
2019-08-19 20:28:24 +00:00
|
|
|
|
|
|
|
bool head = true;
|
2020-11-09 19:07:38 +00:00
|
|
|
buf << "[";
|
2019-08-19 20:28:24 +00:00
|
|
|
for (const auto & shard_info : cluster->getShardsInfo())
|
|
|
|
{
|
2020-11-09 19:07:38 +00:00
|
|
|
(head ? buf : buf << ", ") << shard_info.shard_num;
|
2019-08-19 20:28:24 +00:00
|
|
|
head = false;
|
|
|
|
}
|
2020-11-09 19:07:38 +00:00
|
|
|
buf << "]";
|
2019-08-19 20:28:24 +00:00
|
|
|
|
2020-11-09 19:07:38 +00:00
|
|
|
return buf.str();
|
2019-08-19 20:28:24 +00:00
|
|
|
}
|
|
|
|
|
2021-04-10 23:33:54 +00:00
|
|
|
/// Analyze the sharding key expression against the table's columns and compile it
/// into executable actions (optionally projecting to only the expression result).
ExpressionActionsPtr buildShardingKeyExpression(const ASTPtr & sharding_key, ContextPtr context, const NamesAndTypesList & columns, bool project)
{
    ASTPtr key_ast = sharding_key;
    auto analyzed = TreeRewriter(context).analyze(key_ast, columns);
    return ExpressionAnalyzer(key_ast, analyzed, context).getActions(project);
}
|
|
|
|
|
2021-06-28 17:02:22 +00:00
|
|
|
bool isExpressionActionsDeterministic(const ExpressionActionsPtr & actions)
|
2020-06-16 18:49:04 +00:00
|
|
|
{
|
|
|
|
for (const auto & action : actions->getActions())
|
|
|
|
{
|
2020-11-10 14:54:59 +00:00
|
|
|
if (action.node->type != ActionsDAG::ActionType::FUNCTION)
|
2020-06-16 18:49:04 +00:00
|
|
|
continue;
|
2020-11-03 11:28:28 +00:00
|
|
|
if (!action.node->function_base->isDeterministic())
|
2020-06-16 18:49:04 +00:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2020-03-22 10:37:35 +00:00
|
|
|
class ReplacingConstantExpressionsMatcher
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
using Data = Block;
|
2014-08-13 12:52:30 +00:00
|
|
|
|
2020-03-22 10:37:35 +00:00
|
|
|
static bool needChildVisit(ASTPtr &, const ASTPtr &)
|
|
|
|
{
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void visit(ASTPtr & node, Block & block_with_constants)
|
|
|
|
{
|
|
|
|
if (!node->as<ASTFunction>())
|
|
|
|
return;
|
2017-06-06 18:48:38 +00:00
|
|
|
|
2020-03-22 10:37:35 +00:00
|
|
|
std::string name = node->getColumnName();
|
|
|
|
if (block_with_constants.has(name))
|
|
|
|
{
|
|
|
|
auto result = block_with_constants.getByName(name);
|
|
|
|
if (!isColumnConst(*result.column))
|
|
|
|
return;
|
|
|
|
|
2020-03-23 17:28:38 +00:00
|
|
|
node = std::make_shared<ASTLiteral>(assert_cast<const ColumnConst &>(*result.column).getField());
|
2020-03-22 10:37:35 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
};
|
2020-03-23 17:28:38 +00:00
|
|
|
|
2020-06-18 09:08:24 +00:00
|
|
|
void replaceConstantExpressions(
|
|
|
|
ASTPtr & node,
|
2021-04-10 23:33:54 +00:00
|
|
|
ContextPtr context,
|
2020-06-18 09:08:24 +00:00
|
|
|
const NamesAndTypesList & columns,
|
|
|
|
ConstStoragePtr storage,
|
2021-07-23 16:47:43 +00:00
|
|
|
const StorageSnapshotPtr & storage_snapshot)
|
2018-11-08 15:43:14 +00:00
|
|
|
{
|
2021-07-23 16:47:43 +00:00
|
|
|
auto syntax_result = TreeRewriter(context).analyze(node, columns, storage, storage_snapshot);
|
2020-03-22 10:37:35 +00:00
|
|
|
Block block_with_constants = KeyCondition::getBlockWithConstants(node, syntax_result, context);
|
|
|
|
|
|
|
|
InDepthNodeVisitor<ReplacingConstantExpressionsMatcher, true> visitor(block_with_constants);
|
|
|
|
visitor.visit(node);
|
2018-11-08 15:43:14 +00:00
|
|
|
}
|
2017-06-06 18:48:38 +00:00
|
|
|
|
2020-04-22 21:44:22 +00:00
|
|
|
size_t getClusterQueriedNodes(const Settings & settings, const ClusterPtr & cluster)
|
|
|
|
{
|
2020-04-01 18:38:01 +00:00
|
|
|
size_t num_local_shards = cluster->getLocalShardCount();
|
|
|
|
size_t num_remote_shards = cluster->getRemoteShardCount();
|
2021-12-09 10:39:28 +00:00
|
|
|
return (num_remote_shards + num_local_shards) * settings.max_parallel_replicas;
|
2020-04-01 18:38:01 +00:00
|
|
|
}
|
|
|
|
|
2023-01-24 10:46:47 +00:00
|
|
|
bool canUseCustomKey(const Settings & settings, const Cluster & cluster)
|
2023-01-19 13:24:35 +00:00
|
|
|
{
|
|
|
|
return settings.max_parallel_replicas > 1 && settings.parallel_replicas_mode == ParallelReplicasMode::CUSTOM_KEY
|
|
|
|
&& cluster.getShardCount() == 1 && cluster.getShardsInfo()[0].getAllNodeCount() > 1;
|
|
|
|
}
|
2020-03-22 10:37:35 +00:00
|
|
|
|
2023-01-24 10:46:47 +00:00
|
|
|
}
|
2020-03-22 10:37:35 +00:00
|
|
|
|
|
|
|
/// For destruction of std::unique_ptr of type that is incomplete in class definition.
/// (The destructor must be emitted here, where the pointee types are complete.)
StorageDistributed::~StorageDistributed() = default;
|
2020-01-20 17:54:52 +00:00
|
|
|
|
2020-04-24 09:20:09 +00:00
|
|
|
|
2020-04-28 10:38:57 +00:00
|
|
|
/// Virtual columns a Distributed table claims to expose; they are resolved by the
/// underlying remote tables when the query is forwarded.
NamesAndTypesList StorageDistributed::getVirtuals() const
{
    /// NOTE This is weird. Most of these virtual columns are part of MergeTree
    /// tables info. But Distributed is general-purpose engine.
    return NamesAndTypesList{
        NameAndTypePair("_table", std::make_shared<DataTypeString>()),
        NameAndTypePair("_part", std::make_shared<DataTypeString>()),
        NameAndTypePair("_part_index", std::make_shared<DataTypeUInt64>()),
        NameAndTypePair("_part_uuid", std::make_shared<DataTypeUUID>()),
        NameAndTypePair("_partition_id", std::make_shared<DataTypeString>()),
        NameAndTypePair("_sample_factor", std::make_shared<DataTypeFloat64>()),
        NameAndTypePair("_part_offset", std::make_shared<DataTypeUInt64>()),
        NameAndTypePair("_row_exists", std::make_shared<DataTypeUInt8>()),
        NameAndTypePair("_shard_num", std::make_shared<DataTypeUInt32>()), /// deprecated
    };
}
|
2020-04-24 09:20:09 +00:00
|
|
|
|
2014-09-30 03:08:47 +00:00
|
|
|
/// Main constructor.
/// Resolves the cluster name through macros, builds in-memory metadata (fetching the
/// structure from a remote shard when no columns are given), compiles the sharding key,
/// picks the storage policy/volume for pending-insert files, and performs a self-reference
/// sanity check (skipped on ATTACH so an existing server can still start).
StorageDistributed::StorageDistributed(
    const StorageID & id_,
    const ColumnsDescription & columns_,
    const ConstraintsDescription & constraints_,
    const String & comment,
    const String & remote_database_,
    const String & remote_table_,
    const String & cluster_name_,
    ContextPtr context_,
    const ASTPtr & sharding_key_,
    const String & storage_policy_name_,
    const String & relative_data_path_,
    const DistributedSettings & distributed_settings_,
    bool attach_,
    ClusterPtr owned_cluster_,
    ASTPtr remote_table_function_ptr_)
    : IStorage(id_)
    , WithContext(context_->getGlobalContext())
    , remote_database(remote_database_)
    , remote_table(remote_table_)
    , remote_table_function_ptr(remote_table_function_ptr_)
    , log(&Poco::Logger::get("StorageDistributed (" + id_.table_name + ")"))
    , owned_cluster(std::move(owned_cluster_))
    , cluster_name(getContext()->getMacros()->expand(cluster_name_))
    , has_sharding_key(sharding_key_)
    , relative_data_path(relative_data_path_)
    , distributed_settings(distributed_settings_)
    , rng(randomSeed())
{
    StorageInMemoryMetadata storage_metadata;
    /// No explicit column list: take the structure from the remote table
    /// (or table function) on one of the cluster's shards.
    if (columns_.empty())
    {
        StorageID id = StorageID::createEmpty();
        id.table_name = remote_table;
        id.database_name = remote_database;
        storage_metadata.setColumns(getStructureOfRemoteTable(*getCluster(), id, getContext(), remote_table_function_ptr));
    }
    else
        storage_metadata.setColumns(columns_);

    storage_metadata.setConstraints(constraints_);
    storage_metadata.setComment(comment);
    setInMemoryMetadata(storage_metadata);

    /// Compile the sharding key once; determinism matters for skip-unused-shards
    /// and sharding-key-based aggregation optimizations.
    if (sharding_key_)
    {
        sharding_key_expr = buildShardingKeyExpression(sharding_key_, getContext(), storage_metadata.getColumns().getAllPhysical(), false);
        sharding_key_column_name = sharding_key_->getColumnName();
        sharding_key_is_deterministic = isExpressionActionsDeterministic(sharding_key_expr);
    }

    /// An on-disk path implies asynchronous inserts; only the first volume of the
    /// chosen storage policy is used for queued data.
    if (!relative_data_path.empty())
    {
        storage_policy = getContext()->getStoragePolicy(storage_policy_name_);
        data_volume = storage_policy->getVolume(0);
        if (storage_policy->getVolumes().size() > 1)
            LOG_WARNING(log, "Storage policy for Distributed table has multiple volumes. "
                "Only {} volume will be used to store data. Other will be ignored.", data_volume->getName());
    }

    /// Sanity check. Skip check if the table is already created to allow the server to start.
    if (!attach_)
    {
        if (remote_database.empty() && !remote_table_function_ptr && !getCluster()->maybeCrossReplication())
            LOG_WARNING(log, "Name of remote database is empty. Default database will be used implicitly.");

        size_t num_local_shards = getCluster()->getLocalShardCount();
        if (num_local_shards && (remote_database.empty() || remote_database == id_.database_name) && remote_table == id_.table_name)
            throw Exception(ErrorCodes::INFINITE_LOOP, "Distributed table {} looks at itself", id_.table_name);
    }

    initializeFromDisk();
}
|
|
|
|
|
2017-03-09 03:34:09 +00:00
|
|
|
|
2018-07-24 13:10:34 +00:00
|
|
|
/// Delegating constructor for the case where the remote side is described by a
/// table function AST instead of a database/table pair: forwards everything to the
/// main constructor with empty comment/database/table strings and the function AST.
StorageDistributed::StorageDistributed(
    const StorageID & id_,
    const ColumnsDescription & columns_,
    const ConstraintsDescription & constraints_,
    ASTPtr remote_table_function_ptr_,
    const String & cluster_name_,
    ContextPtr context_,
    const ASTPtr & sharding_key_,
    const String & storage_policy_name_,
    const String & relative_data_path_,
    const DistributedSettings & distributed_settings_,
    bool attach,
    ClusterPtr owned_cluster_)
    : StorageDistributed(
        id_,
        columns_,
        constraints_,
        String{},
        String{},
        String{},
        cluster_name_,
        context_,
        sharding_key_,
        storage_policy_name_,
        relative_data_path_,
        distributed_settings_,
        attach,
        std::move(owned_cluster_),
        remote_table_function_ptr_)
{
}
|
|
|
|
|
2020-11-07 21:30:40 +00:00
|
|
|
/// Decide up to which stage the query can be processed on the remote shards.
/// Side effects: fills query_info.cluster (possibly a replicas-as-shards "virtual
/// shards" cluster when a parallel-replicas custom key applies) and, when
/// optimize_skip_unused_shards prunes shards, query_info.optimized_cluster.
QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage(
    ContextPtr local_context,
    QueryProcessingStage::Enum to_stage,
    const StorageSnapshotPtr & storage_snapshot,
    SelectQueryInfo & query_info) const
{
    const auto & settings = local_context->getSettingsRef();

    ClusterPtr cluster = getCluster();

    size_t nodes = getClusterQueriedNodes(settings, cluster);

    /// A custom key may be declared either for the Distributed table itself or for
    /// the remote table it points at; either match enables virtual shards.
    const auto use_virtual_shards = [&]
    {
        if (!canUseCustomKey(settings, *cluster))
            return false;

        auto distributed_table = DatabaseAndTableWithAlias(
            *getTableExpression(query_info.query->as<ASTSelectQuery &>(), 0), local_context->getCurrentDatabase());

        if (containsCustomKeyForTable(settings.parallel_replicas_custom_key, distributed_table, *local_context))
        {
            LOG_INFO(log, "Found custom_key for {}", distributed_table.getQualifiedNamePrefix(false));
            return true;
        }

        DatabaseAndTableWithAlias remote_table_info;
        remote_table_info.database = remote_database;
        remote_table_info.table = remote_table;
        if (containsCustomKeyForTable(settings.parallel_replicas_custom_key, remote_table_info, *local_context))
        {
            LOG_INFO(log, "Found custom_key for {}", remote_table_info.getQualifiedNamePrefix(false));
            return true;
        }

        return false;
    };

    if (use_virtual_shards())
    {
        LOG_INFO(log, "Single shard cluster used with custom_key, transforming replicas into virtual shards");

        query_info.cluster = cluster->getClusterWithReplicasAsShards(settings, settings.max_parallel_replicas);
    }
    else
    {
        query_info.cluster = cluster;

        if (nodes > 1 && settings.optimize_skip_unused_shards)
        {
            /// Always calculate optimized cluster here, to avoid conditions during read()
            /// (Anyway it will be calculated in the read())
            ClusterPtr optimized_cluster = getOptimizedCluster(local_context, storage_snapshot, query_info.query);
            if (optimized_cluster)
            {
                LOG_DEBUG(log, "Skipping irrelevant shards - the query will be sent to the following shards of the cluster (shard numbers): {}",
                        makeFormattedListOfShards(optimized_cluster));

                cluster = optimized_cluster;
                query_info.optimized_cluster = cluster;

                nodes = getClusterQueriedNodes(settings, cluster);
            }
            else
            {
                LOG_DEBUG(log, "Unable to figure out irrelevant shards from WHERE/PREWHERE clauses - the query will be sent to all shards of the cluster{}",
                        has_sharding_key ? "" : " (no sharding key)");
            }
        }
    }

    /// Explicit user override: never merge results on the initiator.
    if (settings.distributed_group_by_no_merge)
    {
        if (settings.distributed_group_by_no_merge == DISTRIBUTED_GROUP_BY_NO_MERGE_AFTER_AGGREGATION)
        {
            if (settings.distributed_push_down_limit)
                return QueryProcessingStage::WithMergeableStateAfterAggregationAndLimit;
            else
                return QueryProcessingStage::WithMergeableStateAfterAggregation;
        }
        else
        {
            /// NOTE: distributed_group_by_no_merge=1 does not respect distributed_push_down_limit
            /// (since in this case queries processed separately and the initiator is just a proxy in this case).
            if (to_stage != QueryProcessingStage::Complete)
                throw Exception(ErrorCodes::LOGICAL_ERROR, "Queries with distributed_group_by_no_merge=1 should be processed to Complete stage");
            return QueryProcessingStage::Complete;
        }
    }

    /// Nested distributed query cannot return Complete stage,
    /// since the parent query need to aggregate the results after.
    if (to_stage == QueryProcessingStage::WithMergeableState)
        return QueryProcessingStage::WithMergeableState;

    /// If there is only one node, the query can be fully processed by the
    /// shard, initiator will work as a proxy only.
    if (nodes == 1)
    {
        /// In case the query was processed to
        /// WithMergeableStateAfterAggregation/WithMergeableStateAfterAggregationAndLimit
        /// (which are greater the Complete stage)
        /// we cannot return Complete (will break aliases and similar),
        /// relevant for Distributed over Distributed
        return std::max(to_stage, QueryProcessingStage::Complete);
    }
    else if (nodes == 0)
    {
        /// In case of 0 shards, the query should be processed fully on the initiator,
        /// since we need to apply aggregations.
        /// That's why we need to return FetchColumns.
        return QueryProcessingStage::FetchColumns;
    }

    /// Sharding-key-based optimization may allow a deeper remote stage.
    auto optimized_stage = getOptimizedQueryProcessingStage(query_info, settings);
    if (optimized_stage)
    {
        if (*optimized_stage == QueryProcessingStage::Complete)
            return std::min(to_stage, *optimized_stage);
        return *optimized_stage;
    }

    return QueryProcessingStage::WithMergeableState;
}
|
|
|
|
|
|
|
|
/// Decide whether the query may be executed on the remote shards up to a later
/// stage than usual (possibly up to QueryProcessingStage::Complete), based on
/// the query shape and on whether the sharding key fully covers
/// DISTINCT/GROUP BY/LIMIT BY. Returns an empty optional when no optimized
/// stage applies and the caller must fall back to the default.
std::optional<QueryProcessingStage::Enum> StorageDistributed::getOptimizedQueryProcessingStage(const SelectQueryInfo & query_info, const Settings & settings) const
{
    /// Aggregation can be pushed fully to shards only when shard skipping is
    /// enabled, a sharding key exists, and the key is deterministic (unless
    /// non-deterministic keys are explicitly allowed).
    bool optimize_sharding_key_aggregation =
        settings.optimize_skip_unused_shards &&
        settings.optimize_distributed_group_by_sharding_key &&
        has_sharding_key &&
        (settings.allow_nondeterministic_optimize_skip_unused_shards || sharding_key_is_deterministic);

    /// Stage used when the query carries ORDER BY / LIMIT / OFFSET: shards do
    /// everything after aggregation; with distributed_push_down_limit the
    /// LIMIT is applied on shards too.
    QueryProcessingStage::Enum default_stage = QueryProcessingStage::WithMergeableStateAfterAggregation;
    if (settings.distributed_push_down_limit)
        default_stage = QueryProcessingStage::WithMergeableStateAfterAggregationAndLimit;

    const auto & select = query_info.query->as<ASTSelectQuery &>();

    /// True iff every column required by the sharding key expression appears
    /// as a plain identifier in `exprs`. Non-identifier children (functions,
    /// literals) are ignored, which makes the check conservative.
    auto expr_contains_sharding_key = [&](const auto & exprs) -> bool
    {
        std::unordered_set<std::string> expr_columns;
        for (auto & expr : exprs)
        {
            auto id = expr->template as<ASTIdentifier>();
            if (!id)
                continue;
            expr_columns.emplace(id->name());
        }

        for (const auto & column : sharding_key_expr->getRequiredColumns())
        {
            if (!expr_columns.contains(column))
                return false;
        }

        return true;
    };

    // GROUP BY qualifiers
    // - TODO: WITH TOTALS can be implemented
    // - TODO: WITH ROLLUP can be implemented (I guess)
    if (select.group_by_with_totals || select.group_by_with_rollup || select.group_by_with_cube)
        return {};
    // Window functions are not supported.
    if (query_info.has_window)
        return {};
    // TODO: extremes support can be implemented
    if (settings.extremes)
        return {};

    // DISTINCT: safe on shards only if the sharding key covers the select list.
    if (select.distinct)
    {
        if (!optimize_sharding_key_aggregation || !expr_contains_sharding_key(select.select()->children))
            return {};
    }

    // GROUP BY
    const ASTPtr group_by = select.groupBy();

    /// Prefer the syntax analyzer result when available; it is the
    /// authoritative list of aggregate functions in the query.
    bool has_aggregates = query_info.has_aggregates;
    if (query_info.syntax_analyzer_result)
        has_aggregates = !query_info.syntax_analyzer_result->aggregates.empty();

    if (has_aggregates || group_by)
    {
        if (!optimize_sharding_key_aggregation || !group_by || !expr_contains_sharding_key(group_by->children))
            return {};
    }

    // LIMIT BY: safe on shards only if keyed by the sharding key.
    if (const ASTPtr limit_by = select.limitBy())
    {
        if (!optimize_sharding_key_aggregation || !expr_contains_sharding_key(limit_by->children))
            return {};
    }

    // ORDER BY: the initiator must still merge per-shard sorted streams.
    if (const ASTPtr order_by = select.orderBy())
        return default_stage;

    // LIMIT
    // OFFSET
    if (select.limitLength() || select.limitOffset())
        return default_stage;

    // Only simple SELECT FROM GROUP BY sharding_key can use Complete state.
    return QueryProcessingStage::Complete;
}
|
|
|
|
|
2022-02-15 23:14:09 +00:00
|
|
|
static bool requiresObjectColumns(const ColumnsDescription & all_columns, ASTPtr query)
|
|
|
|
{
|
2022-05-06 14:44:00 +00:00
|
|
|
if (!hasDynamicSubcolumns(all_columns))
|
2022-02-15 23:14:09 +00:00
|
|
|
return false;
|
|
|
|
|
|
|
|
if (!query)
|
|
|
|
return true;
|
|
|
|
|
|
|
|
RequiredSourceColumnsVisitor::Data columns_context;
|
|
|
|
RequiredSourceColumnsVisitor(columns_context).visit(query);
|
|
|
|
|
|
|
|
auto required_columns = columns_context.requiredColumns();
|
|
|
|
for (const auto & required_column : required_columns)
|
|
|
|
{
|
|
|
|
auto name_in_storage = Nested::splitName(required_column).first;
|
|
|
|
auto column_in_storage = all_columns.tryGetPhysical(name_in_storage);
|
|
|
|
|
2022-05-06 14:44:00 +00:00
|
|
|
if (column_in_storage && column_in_storage->type->hasDynamicSubcolumns())
|
2022-02-15 23:14:09 +00:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2022-03-17 17:26:18 +00:00
|
|
|
/// Query-less snapshot entry point: delegates to getStorageSnapshotForQuery()
/// with a null AST, which treats the request conservatively with respect to
/// Object-column deduction.
StorageSnapshotPtr StorageDistributed::getStorageSnapshot(const StorageMetadataPtr & metadata_snapshot, ContextPtr query_context) const
{
    const ASTPtr no_query = nullptr;
    return getStorageSnapshotForQuery(metadata_snapshot, no_query, query_context);
}
|
|
|
|
|
|
|
|
/// Build a storage snapshot for a specific query. When the query touches
/// Object-typed columns, their concrete subcolumn structure is collected from
/// the remote shards and merged into the snapshot; otherwise the extra round
/// trip is avoided entirely.
StorageSnapshotPtr StorageDistributed::getStorageSnapshotForQuery(
    const StorageMetadataPtr & metadata_snapshot, const ASTPtr & query, ContextPtr /*query_context*/) const
{
    /// If query doesn't use columns of type Object, don't deduce
    /// concrete types for them, because it required extra round trip.
    auto snapshot_data = std::make_unique<SnapshotData>();
    if (!requiresObjectColumns(metadata_snapshot->getColumns(), query))
        return std::make_shared<StorageSnapshot>(*this, metadata_snapshot, ColumnsDescription{}, std::move(snapshot_data));

    /// Ask every shard for the concrete structure of its Object columns.
    snapshot_data->objects_by_shard = getExtendedObjectsOfRemoteTables(
        *getCluster(),
        StorageID{remote_database, remote_table},
        metadata_snapshot->getColumns(),
        getContext());

    /// Merge per-shard structures into a single common description.
    auto object_columns = DB::getConcreteObjectColumns(
        snapshot_data->objects_by_shard.begin(),
        snapshot_data->objects_by_shard.end(),
        metadata_snapshot->getColumns(),
        [](const auto & shard_num_and_columns) -> const auto & { return shard_num_and_columns.second; });

    return std::make_shared<StorageSnapshot>(*this, metadata_snapshot, object_columns, std::move(snapshot_data));
}
|
|
|
|
|
2023-01-10 11:52:29 +00:00
|
|
|
namespace
|
|
|
|
{
|
|
|
|
|
2022-12-23 17:45:28 +00:00
|
|
|
QueryTreeNodePtr buildQueryTreeDistributedTableReplacedWithLocalTable(const SelectQueryInfo & query_info, StorageID remote_storage_id)
|
|
|
|
{
|
|
|
|
const auto & query_context = query_info.planner_context->getQueryContext();
|
|
|
|
auto resolved_remote_storage_id = query_context->resolveStorageID(remote_storage_id);
|
|
|
|
auto storage = DatabaseCatalog::instance().tryGetTable(resolved_remote_storage_id, query_context);
|
|
|
|
if (!storage)
|
|
|
|
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
|
|
|
|
"Distributed local table {} does not exists on coordinator",
|
|
|
|
remote_storage_id.getFullTableName());
|
|
|
|
|
|
|
|
auto storage_lock = storage->lockForShare(query_context->getInitialQueryId(), query_context->getSettingsRef().lock_acquire_timeout);
|
|
|
|
auto storage_snapshot = storage->getStorageSnapshot(storage->getInMemoryMetadataPtr(), query_context);
|
|
|
|
auto replacement_table_expression = std::make_shared<TableNode>(std::move(storage), std::move(storage_lock), std::move(storage_snapshot));
|
|
|
|
|
|
|
|
std::unordered_map<const IQueryTreeNode *, QueryTreeNodePtr> replacement_map;
|
|
|
|
replacement_map.emplace(query_info.table_expression.get(), std::move(replacement_table_expression));
|
|
|
|
|
|
|
|
return query_info.query_tree->cloneAndReplace(replacement_map);
|
|
|
|
}
|
|
|
|
|
2023-01-10 11:52:29 +00:00
|
|
|
}
|
|
|
|
|
2020-09-25 13:19:26 +00:00
|
|
|
void StorageDistributed::read(
|
2020-09-18 14:16:53 +00:00
|
|
|
QueryPlan & query_plan,
|
2022-01-10 18:21:24 +00:00
|
|
|
const Names &,
|
2021-07-09 03:15:41 +00:00
|
|
|
const StorageSnapshotPtr & storage_snapshot,
|
2020-09-20 17:52:17 +00:00
|
|
|
SelectQueryInfo & query_info,
|
2021-04-10 23:33:54 +00:00
|
|
|
ContextPtr local_context,
|
2018-04-19 14:47:09 +00:00
|
|
|
QueryProcessingStage::Enum processed_stage,
|
|
|
|
const size_t /*max_block_size*/,
|
2022-10-07 10:46:45 +00:00
|
|
|
const size_t /*num_streams*/)
|
2018-04-19 14:47:09 +00:00
|
|
|
{
|
2021-12-09 10:39:28 +00:00
|
|
|
const auto * select_query = query_info.query->as<ASTSelectQuery>();
|
|
|
|
if (select_query->final() && local_context->getSettingsRef().allow_experimental_parallel_reading_from_replicas)
|
|
|
|
throw Exception(ErrorCodes::ILLEGAL_FINAL, "Final modifier is not allowed together with parallel reading from replicas feature");
|
|
|
|
|
2022-12-23 17:45:28 +00:00
|
|
|
Block header;
|
|
|
|
ASTPtr query_ast;
|
2015-02-10 20:48:17 +00:00
|
|
|
|
2022-12-23 17:45:28 +00:00
|
|
|
if (local_context->getSettingsRef().allow_experimental_analyzer)
|
|
|
|
{
|
|
|
|
StorageID remote_storage_id{remote_database, remote_table};
|
|
|
|
auto query_tree_with_replaced_distributed_table = buildQueryTreeDistributedTableReplacedWithLocalTable(query_info, remote_storage_id);
|
|
|
|
query_ast = queryNodeToSelectQuery(query_tree_with_replaced_distributed_table);
|
|
|
|
Planner planner(query_tree_with_replaced_distributed_table, SelectQueryOptions(processed_stage), PlannerConfiguration{.only_analyze = true});
|
|
|
|
planner.buildQueryPlanIfNeeded();
|
|
|
|
header = planner.getQueryPlan().getCurrentDataStream().header;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
header =
|
|
|
|
InterpreterSelectQuery(query_info.query, local_context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock();
|
|
|
|
query_ast = query_info.query;
|
|
|
|
}
|
2015-02-10 20:48:17 +00:00
|
|
|
|
2022-12-23 17:45:28 +00:00
|
|
|
auto modified_query_ast = rewriteSelectQuery(
|
|
|
|
local_context, query_ast,
|
|
|
|
remote_database, remote_table, remote_table_function_ptr);
|
2018-02-15 18:54:12 +00:00
|
|
|
|
2021-03-09 19:00:38 +00:00
|
|
|
/// Return directly (with correct header) if no shard to query.
|
2021-03-29 19:02:34 +00:00
|
|
|
if (query_info.getCluster()->getShardsInfo().empty())
|
2021-03-09 19:00:38 +00:00
|
|
|
{
|
|
|
|
Pipe pipe(std::make_shared<NullSource>(header));
|
|
|
|
auto read_from_pipe = std::make_unique<ReadFromPreparedSource>(std::move(pipe));
|
|
|
|
read_from_pipe->setStepDescription("Read from NullSource (Distributed)");
|
|
|
|
query_plan.addStep(std::move(read_from_pipe));
|
|
|
|
|
|
|
|
return;
|
|
|
|
}
|
2018-02-15 18:54:12 +00:00
|
|
|
|
2021-07-15 16:15:16 +00:00
|
|
|
StorageID main_table = StorageID::createEmpty();
|
|
|
|
if (!remote_table_function_ptr)
|
|
|
|
main_table = StorageID{remote_database, remote_table};
|
|
|
|
|
2021-07-23 16:30:18 +00:00
|
|
|
const auto & snapshot_data = assert_cast<const SnapshotData &>(*storage_snapshot->data);
|
2021-07-15 16:15:16 +00:00
|
|
|
ClusterProxy::SelectStreamFactory select_stream_factory =
|
|
|
|
ClusterProxy::SelectStreamFactory(
|
2021-04-10 23:33:54 +00:00
|
|
|
header,
|
2021-07-23 16:30:18 +00:00
|
|
|
snapshot_data.objects_by_shard,
|
|
|
|
storage_snapshot,
|
2022-01-10 18:21:24 +00:00
|
|
|
processed_stage);
|
2015-11-06 17:44:01 +00:00
|
|
|
|
2022-06-02 09:46:33 +00:00
|
|
|
auto settings = local_context->getSettingsRef();
|
|
|
|
|
2023-01-19 08:13:59 +00:00
|
|
|
ClusterProxy::AdditionalShardFilterGenerator additional_shard_filter_generator;
|
2023-01-24 10:46:47 +00:00
|
|
|
if (canUseCustomKey(settings, *getCluster()))
|
2023-01-17 12:34:42 +00:00
|
|
|
{
|
2023-01-24 10:46:47 +00:00
|
|
|
const auto get_custom_key_ast = [&]() -> ASTPtr
|
2023-01-19 11:28:26 +00:00
|
|
|
{
|
2023-01-24 10:46:47 +00:00
|
|
|
auto distributed_table = DatabaseAndTableWithAlias(
|
|
|
|
*getTableExpression(query_info.query->as<ASTSelectQuery &>(), 0), local_context->getCurrentDatabase());
|
|
|
|
if (auto custom_key_ast = parseCustomKeyForTable(settings.parallel_replicas_custom_key, distributed_table, *local_context))
|
|
|
|
return custom_key_ast;
|
|
|
|
|
|
|
|
DatabaseAndTableWithAlias remote_table_info;
|
|
|
|
remote_table_info.database = remote_database;
|
|
|
|
remote_table_info.table = remote_table;
|
|
|
|
if (auto custom_key_ast = parseCustomKeyForTable(settings.parallel_replicas_custom_key, remote_table_info, *local_context))
|
|
|
|
return custom_key_ast;
|
|
|
|
|
|
|
|
return nullptr;
|
|
|
|
};
|
2023-01-19 08:13:59 +00:00
|
|
|
|
2023-01-24 10:46:47 +00:00
|
|
|
if (auto custom_key_ast = get_custom_key_ast())
|
2023-01-17 12:34:42 +00:00
|
|
|
{
|
2023-01-24 10:46:47 +00:00
|
|
|
if (query_info.getCluster()->getShardCount() == 1)
|
|
|
|
{
|
|
|
|
// we are reading from single shard with multiple replicas but didn't transform replicas
|
|
|
|
// into virtual shards with custom_key set
|
|
|
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "Replicas weren't transformed into virtual shards");
|
|
|
|
}
|
|
|
|
|
|
|
|
additional_shard_filter_generator =
|
|
|
|
[&, custom_key_ast = std::move(custom_key_ast), shard_count = query_info.cluster->getShardCount()](uint64_t shard_num) -> ASTPtr
|
|
|
|
{
|
|
|
|
return getCustomKeyFilterForParallelReplica(
|
|
|
|
shard_count, shard_num - 1, custom_key_ast, settings.parallel_replicas_custom_key_filter_type, *this, local_context);
|
|
|
|
};
|
|
|
|
}
|
2023-01-17 12:34:42 +00:00
|
|
|
}
|
|
|
|
|
2023-01-19 11:28:26 +00:00
|
|
|
bool parallel_replicas = settings.max_parallel_replicas > 1 && settings.allow_experimental_parallel_reading_from_replicas
|
|
|
|
&& !settings.use_hedged_requests && settings.parallel_replicas_mode == ParallelReplicasMode::READ_TASKS;
|
|
|
|
|
2022-06-02 09:46:33 +00:00
|
|
|
if (parallel_replicas)
|
|
|
|
ClusterProxy::executeQueryWithParallelReplicas(
|
|
|
|
query_plan, main_table, remote_table_function_ptr,
|
|
|
|
select_stream_factory, modified_query_ast,
|
|
|
|
local_context, query_info,
|
|
|
|
sharding_key_expr, sharding_key_column_name,
|
2022-11-27 23:41:31 +00:00
|
|
|
query_info.cluster, processed_stage);
|
2022-06-02 09:46:33 +00:00
|
|
|
else
|
|
|
|
ClusterProxy::executeQuery(
|
|
|
|
query_plan, header, processed_stage,
|
|
|
|
main_table, remote_table_function_ptr,
|
|
|
|
select_stream_factory, log, modified_query_ast,
|
|
|
|
local_context, query_info,
|
|
|
|
sharding_key_expr, sharding_key_column_name,
|
2023-01-19 08:13:59 +00:00
|
|
|
query_info.cluster, additional_shard_filter_generator);
|
2021-03-09 19:00:38 +00:00
|
|
|
|
|
|
|
/// This is a bug, it is possible only when there is no shards to query, and this is handled earlier.
|
|
|
|
if (!query_plan.isInitialized())
|
2023-01-23 21:13:58 +00:00
|
|
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "Pipeline is not initialized");
|
2022-12-23 17:45:28 +00:00
|
|
|
|
|
|
|
if (local_context->getSettingsRef().allow_experimental_analyzer)
|
|
|
|
{
|
|
|
|
Planner planner(query_info.query_tree, SelectQueryOptions(processed_stage), PlannerConfiguration{.only_analyze = true});
|
|
|
|
planner.buildQueryPlanIfNeeded();
|
|
|
|
auto expected_header = planner.getQueryPlan().getCurrentDataStream().header;
|
|
|
|
|
|
|
|
auto rename_actions_dag = ActionsDAG::makeConvertingActions(
|
|
|
|
query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName(),
|
|
|
|
expected_header.getColumnsWithTypeAndName(),
|
|
|
|
ActionsDAG::MatchColumnsMode::Position,
|
|
|
|
true /*ignore_constant_values*/);
|
|
|
|
auto rename_step = std::make_unique<ExpressionStep>(query_plan.getCurrentDataStream(), std::move(rename_actions_dag));
|
|
|
|
rename_step->setStepDescription("Change remote column names to local column names");
|
|
|
|
query_plan.addStep(std::move(rename_step));
|
|
|
|
}
|
2012-05-21 20:38:34 +00:00
|
|
|
}
|
|
|
|
|
2017-03-09 03:34:09 +00:00
|
|
|
|
2021-07-23 14:25:35 +00:00
|
|
|
/// Create a sink for INSERT into the Distributed table. Validates that the
/// requested insertion mode is possible for this table/cluster configuration,
/// then returns a DistributedSink configured for sync or async delivery.
SinkToStoragePtr StorageDistributed::write(const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context)
{
    auto cluster = getCluster();
    const auto & settings = local_context->getSettingsRef();

    /// Ban an attempt to make async insert into the table belonging to DatabaseMemory
    /// (no storage_policy means there is no on-disk directory to queue parts in).
    if (!storage_policy && !owned_cluster && !settings.insert_distributed_sync && !settings.insert_shard_id)
    {
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Storage {} must have own data directory to enable asynchronous inserts",
                        getName());
    }

    auto shard_num = cluster->getLocalShardCount() + cluster->getRemoteShardCount();

    /// If sharding key is not specified, then you can only write to a shard containing only one shard
    if (!settings.insert_shard_id && !settings.insert_distributed_one_random_shard && !has_sharding_key && shard_num >= 2)
    {
        throw Exception(ErrorCodes::STORAGE_REQUIRES_PARAMETER,
                        "Method write is not supported by storage {} with more than one shard and no sharding key provided", getName());
    }

    /// insert_shard_id is 1-based and must address an existing shard.
    if (settings.insert_shard_id && settings.insert_shard_id > shard_num)
    {
        throw Exception(ErrorCodes::INVALID_SHARD_ID, "Shard id should be range from 1 to shard number");
    }

    /// Force sync insertion if it is remote() table function
    /// (owned_cluster is set for table-function-created instances).
    bool insert_sync = settings.insert_distributed_sync || settings.insert_shard_id || owned_cluster;
    auto timeout = settings.insert_distributed_timeout;

    /// Materialized columns are sent only when explicitly allowed.
    Names columns_to_send;
    if (settings.insert_allow_materialized_columns)
        columns_to_send = metadata_snapshot->getSampleBlock().getNames();
    else
        columns_to_send = metadata_snapshot->getSampleBlockNonMaterialized().getNames();

    /// DistributedSink will not own cluster, but will own ConnectionPools of the cluster
    return std::make_shared<DistributedSink>(
        local_context, *this, metadata_snapshot, cluster, insert_sync, timeout,
        StorageID{remote_database, remote_table}, columns_to_send);
}
|
|
|
|
|
2017-03-09 03:34:09 +00:00
|
|
|
|
2023-01-09 12:30:32 +00:00
|
|
|
/// Try to execute INSERT ... SELECT between two Distributed tables as a set of
/// parallel per-shard inserts (each shard inserts from its own local data).
/// Returns an empty optional when the optimization is not applicable (clusters
/// have different address sets), in which case the caller falls back to the
/// regular path.
std::optional<QueryPipeline> StorageDistributed::distributedWriteBetweenDistributedTables(const StorageDistributed & src_distributed, const ASTInsertQuery & query, ContextPtr local_context) const
{
    const auto & settings = local_context->getSettingsRef();
    auto new_query = std::dynamic_pointer_cast<ASTInsertQuery>(query.clone());

    /// Unwrap view() function.
    if (src_distributed.remote_table_function_ptr)
    {
        const TableFunctionPtr src_table_function =
            TableFunctionFactory::instance().get(src_distributed.remote_table_function_ptr, local_context);
        const TableFunctionView * view_function =
            assert_cast<const TableFunctionView *>(src_table_function.get());
        new_query->select = view_function->getSelectQuery().clone();
    }
    else
    {
        /// Rewrite the SELECT so that each shard reads from its local
        /// underlying table instead of the Distributed one.
        const auto select_with_union_query = std::make_shared<ASTSelectWithUnionQuery>();
        select_with_union_query->list_of_selects = std::make_shared<ASTExpressionList>();

        auto * select = query.select->as<ASTSelectWithUnionQuery &>().list_of_selects->children.at(0)->as<ASTSelectQuery>();
        auto new_select_query = std::dynamic_pointer_cast<ASTSelectQuery>(select->clone());
        select_with_union_query->list_of_selects->children.push_back(new_select_query);

        new_select_query->replaceDatabaseAndTable(src_distributed.getRemoteDatabaseName(), src_distributed.getRemoteTableName());

        new_query->select = select_with_union_query;
    }

    const Cluster::AddressesWithFailover & src_addresses = src_distributed.getCluster()->getShardsAddresses();
    const Cluster::AddressesWithFailover & dst_addresses = getCluster()->getShardsAddresses();
    /// Compare addresses instead of cluster name, to handle remote()/cluster().
    /// (since for remote()/cluster() the getClusterName() is empty string)
    if (src_addresses != dst_addresses)
    {
        /// The warning should be produced only for root queries,
        /// since in case of parallel_distributed_insert_select=1,
        /// it will produce warning for the rewritten insert,
        /// since destination table is still Distributed there.
        if (local_context->getClientInfo().distributed_depth == 0)
        {
            LOG_WARNING(log,
                "Parallel distributed INSERT SELECT is not possible "
                "(source cluster={} ({} addresses), destination cluster={} ({} addresses))",
                src_distributed.getClusterName(),
                src_addresses.size(),
                getClusterName(),
                dst_addresses.size());
        }
        return {};
    }

    /// With mode=ALL, each shard inserts into its local destination table.
    if (settings.parallel_distributed_insert_select == PARALLEL_DISTRIBUTED_INSERT_SELECT_ALL)
    {
        new_query->table_id = StorageID(getRemoteDatabaseName(), getRemoteTableName());
        /// Reset table function for INSERT INTO remote()/cluster()
        new_query->table_function.reset();
    }

    const auto & cluster = getCluster();
    const auto & shards_info = cluster->getShardsInfo();

    /// Serialize the rewritten query once; the same text is sent to every shard.
    String new_query_str;
    {
        WriteBufferFromOwnString buf;
        IAST::FormatSettings ast_format_settings(buf, /*one_line*/ true);
        ast_format_settings.always_quote_identifiers = true;
        new_query->IAST::format(ast_format_settings);
        new_query_str = buf.str();
    }

    QueryPipeline pipeline;
    /// Bump distributed_depth in a copied context so the remote side can tell
    /// rewritten inserts from root queries (and enforce max_distributed_depth).
    ContextMutablePtr query_context = Context::createCopy(local_context);
    ++query_context->getClientInfo().distributed_depth;

    for (size_t shard_index : collections::range(0, shards_info.size()))
    {
        const auto & shard_info = shards_info[shard_index];
        if (shard_info.isLocal())
        {
            InterpreterInsertQuery interpreter(new_query, query_context);
            pipeline.addCompletedPipeline(interpreter.execute().pipeline);
        }
        else
        {
            auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(settings);
            auto connections = shard_info.pool->getMany(timeouts, &settings, PoolMode::GET_ONE);
            if (connections.empty() || connections.front().isNull())
                throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected exactly one connection for shard {}",
                    shard_info.shard_num);

            /// INSERT SELECT query returns empty block
            auto remote_query_executor
                = std::make_shared<RemoteQueryExecutor>(shard_info.pool, std::move(connections), new_query_str, Block{}, query_context);
            QueryPipeline remote_pipeline(std::make_shared<RemoteSource>(remote_query_executor, false, settings.async_socket_for_remote));
            remote_pipeline.complete(std::make_shared<EmptySink>(remote_query_executor->getHeader()));

            pipeline.addCompletedPipeline(std::move(remote_pipeline));
        }
    }

    return pipeline;
}
|
|
|
|
|
|
|
|
|
2023-01-09 12:30:32 +00:00
|
|
|
/// Execute INSERT ... SELECT from a *Cluster storage (e.g. s3Cluster) into
/// this Distributed table: every replica of the destination cluster runs the
/// insert against its own node, pulling work items through the task iterator
/// extension provided by the source storage.
std::optional<QueryPipeline> StorageDistributed::distributedWriteFromClusterStorage(const IStorageCluster & src_storage_cluster, const ASTInsertQuery & query, ContextPtr local_context) const
{
    const auto & settings = local_context->getSettingsRef();
    auto & select = query.select->as<ASTSelectWithUnionQuery &>();
    /// Select query is needed for pruning on virtual columns
    auto extension = src_storage_cluster.getTaskIteratorExtension(
        select.list_of_selects->children.at(0)->as<ASTSelectQuery>()->clone(),
        local_context);

    auto dst_cluster = getCluster();

    auto new_query = std::dynamic_pointer_cast<ASTInsertQuery>(query.clone());
    /// With mode=ALL, each node inserts into its local destination table.
    if (settings.parallel_distributed_insert_select == PARALLEL_DISTRIBUTED_INSERT_SELECT_ALL)
    {
        new_query->table_id = StorageID(getRemoteDatabaseName(), getRemoteTableName());
        /// Reset table function for INSERT INTO remote()/cluster()
        new_query->table_function.reset();
    }

    /// Serialize the rewritten query once; the same text is sent to every node.
    String new_query_str;
    {
        WriteBufferFromOwnString buf;
        IAST::FormatSettings ast_format_settings(buf, /*one_line*/ true);
        ast_format_settings.always_quote_identifiers = true;
        new_query->IAST::format(ast_format_settings);
        new_query_str = buf.str();
    }

    QueryPipeline pipeline;
    /// Bump distributed_depth so remote nodes treat this as a rewritten insert.
    ContextMutablePtr query_context = Context::createCopy(local_context);
    ++query_context->getClientInfo().distributed_depth;

    /// Here we take addresses from destination cluster and assume source table exists on these nodes
    for (const auto & replicas : getCluster()->getShardsAddresses())
    {
        /// There will be only one replica, because we consider each replica as a shard
        for (const auto & node : replicas)
        {
            /// NOTE(review): the client-name literal below contains a typo
            /// ("Inititiator"); kept byte-identical since it is sent over the
            /// wire and may be matched elsewhere — confirm before changing.
            auto connection = std::make_shared<Connection>(
                node.host_name, node.port, query_context->getGlobalContext()->getCurrentDatabase(),
                node.user, node.password, node.quota_key, node.cluster, node.cluster_secret,
                "ParallelInsertSelectInititiator",
                node.compression,
                node.secure
            );

            auto remote_query_executor = std::make_shared<RemoteQueryExecutor>(
                connection,
                new_query_str,
                Block{},
                query_context,
                /*throttler=*/nullptr,
                Scalars{},
                Tables{},
                QueryProcessingStage::Complete,
                extension);

            QueryPipeline remote_pipeline(std::make_shared<RemoteSource>(remote_query_executor, false, settings.async_socket_for_remote));
            remote_pipeline.complete(std::make_shared<EmptySink>(remote_query_executor->getHeader()));

            pipeline.addCompletedPipeline(std::move(remote_pipeline));
        }
    }

    return pipeline;
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Entry point for the parallel distributed INSERT SELECT optimization.
/// Inspects the SELECT side; if it reads from a single Distributed table or a
/// *Cluster storage, dispatches to the corresponding specialized writer.
/// Returns an empty optional to signal "use the regular insert path".
std::optional<QueryPipeline> StorageDistributed::distributedWrite(const ASTInsertQuery & query, ContextPtr local_context)
{
    const Settings & settings = local_context->getSettingsRef();
    /// Guard against unbounded insert rewriting across clusters.
    if (settings.max_distributed_depth && local_context->getClientInfo().distributed_depth >= settings.max_distributed_depth)
        throw Exception(ErrorCodes::TOO_LARGE_DISTRIBUTED_DEPTH, "Maximum distributed depth exceeded");

    auto & select = query.select->as<ASTSelectWithUnionQuery &>();

    StoragePtr src_storage;

    /// Distributed write only works in the most trivial case INSERT ... SELECT
    /// without any unions or joins on the right side
    if (select.list_of_selects->children.size() == 1)
    {
        if (auto * select_query = select.list_of_selects->children.at(0)->as<ASTSelectQuery>())
        {
            JoinedTables joined_tables(Context::createCopy(local_context), *select_query);

            if (joined_tables.tablesCount() == 1)
            {
                src_storage = joined_tables.getLeftTableStorage();
            }
        }
    }

    if (!src_storage)
        return {};

    if (auto src_distributed = std::dynamic_pointer_cast<StorageDistributed>(src_storage))
    {
        return distributedWriteBetweenDistributedTables(*src_distributed, query, local_context);
    }
    if (auto src_storage_cluster = std::dynamic_pointer_cast<IStorageCluster>(src_storage))
    {
        return distributedWriteFromClusterStorage(*src_storage_cluster, query, local_context);
    }
    /// Unsupported source: this is an error only for root queries; rewritten
    /// (depth > 0) inserts silently fall back.
    if (local_context->getClientInfo().distributed_depth == 0)
    {
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parallel distributed INSERT SELECT is not possible. "
            "Reason: distributed reading is supported only from Distributed engine "
            "or *Cluster table functions, but got {} storage", src_storage->getName());
    }

    return {};
}
|
|
|
|
|
|
|
|
|
2021-04-10 23:33:54 +00:00
|
|
|
/// Validate ALTER commands for a Distributed table: only column-list and
/// comment changes are supported, and a column referenced by a materialized
/// view cannot be dropped (CLEAR is still allowed).
void StorageDistributed::checkAlterIsPossible(const AlterCommands & commands, ContextPtr local_context) const
{
    /// Map of column name -> materialized views that depend on it.
    auto name_deps = getDependentViewsByColumn(local_context);
    for (const auto & command : commands)
    {
        if (command.type != AlterCommand::Type::ADD_COLUMN && command.type != AlterCommand::Type::MODIFY_COLUMN
            && command.type != AlterCommand::Type::DROP_COLUMN && command.type != AlterCommand::Type::COMMENT_COLUMN
            && command.type != AlterCommand::Type::RENAME_COLUMN && command.type != AlterCommand::Type::COMMENT_TABLE)

            throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Alter of type '{}' is not supported by storage {}",
                command.type, getName());

        /// DROP COLUMN (but not CLEAR COLUMN) would break dependent views.
        if (command.type == AlterCommand::DROP_COLUMN && !command.clear)
        {
            const auto & deps_mv = name_deps[command.column_name];
            if (!deps_mv.empty())
            {
                throw Exception(ErrorCodes::ALTER_OF_COLUMN_IS_FORBIDDEN,
                    "Trying to ALTER DROP column {} which is referenced by materialized view {}",
                    backQuoteIfNeed(command.column_name), toString(deps_mv));
            }
        }
    }
}
|
|
|
|
|
2021-10-25 17:49:49 +00:00
|
|
|
/// Apply a metadata-only ALTER (column list / comments). Validates first,
/// applies the commands to an in-memory copy, persists via the database, and
/// only then publishes the new metadata to this storage object.
void StorageDistributed::alter(const AlterCommands & params, ContextPtr local_context, AlterLockHolder &)
{
    auto table_id = getStorageID();

    checkAlterIsPossible(params, local_context);
    StorageInMemoryMetadata new_metadata = getInMemoryMetadata();
    params.apply(new_metadata, local_context);
    /// Persist first; update in-memory state only after the write succeeds.
    DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(local_context, table_id, new_metadata);
    setInMemoryMetadata(new_metadata);
}
|
2014-02-04 15:44:15 +00:00
|
|
|
|
2023-01-23 08:51:32 +00:00
|
|
|
void StorageDistributed::initializeFromDisk()
|
2017-06-06 17:06:14 +00:00
|
|
|
{
|
2020-07-23 14:10:48 +00:00
|
|
|
if (!storage_policy)
|
2020-01-20 17:54:52 +00:00
|
|
|
return;
|
|
|
|
|
2021-06-24 07:07:31 +00:00
|
|
|
const auto & disks = data_volume->getDisks();
|
2021-06-27 15:22:34 +00:00
|
|
|
|
|
|
|
/// Make initialization for large number of disks parallel.
|
2021-06-24 07:07:31 +00:00
|
|
|
ThreadPool pool(disks.size());
|
|
|
|
|
|
|
|
for (const DiskPtr & disk : disks)
|
|
|
|
{
|
|
|
|
pool.scheduleOrThrowOnError([&]()
|
|
|
|
{
|
|
|
|
createDirectoryMonitors(disk);
|
|
|
|
});
|
|
|
|
}
|
|
|
|
pool.wait();
|
|
|
|
|
|
|
|
const auto & paths = getDataPaths();
|
|
|
|
std::vector<UInt64> last_increment(paths.size());
|
|
|
|
for (size_t i = 0; i < paths.size(); ++i)
|
|
|
|
{
|
|
|
|
pool.scheduleOrThrowOnError([&, i]()
|
|
|
|
{
|
|
|
|
last_increment[i] = getMaximumFileNumber(paths[i]);
|
|
|
|
});
|
|
|
|
}
|
|
|
|
pool.wait();
|
2020-01-20 17:54:52 +00:00
|
|
|
|
2021-06-24 07:07:31 +00:00
|
|
|
for (const auto inc : last_increment)
|
2020-01-20 17:54:52 +00:00
|
|
|
{
|
|
|
|
if (inc > file_names_increment.value)
|
|
|
|
file_names_increment.value.store(inc);
|
|
|
|
}
|
2020-05-23 22:24:01 +00:00
|
|
|
LOG_DEBUG(log, "Auto-increment is {}", file_names_increment.value);
|
2017-06-06 17:06:14 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2014-08-13 11:26:13 +00:00
|
|
|
void StorageDistributed::shutdown()
|
|
|
|
{
|
2020-04-24 23:03:27 +00:00
|
|
|
monitors_blocker.cancelForever();
|
|
|
|
|
2020-04-24 23:03:26 +00:00
|
|
|
std::lock_guard lock(cluster_nodes_mutex);
|
Fix DROP TABLE for Distributed (racy with INSERT)
<details>
```
drop() on T1275:
0 DB::StorageDistributed::drop (this=0x7f9ed34f0000) at ../contrib/libcxx/include/__hash_table:966
1 0x000000000d557242 in DB::DatabaseOnDisk::dropTable (this=0x7f9fc22706d8, context=..., table_name=...)
at ../contrib/libcxx/include/new:340
2 0x000000000d6fcf7c in DB::InterpreterDropQuery::executeToTable (this=this@entry=0x7f9e42560dc0, query=...)
at ../contrib/libcxx/include/memory:3826
3 0x000000000d6ff5ee in DB::InterpreterDropQuery::execute (this=0x7f9e42560dc0) at ../src/Interpreters/InterpreterDropQuery.cpp:50
4 0x000000000daa40c0 in DB::executeQueryImpl (begin=<optimized out>, end=<optimized out>, context=..., internal=<optimized out>,
stage=DB::QueryProcessingStage::Complete, has_query_tail=false, istr=0x0) at ../src/Interpreters/executeQuery.cpp:420
5 0x000000000daa59df in DB::executeQuery (query=..., context=..., internal=internal@entry=false, stage=<optimized out>,
may_have_embedded_data=<optimized out>) at ../contrib/libcxx/include/string:1487
6 0x000000000e1369e6 in DB::TCPHandler::runImpl (this=this@entry=0x7f9ddf3a9000) at ../src/Server/TCPHandler.cpp:254
7 0x000000000e1379c9 in DB::TCPHandler::run (this=0x7f9ddf3a9000) at ../src/Server/TCPHandler.cpp:1326
8 0x000000001086fac7 in Poco::Net::TCPServerConnection::start (this=this@entry=0x7f9ddf3a9000)
at ../contrib/poco/Net/src/TCPServerConnection.cpp:43
9 0x000000001086ff2b in Poco::Net::TCPServerDispatcher::run (this=0x7f9e4eba5c00)
at ../contrib/poco/Net/src/TCPServerDispatcher.cpp:114
10 0x00000000109dbe8e in Poco::PooledThread::run (this=0x7f9e4a2d2f80) at ../contrib/poco/Foundation/src/ThreadPool.cpp:199
11 0x00000000109d78f9 in Poco::ThreadImpl::runnableEntry (pThread=<optimized out>)
at ../contrib/poco/Foundation/include/Poco/SharedPtr.h:401
12 0x00007f9fc3cccea7 in start_thread (arg=<optimized out>) at pthread_create.c:477
13 0x00007f9fc3bebeaf in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
StorageDistributedDirectoryMonitor on T166:
0 DB::StorageDistributedDirectoryMonitor::StorageDistributedDirectoryMonitor (this=0x7f9ea7ab1400, storage_=..., path_=...,
pool_=..., monitor_blocker_=..., bg_pool_=...) at ../src/Storages/Distributed/DirectoryMonitor.cpp:81
1 0x000000000dbf684e in std::__1::make_unique<> () at ../contrib/libcxx/include/memory:3474
2 DB::StorageDistributed::requireDirectoryMonitor (this=0x7f9ed34f0000, disk=..., name=...)
at ../src/Storages/StorageDistributed.cpp:682
3 0x000000000de3d5fa in DB::DistributedBlockOutputStream::writeToShard (this=this@entry=0x7f9ed39c7418, block=..., dir_names=...)
at ../src/Storages/Distributed/DistributedBlockOutputStream.cpp:634
4 0x000000000de3e214 in DB::DistributedBlockOutputStream::writeAsyncImpl (this=this@entry=0x7f9ed39c7418, block=...,
shard_id=shard_id@entry=79) at ../src/Storages/Distributed/DistributedBlockOutputStream.cpp:539
5 0x000000000de3e47b in DB::DistributedBlockOutputStream::writeSplitAsync (this=this@entry=0x7f9ed39c7418, block=...)
at ../contrib/libcxx/include/vector:1546
6 0x000000000de3eab0 in DB::DistributedBlockOutputStream::writeAsync (block=..., this=0x7f9ed39c7418)
at ../src/Storages/Distributed/DistributedBlockOutputStream.cpp:141
7 DB::DistributedBlockOutputStream::write (this=0x7f9ed39c7418, block=...)
at ../src/Storages/Distributed/DistributedBlockOutputStream.cpp:135
8 0x000000000d73b376 in DB::PushingToViewsBlockOutputStream::write (this=this@entry=0x7f9ea7a8cf58, block=...)
at ../src/DataStreams/PushingToViewsBlockOutputStream.cpp:157
9 0x000000000d7853eb in DB::AddingDefaultBlockOutputStream::write (this=0x7f9ed383d118, block=...)
at ../contrib/libcxx/include/memory:3826
10 0x000000000d740790 in DB::SquashingBlockOutputStream::write (this=0x7f9ed383de18, block=...)
at ../contrib/libcxx/include/memory:3826
11 0x000000000d68c308 in DB::CountingBlockOutputStream::write (this=0x7f9ea7ac6d60, block=...)
at ../contrib/libcxx/include/memory:3826
12 0x000000000ddab449 in DB::StorageBuffer::writeBlockToDestination (this=this@entry=0x7f9fbd56a000, block=..., table=...)
at ../src/Storages/StorageBuffer.cpp:747
13 0x000000000ddabfa6 in DB::StorageBuffer::flushBuffer (this=this@entry=0x7f9fbd56a000, buffer=...,
check_thresholds=check_thresholds@entry=true, locked=locked@entry=false, reset_block_structure=reset_block_structure@entry=false)
at ../src/Storages/StorageBuffer.cpp:661
14 0x000000000ddac415 in DB::StorageBuffer::flushAllBuffers (reset_blocks_structure=false, check_thresholds=true, this=0x7f9fbd56a000)
at ../src/Storages/StorageBuffer.cpp:605
shutdown() on T1275:
0 DB::StorageDistributed::shutdown (this=0x7f9ed34f0000) at ../contrib/libcxx/include/atomic:1612
1 0x000000000d6fd938 in DB::InterpreterDropQuery::executeToTable (this=this@entry=0x7f98530c79a0, query=...)
at ../src/Storages/TableLockHolder.h:12
2 0x000000000d6ff5ee in DB::InterpreterDropQuery::execute (this=0x7f98530c79a0) at ../src/Interpreters/InterpreterDropQuery.cpp:50
3 0x000000000daa40c0 in DB::executeQueryImpl (begin=<optimized out>, end=<optimized out>, context=..., internal=<optimized out>,
stage=DB::QueryProcessingStage::Complete, has_query_tail=false, istr=0x0) at ../src/Interpreters/executeQuery.cpp:420
4 0x000000000daa59df in DB::executeQuery (query=..., context=..., internal=internal@entry=false, stage=<optimized out>,
may_have_embedded_data=<optimized out>) at ../contrib/libcxx/include/string:1487
5 0x000000000e1369e6 in DB::TCPHandler::runImpl (this=this@entry=0x7f9ddf3a9000) at ../src/Server/TCPHandler.cpp:254
6 0x000000000e1379c9 in DB::TCPHandler::run (this=0x7f9ddf3a9000) at ../src/Server/TCPHandler.cpp:1326
7 0x000000001086fac7 in Poco::Net::TCPServerConnection::start (this=this@entry=0x7f9ddf3a9000)
at ../contrib/poco/Net/src/TCPServerConnection.cpp:43
8 0x000000001086ff2b in Poco::Net::TCPServerDispatcher::run (this=0x7f9e4eba5c00)
at ../contrib/poco/Net/src/TCPServerDispatcher.cpp:114
9 0x00000000109dbe8e in Poco::PooledThread::run (this=0x7f9e4a2d2f80) at ../contrib/poco/Foundation/src/ThreadPool.cpp:199
10 0x00000000109d78f9 in Poco::ThreadImpl::runnableEntry (pThread=<optimized out>)
at ../contrib/poco/Foundation/include/Poco/SharedPtr.h:401
11 0x00007f9fc3cccea7 in start_thread (arg=<optimized out>) at pthread_create.c:477
12 0x00007f9fc3bebeaf in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
```
</details>
2020-10-26 20:01:06 +00:00
|
|
|
|
|
|
|
LOG_DEBUG(log, "Joining background threads for async INSERT");
|
2017-07-27 15:24:39 +00:00
|
|
|
cluster_nodes_data.clear();
|
Fix DROP TABLE for Distributed (racy with INSERT)
<details>
```
drop() on T1275:
0 DB::StorageDistributed::drop (this=0x7f9ed34f0000) at ../contrib/libcxx/include/__hash_table:966
1 0x000000000d557242 in DB::DatabaseOnDisk::dropTable (this=0x7f9fc22706d8, context=..., table_name=...)
at ../contrib/libcxx/include/new:340
2 0x000000000d6fcf7c in DB::InterpreterDropQuery::executeToTable (this=this@entry=0x7f9e42560dc0, query=...)
at ../contrib/libcxx/include/memory:3826
3 0x000000000d6ff5ee in DB::InterpreterDropQuery::execute (this=0x7f9e42560dc0) at ../src/Interpreters/InterpreterDropQuery.cpp:50
4 0x000000000daa40c0 in DB::executeQueryImpl (begin=<optimized out>, end=<optimized out>, context=..., internal=<optimized out>,
stage=DB::QueryProcessingStage::Complete, has_query_tail=false, istr=0x0) at ../src/Interpreters/executeQuery.cpp:420
5 0x000000000daa59df in DB::executeQuery (query=..., context=..., internal=internal@entry=false, stage=<optimized out>,
may_have_embedded_data=<optimized out>) at ../contrib/libcxx/include/string:1487
6 0x000000000e1369e6 in DB::TCPHandler::runImpl (this=this@entry=0x7f9ddf3a9000) at ../src/Server/TCPHandler.cpp:254
7 0x000000000e1379c9 in DB::TCPHandler::run (this=0x7f9ddf3a9000) at ../src/Server/TCPHandler.cpp:1326
8 0x000000001086fac7 in Poco::Net::TCPServerConnection::start (this=this@entry=0x7f9ddf3a9000)
at ../contrib/poco/Net/src/TCPServerConnection.cpp:43
9 0x000000001086ff2b in Poco::Net::TCPServerDispatcher::run (this=0x7f9e4eba5c00)
at ../contrib/poco/Net/src/TCPServerDispatcher.cpp:114
10 0x00000000109dbe8e in Poco::PooledThread::run (this=0x7f9e4a2d2f80) at ../contrib/poco/Foundation/src/ThreadPool.cpp:199
11 0x00000000109d78f9 in Poco::ThreadImpl::runnableEntry (pThread=<optimized out>)
at ../contrib/poco/Foundation/include/Poco/SharedPtr.h:401
12 0x00007f9fc3cccea7 in start_thread (arg=<optimized out>) at pthread_create.c:477
13 0x00007f9fc3bebeaf in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
StorageDistributedDirectoryMonitor on T166:
0 DB::StorageDistributedDirectoryMonitor::StorageDistributedDirectoryMonitor (this=0x7f9ea7ab1400, storage_=..., path_=...,
pool_=..., monitor_blocker_=..., bg_pool_=...) at ../src/Storages/Distributed/DirectoryMonitor.cpp:81
1 0x000000000dbf684e in std::__1::make_unique<> () at ../contrib/libcxx/include/memory:3474
2 DB::StorageDistributed::requireDirectoryMonitor (this=0x7f9ed34f0000, disk=..., name=...)
at ../src/Storages/StorageDistributed.cpp:682
3 0x000000000de3d5fa in DB::DistributedBlockOutputStream::writeToShard (this=this@entry=0x7f9ed39c7418, block=..., dir_names=...)
at ../src/Storages/Distributed/DistributedBlockOutputStream.cpp:634
4 0x000000000de3e214 in DB::DistributedBlockOutputStream::writeAsyncImpl (this=this@entry=0x7f9ed39c7418, block=...,
shard_id=shard_id@entry=79) at ../src/Storages/Distributed/DistributedBlockOutputStream.cpp:539
5 0x000000000de3e47b in DB::DistributedBlockOutputStream::writeSplitAsync (this=this@entry=0x7f9ed39c7418, block=...)
at ../contrib/libcxx/include/vector:1546
6 0x000000000de3eab0 in DB::DistributedBlockOutputStream::writeAsync (block=..., this=0x7f9ed39c7418)
at ../src/Storages/Distributed/DistributedBlockOutputStream.cpp:141
7 DB::DistributedBlockOutputStream::write (this=0x7f9ed39c7418, block=...)
at ../src/Storages/Distributed/DistributedBlockOutputStream.cpp:135
8 0x000000000d73b376 in DB::PushingToViewsBlockOutputStream::write (this=this@entry=0x7f9ea7a8cf58, block=...)
at ../src/DataStreams/PushingToViewsBlockOutputStream.cpp:157
9 0x000000000d7853eb in DB::AddingDefaultBlockOutputStream::write (this=0x7f9ed383d118, block=...)
at ../contrib/libcxx/include/memory:3826
10 0x000000000d740790 in DB::SquashingBlockOutputStream::write (this=0x7f9ed383de18, block=...)
at ../contrib/libcxx/include/memory:3826
11 0x000000000d68c308 in DB::CountingBlockOutputStream::write (this=0x7f9ea7ac6d60, block=...)
at ../contrib/libcxx/include/memory:3826
12 0x000000000ddab449 in DB::StorageBuffer::writeBlockToDestination (this=this@entry=0x7f9fbd56a000, block=..., table=...)
at ../src/Storages/StorageBuffer.cpp:747
13 0x000000000ddabfa6 in DB::StorageBuffer::flushBuffer (this=this@entry=0x7f9fbd56a000, buffer=...,
check_thresholds=check_thresholds@entry=true, locked=locked@entry=false, reset_block_structure=reset_block_structure@entry=false)
at ../src/Storages/StorageBuffer.cpp:661
14 0x000000000ddac415 in DB::StorageBuffer::flushAllBuffers (reset_blocks_structure=false, check_thresholds=true, this=0x7f9fbd56a000)
at ../src/Storages/StorageBuffer.cpp:605
shutdown() on T1275:
0 DB::StorageDistributed::shutdown (this=0x7f9ed34f0000) at ../contrib/libcxx/include/atomic:1612
1 0x000000000d6fd938 in DB::InterpreterDropQuery::executeToTable (this=this@entry=0x7f98530c79a0, query=...)
at ../src/Storages/TableLockHolder.h:12
2 0x000000000d6ff5ee in DB::InterpreterDropQuery::execute (this=0x7f98530c79a0) at ../src/Interpreters/InterpreterDropQuery.cpp:50
3 0x000000000daa40c0 in DB::executeQueryImpl (begin=<optimized out>, end=<optimized out>, context=..., internal=<optimized out>,
stage=DB::QueryProcessingStage::Complete, has_query_tail=false, istr=0x0) at ../src/Interpreters/executeQuery.cpp:420
4 0x000000000daa59df in DB::executeQuery (query=..., context=..., internal=internal@entry=false, stage=<optimized out>,
may_have_embedded_data=<optimized out>) at ../contrib/libcxx/include/string:1487
5 0x000000000e1369e6 in DB::TCPHandler::runImpl (this=this@entry=0x7f9ddf3a9000) at ../src/Server/TCPHandler.cpp:254
6 0x000000000e1379c9 in DB::TCPHandler::run (this=0x7f9ddf3a9000) at ../src/Server/TCPHandler.cpp:1326
7 0x000000001086fac7 in Poco::Net::TCPServerConnection::start (this=this@entry=0x7f9ddf3a9000)
at ../contrib/poco/Net/src/TCPServerConnection.cpp:43
8 0x000000001086ff2b in Poco::Net::TCPServerDispatcher::run (this=0x7f9e4eba5c00)
at ../contrib/poco/Net/src/TCPServerDispatcher.cpp:114
9 0x00000000109dbe8e in Poco::PooledThread::run (this=0x7f9e4a2d2f80) at ../contrib/poco/Foundation/src/ThreadPool.cpp:199
10 0x00000000109d78f9 in Poco::ThreadImpl::runnableEntry (pThread=<optimized out>)
at ../contrib/poco/Foundation/include/Poco/SharedPtr.h:401
11 0x00007f9fc3cccea7 in start_thread (arg=<optimized out>) at pthread_create.c:477
12 0x00007f9fc3bebeaf in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
```
</details>
2020-10-26 20:01:06 +00:00
|
|
|
LOG_DEBUG(log, "Background threads for async INSERT joined");
|
2014-08-13 11:26:13 +00:00
|
|
|
}
|
2023-01-23 08:51:32 +00:00
|
|
|
|
2020-07-16 20:35:23 +00:00
|
|
|
void StorageDistributed::drop()
|
|
|
|
{
|
Fix DROP TABLE for Distributed (racy with INSERT)
<details>
```
drop() on T1275:
0 DB::StorageDistributed::drop (this=0x7f9ed34f0000) at ../contrib/libcxx/include/__hash_table:966
1 0x000000000d557242 in DB::DatabaseOnDisk::dropTable (this=0x7f9fc22706d8, context=..., table_name=...)
at ../contrib/libcxx/include/new:340
2 0x000000000d6fcf7c in DB::InterpreterDropQuery::executeToTable (this=this@entry=0x7f9e42560dc0, query=...)
at ../contrib/libcxx/include/memory:3826
3 0x000000000d6ff5ee in DB::InterpreterDropQuery::execute (this=0x7f9e42560dc0) at ../src/Interpreters/InterpreterDropQuery.cpp:50
4 0x000000000daa40c0 in DB::executeQueryImpl (begin=<optimized out>, end=<optimized out>, context=..., internal=<optimized out>,
stage=DB::QueryProcessingStage::Complete, has_query_tail=false, istr=0x0) at ../src/Interpreters/executeQuery.cpp:420
5 0x000000000daa59df in DB::executeQuery (query=..., context=..., internal=internal@entry=false, stage=<optimized out>,
may_have_embedded_data=<optimized out>) at ../contrib/libcxx/include/string:1487
6 0x000000000e1369e6 in DB::TCPHandler::runImpl (this=this@entry=0x7f9ddf3a9000) at ../src/Server/TCPHandler.cpp:254
7 0x000000000e1379c9 in DB::TCPHandler::run (this=0x7f9ddf3a9000) at ../src/Server/TCPHandler.cpp:1326
8 0x000000001086fac7 in Poco::Net::TCPServerConnection::start (this=this@entry=0x7f9ddf3a9000)
at ../contrib/poco/Net/src/TCPServerConnection.cpp:43
9 0x000000001086ff2b in Poco::Net::TCPServerDispatcher::run (this=0x7f9e4eba5c00)
at ../contrib/poco/Net/src/TCPServerDispatcher.cpp:114
10 0x00000000109dbe8e in Poco::PooledThread::run (this=0x7f9e4a2d2f80) at ../contrib/poco/Foundation/src/ThreadPool.cpp:199
11 0x00000000109d78f9 in Poco::ThreadImpl::runnableEntry (pThread=<optimized out>)
at ../contrib/poco/Foundation/include/Poco/SharedPtr.h:401
12 0x00007f9fc3cccea7 in start_thread (arg=<optimized out>) at pthread_create.c:477
13 0x00007f9fc3bebeaf in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
StorageDistributedDirectoryMonitor on T166:
0 DB::StorageDistributedDirectoryMonitor::StorageDistributedDirectoryMonitor (this=0x7f9ea7ab1400, storage_=..., path_=...,
pool_=..., monitor_blocker_=..., bg_pool_=...) at ../src/Storages/Distributed/DirectoryMonitor.cpp:81
1 0x000000000dbf684e in std::__1::make_unique<> () at ../contrib/libcxx/include/memory:3474
2 DB::StorageDistributed::requireDirectoryMonitor (this=0x7f9ed34f0000, disk=..., name=...)
at ../src/Storages/StorageDistributed.cpp:682
3 0x000000000de3d5fa in DB::DistributedBlockOutputStream::writeToShard (this=this@entry=0x7f9ed39c7418, block=..., dir_names=...)
at ../src/Storages/Distributed/DistributedBlockOutputStream.cpp:634
4 0x000000000de3e214 in DB::DistributedBlockOutputStream::writeAsyncImpl (this=this@entry=0x7f9ed39c7418, block=...,
shard_id=shard_id@entry=79) at ../src/Storages/Distributed/DistributedBlockOutputStream.cpp:539
5 0x000000000de3e47b in DB::DistributedBlockOutputStream::writeSplitAsync (this=this@entry=0x7f9ed39c7418, block=...)
at ../contrib/libcxx/include/vector:1546
6 0x000000000de3eab0 in DB::DistributedBlockOutputStream::writeAsync (block=..., this=0x7f9ed39c7418)
at ../src/Storages/Distributed/DistributedBlockOutputStream.cpp:141
7 DB::DistributedBlockOutputStream::write (this=0x7f9ed39c7418, block=...)
at ../src/Storages/Distributed/DistributedBlockOutputStream.cpp:135
8 0x000000000d73b376 in DB::PushingToViewsBlockOutputStream::write (this=this@entry=0x7f9ea7a8cf58, block=...)
at ../src/DataStreams/PushingToViewsBlockOutputStream.cpp:157
9 0x000000000d7853eb in DB::AddingDefaultBlockOutputStream::write (this=0x7f9ed383d118, block=...)
at ../contrib/libcxx/include/memory:3826
10 0x000000000d740790 in DB::SquashingBlockOutputStream::write (this=0x7f9ed383de18, block=...)
at ../contrib/libcxx/include/memory:3826
11 0x000000000d68c308 in DB::CountingBlockOutputStream::write (this=0x7f9ea7ac6d60, block=...)
at ../contrib/libcxx/include/memory:3826
12 0x000000000ddab449 in DB::StorageBuffer::writeBlockToDestination (this=this@entry=0x7f9fbd56a000, block=..., table=...)
at ../src/Storages/StorageBuffer.cpp:747
13 0x000000000ddabfa6 in DB::StorageBuffer::flushBuffer (this=this@entry=0x7f9fbd56a000, buffer=...,
check_thresholds=check_thresholds@entry=true, locked=locked@entry=false, reset_block_structure=reset_block_structure@entry=false)
at ../src/Storages/StorageBuffer.cpp:661
14 0x000000000ddac415 in DB::StorageBuffer::flushAllBuffers (reset_blocks_structure=false, check_thresholds=true, this=0x7f9fbd56a000)
at ../src/Storages/StorageBuffer.cpp:605
shutdown() on T1275:
0 DB::StorageDistributed::shutdown (this=0x7f9ed34f0000) at ../contrib/libcxx/include/atomic:1612
1 0x000000000d6fd938 in DB::InterpreterDropQuery::executeToTable (this=this@entry=0x7f98530c79a0, query=...)
at ../src/Storages/TableLockHolder.h:12
2 0x000000000d6ff5ee in DB::InterpreterDropQuery::execute (this=0x7f98530c79a0) at ../src/Interpreters/InterpreterDropQuery.cpp:50
3 0x000000000daa40c0 in DB::executeQueryImpl (begin=<optimized out>, end=<optimized out>, context=..., internal=<optimized out>,
stage=DB::QueryProcessingStage::Complete, has_query_tail=false, istr=0x0) at ../src/Interpreters/executeQuery.cpp:420
4 0x000000000daa59df in DB::executeQuery (query=..., context=..., internal=internal@entry=false, stage=<optimized out>,
may_have_embedded_data=<optimized out>) at ../contrib/libcxx/include/string:1487
5 0x000000000e1369e6 in DB::TCPHandler::runImpl (this=this@entry=0x7f9ddf3a9000) at ../src/Server/TCPHandler.cpp:254
6 0x000000000e1379c9 in DB::TCPHandler::run (this=0x7f9ddf3a9000) at ../src/Server/TCPHandler.cpp:1326
7 0x000000001086fac7 in Poco::Net::TCPServerConnection::start (this=this@entry=0x7f9ddf3a9000)
at ../contrib/poco/Net/src/TCPServerConnection.cpp:43
8 0x000000001086ff2b in Poco::Net::TCPServerDispatcher::run (this=0x7f9e4eba5c00)
at ../contrib/poco/Net/src/TCPServerDispatcher.cpp:114
9 0x00000000109dbe8e in Poco::PooledThread::run (this=0x7f9e4a2d2f80) at ../contrib/poco/Foundation/src/ThreadPool.cpp:199
10 0x00000000109d78f9 in Poco::ThreadImpl::runnableEntry (pThread=<optimized out>)
at ../contrib/poco/Foundation/include/Poco/SharedPtr.h:401
11 0x00007f9fc3cccea7 in start_thread (arg=<optimized out>) at pthread_create.c:477
12 0x00007f9fc3bebeaf in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
```
</details>
2020-10-26 20:01:06 +00:00
|
|
|
// Some INSERT in-between shutdown() and drop() can call
|
|
|
|
// requireDirectoryMonitor() again, so call shutdown() to clear them, but
|
|
|
|
// when the drop() (this function) executed none of INSERT is allowed in
|
|
|
|
// parallel.
|
|
|
|
//
|
|
|
|
// And second time shutdown() should be fast, since none of
|
|
|
|
// DirectoryMonitor should do anything, because ActionBlocker is canceled
|
|
|
|
// (in shutdown()).
|
|
|
|
shutdown();
|
2020-07-16 20:35:23 +00:00
|
|
|
|
2022-04-17 23:02:49 +00:00
|
|
|
// Distributed table without sharding_key does not allows INSERTs
|
2020-07-16 20:35:23 +00:00
|
|
|
if (relative_data_path.empty())
|
|
|
|
return;
|
|
|
|
|
|
|
|
LOG_DEBUG(log, "Removing pending blocks for async INSERT from filesystem on DROP TABLE");
|
|
|
|
|
2020-09-15 09:26:56 +00:00
|
|
|
auto disks = data_volume->getDisks();
|
2020-07-16 20:35:23 +00:00
|
|
|
for (const auto & disk : disks)
|
2023-01-30 16:00:28 +00:00
|
|
|
{
|
|
|
|
if (!disk->exists(relative_data_path))
|
|
|
|
{
|
|
|
|
LOG_INFO(log, "Path {} is already removed from disk {}", relative_data_path, disk->getName());
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2020-07-16 20:35:23 +00:00
|
|
|
disk->removeRecursive(relative_data_path);
|
2023-01-30 16:00:28 +00:00
|
|
|
}
|
2020-07-16 20:35:23 +00:00
|
|
|
|
|
|
|
LOG_DEBUG(log, "Removed");
|
|
|
|
}
|
2014-08-13 11:26:13 +00:00
|
|
|
|
2020-01-20 17:54:52 +00:00
|
|
|
/// Returns the local filesystem paths (one per disk of the data volume)
/// where blocks pending for asynchronous INSERT are queued.
/// Empty when the table has no local queue (empty relative_data_path).
Strings StorageDistributed::getDataPaths() const
{
    Strings paths;

    if (relative_data_path.empty())
        return paths;

    const auto & disks = data_volume->getDisks();
    /// One path per disk — reserve up front to avoid reallocations.
    paths.reserve(disks.size());
    for (const DiskPtr & disk : disks)
        paths.push_back(disk->getPath() + relative_data_path);

    return paths;
}
|
2017-03-09 03:34:09 +00:00
|
|
|
|
2021-04-10 23:33:54 +00:00
|
|
|
void StorageDistributed::truncate(const ASTPtr &, const StorageMetadataPtr &, ContextPtr, TableExclusiveLockHolder &)
|
2018-04-21 00:35:20 +00:00
|
|
|
{
|
2018-06-09 15:48:22 +00:00
|
|
|
std::lock_guard lock(cluster_nodes_mutex);
|
2018-04-21 00:35:20 +00:00
|
|
|
|
2020-07-16 20:35:23 +00:00
|
|
|
LOG_DEBUG(log, "Removing pending blocks for async INSERT from filesystem on TRUNCATE TABLE");
|
|
|
|
|
2018-06-09 15:48:22 +00:00
|
|
|
for (auto it = cluster_nodes_data.begin(); it != cluster_nodes_data.end();)
|
2018-04-21 00:35:20 +00:00
|
|
|
{
|
2021-02-08 19:07:30 +00:00
|
|
|
it->second.directory_monitor->shutdownAndDropAllData();
|
2018-06-09 15:48:22 +00:00
|
|
|
it = cluster_nodes_data.erase(it);
|
2018-04-21 00:35:20 +00:00
|
|
|
}
|
2020-07-16 20:35:23 +00:00
|
|
|
|
|
|
|
LOG_DEBUG(log, "Removed");
|
2018-04-21 00:35:20 +00:00
|
|
|
}
|
2017-03-09 03:34:09 +00:00
|
|
|
|
2020-03-29 07:43:40 +00:00
|
|
|
/// Returns the storage policy used to keep queued asynchronous INSERT
/// blocks on local disks. May be null when the table keeps no local data
/// (see the early return in initializeFromDisk()).
StoragePolicyPtr StorageDistributed::getStoragePolicy() const
{
    return storage_policy;
}
|
|
|
|
|
2021-01-09 12:26:37 +00:00
|
|
|
void StorageDistributed::createDirectoryMonitors(const DiskPtr & disk)
|
2014-08-13 11:26:13 +00:00
|
|
|
{
|
2021-01-09 12:26:37 +00:00
|
|
|
const std::string path(disk->getPath() + relative_data_path);
|
2021-04-27 00:05:43 +00:00
|
|
|
fs::create_directories(path);
|
2014-08-14 11:50:36 +00:00
|
|
|
|
2019-07-31 22:37:41 +00:00
|
|
|
std::filesystem::directory_iterator begin(path);
|
|
|
|
std::filesystem::directory_iterator end;
|
2017-05-10 06:39:37 +00:00
|
|
|
for (auto it = begin; it != end; ++it)
|
2020-11-04 18:58:43 +00:00
|
|
|
{
|
|
|
|
const auto & dir_path = it->path();
|
|
|
|
if (std::filesystem::is_directory(dir_path))
|
|
|
|
{
|
|
|
|
const auto & tmp_path = dir_path / "tmp";
|
2023-01-21 21:37:46 +00:00
|
|
|
|
|
|
|
/// "tmp" created by DistributedSink
|
2020-11-04 18:58:43 +00:00
|
|
|
if (std::filesystem::is_directory(tmp_path) && std::filesystem::is_empty(tmp_path))
|
|
|
|
std::filesystem::remove(tmp_path);
|
|
|
|
|
|
|
|
if (std::filesystem::is_empty(dir_path))
|
|
|
|
{
|
2020-11-22 17:13:40 +00:00
|
|
|
LOG_DEBUG(log, "Removing {} (used for async INSERT into Distributed)", dir_path.string());
|
2022-05-09 19:13:02 +00:00
|
|
|
/// Will be created by DistributedSink on demand.
|
2020-11-04 18:58:43 +00:00
|
|
|
std::filesystem::remove(dir_path);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2023-01-21 21:37:46 +00:00
|
|
|
requireDirectoryMonitor(disk, dir_path.filename().string());
|
2020-11-04 18:58:43 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2014-08-13 11:26:13 +00:00
|
|
|
}
|
|
|
|
|
2017-03-09 03:34:09 +00:00
|
|
|
|
2023-01-21 21:37:46 +00:00
|
|
|
StorageDistributedDirectoryMonitor& StorageDistributed::requireDirectoryMonitor(const DiskPtr & disk, const std::string & name)
|
2017-07-27 15:24:39 +00:00
|
|
|
{
|
2021-01-09 12:26:37 +00:00
|
|
|
const std::string & disk_path = disk->getPath();
|
|
|
|
const std::string key(disk_path + name);
|
2017-07-27 15:24:39 +00:00
|
|
|
|
Fix race in Distributed table startup
Before this patch it was possible to have multiple directory monitors
for the same directory, one from the INSERT context, another one on
storage startup().
Here are an example of logs for this scenario:
2022.12.07 12:12:27.552485 [ 39925 ] {a47fcb32-4f44-4dbd-94fe-0070d4ea0f6b} <Debug> DDLWorker: Executed query: DETACH TABLE inc.dist_urls_in
...
2022.12.07 12:12:33.228449 [ 4408 ] {20c761d3-a46d-417b-9fcd-89a8919dd1fe} <Debug> executeQuery: (from 0.0.0.0:0, user: ) /* ddl_entry=query-0000089229 */ ATTACH TABLE inc.dist_urls_in (stage: Complete)
... this is the DirectoryMonitor created from the context of INSERT for the old StoragePtr that had not been destroyed yet (because of "was 1" this can be done only from the context of INSERT) ...
2022.12.07 12:12:35.556048 [ 39536 ] {} <Trace> inc.dist_urls_in.DirectoryMonitor: Files set to 173 (was 1)
2022.12.07 12:12:35.556078 [ 39536 ] {} <Trace> inc.dist_urls_in.DirectoryMonitor: Bytes set to 29750181 (was 71004)
2022.12.07 12:12:35.562716 [ 39536 ] {} <Trace> Connection (i13.ch:9000): Connected to ClickHouse server version 22.10.1.
2022.12.07 12:12:35.562750 [ 39536 ] {} <Debug> inc.dist_urls_in.DirectoryMonitor: Sending a batch of 10 files to i13.ch:9000 (0.00 rows, 0.00 B bytes).
... this is the DirectoryMonitor that was created during ATTACH ...
2022.12.07 12:12:35.802080 [ 39265 ] {} <Trace> inc.dist_urls_in.DirectoryMonitor: Files set to 173 (was 0)
2022.12.07 12:12:35.802107 [ 39265 ] {} <Trace> inc.dist_urls_in.DirectoryMonitor: Bytes set to 29750181 (was 0)
2022.12.07 12:12:35.834216 [ 39265 ] {} <Debug> inc.dist_urls_in.DirectoryMonitor: Sending a batch of 10 files to i13.ch:9000 (0.00 rows, 0.00 B bytes).
...
2022.12.07 12:12:38.532627 [ 39536 ] {} <Trace> inc.dist_urls_in.DirectoryMonitor: Sent a batch of 10 files (took 2976 ms).
...
2022.12.07 12:12:38.601051 [ 39265 ] {} <Error> inc.dist_urls_in.DirectoryMonitor: std::exception. Code: 1001, type: std::__1::__fs::filesystem::filesystem_error, e.what() = filesystem error: in file_size: No such file or directory ["/data6/clickhouse/data/inc/dist_urls_in/shard13_replica1/66827403.bin"], Stack trace (when copying this message, always include the lines below):
...
2022.12.07 12:12:54.132837 [ 4408 ] {20c761d3-a46d-417b-9fcd-89a8919dd1fe} <Debug> DDLWorker: Executed query: ATTACH TABLE inc.dist_urls_in
And eventually both monitors (for a short period of time, one replaces
another) are trying to process the same batch (current_batch.txt), and
one of them fails because such file had been already removed.
Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
2023-01-05 19:46:09 +00:00
|
|
|
std::lock_guard lock(cluster_nodes_mutex);
|
|
|
|
auto & node_data = cluster_nodes_data[key];
|
|
|
|
if (!node_data.directory_monitor)
|
2020-04-14 18:12:08 +00:00
|
|
|
{
|
Fix race in Distributed table startup
Before this patch it was possible to have multiple directory monitors
for the same directory, one from the INSERT context, another one on
storage startup().
Here is an example of logs for this scenario:
2022.12.07 12:12:27.552485 [ 39925 ] {a47fcb32-4f44-4dbd-94fe-0070d4ea0f6b} <Debug> DDLWorker: Executed query: DETACH TABLE inc.dist_urls_in
...
2022.12.07 12:12:33.228449 [ 4408 ] {20c761d3-a46d-417b-9fcd-89a8919dd1fe} <Debug> executeQuery: (from 0.0.0.0:0, user: ) /* ddl_entry=query-0000089229 */ ATTACH TABLE inc.dist_urls_in (stage: Complete)
... this is the DirectoryMonitor created from the context of INSERT for the old StoragePtr that had not been destroyed yet (because of "was 1" this can be done only from the context of INSERT) ...
2022.12.07 12:12:35.556048 [ 39536 ] {} <Trace> inc.dist_urls_in.DirectoryMonitor: Files set to 173 (was 1)
2022.12.07 12:12:35.556078 [ 39536 ] {} <Trace> inc.dist_urls_in.DirectoryMonitor: Bytes set to 29750181 (was 71004)
2022.12.07 12:12:35.562716 [ 39536 ] {} <Trace> Connection (i13.ch:9000): Connected to ClickHouse server version 22.10.1.
2022.12.07 12:12:35.562750 [ 39536 ] {} <Debug> inc.dist_urls_in.DirectoryMonitor: Sending a batch of 10 files to i13.ch:9000 (0.00 rows, 0.00 B bytes).
... this is the DirectoryMonitor that was created during ATTACH ...
2022.12.07 12:12:35.802080 [ 39265 ] {} <Trace> inc.dist_urls_in.DirectoryMonitor: Files set to 173 (was 0)
2022.12.07 12:12:35.802107 [ 39265 ] {} <Trace> inc.dist_urls_in.DirectoryMonitor: Bytes set to 29750181 (was 0)
2022.12.07 12:12:35.834216 [ 39265 ] {} <Debug> inc.dist_urls_in.DirectoryMonitor: Sending a batch of 10 files to i13.ch:9000 (0.00 rows, 0.00 B bytes).
...
2022.12.07 12:12:38.532627 [ 39536 ] {} <Trace> inc.dist_urls_in.DirectoryMonitor: Sent a batch of 10 files (took 2976 ms).
...
2022.12.07 12:12:38.601051 [ 39265 ] {} <Error> inc.dist_urls_in.DirectoryMonitor: std::exception. Code: 1001, type: std::__1::__fs::filesystem::filesystem_error, e.what() = filesystem error: in file_size: No such file or directory ["/data6/clickhouse/data/inc/dist_urls_in/shard13_replica1/66827403.bin"], Stack trace (when copying this message, always include the lines below):
...
2022.12.07 12:12:54.132837 [ 4408 ] {20c761d3-a46d-417b-9fcd-89a8919dd1fe} <Debug> DDLWorker: Executed query: ATTACH TABLE inc.dist_urls_in
And eventually both monitors (for a short period of time, one replaces
another) are trying to process the same batch (current_batch.txt), and
one of them fails because such file had been already removed.
Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
2023-01-05 19:46:09 +00:00
|
|
|
node_data.connection_pool = StorageDistributedDirectoryMonitor::createPool(name, *this);
|
|
|
|
node_data.directory_monitor = std::make_unique<StorageDistributedDirectoryMonitor>(
|
2021-01-09 12:26:37 +00:00
|
|
|
*this, disk, relative_data_path + name,
|
Fix race in Distributed table startup
Before this patch it was possible to have multiple directory monitors
for the same directory, one from the INSERT context, another one on
storage startup().
Here is an example of logs for this scenario:
2022.12.07 12:12:27.552485 [ 39925 ] {a47fcb32-4f44-4dbd-94fe-0070d4ea0f6b} <Debug> DDLWorker: Executed query: DETACH TABLE inc.dist_urls_in
...
2022.12.07 12:12:33.228449 [ 4408 ] {20c761d3-a46d-417b-9fcd-89a8919dd1fe} <Debug> executeQuery: (from 0.0.0.0:0, user: ) /* ddl_entry=query-0000089229 */ ATTACH TABLE inc.dist_urls_in (stage: Complete)
... this is the DirectoryMonitor created from the context of INSERT for the old StoragePtr that had not been destroyed yet (because of "was 1" this can be done only from the context of INSERT) ...
2022.12.07 12:12:35.556048 [ 39536 ] {} <Trace> inc.dist_urls_in.DirectoryMonitor: Files set to 173 (was 1)
2022.12.07 12:12:35.556078 [ 39536 ] {} <Trace> inc.dist_urls_in.DirectoryMonitor: Bytes set to 29750181 (was 71004)
2022.12.07 12:12:35.562716 [ 39536 ] {} <Trace> Connection (i13.ch:9000): Connected to ClickHouse server version 22.10.1.
2022.12.07 12:12:35.562750 [ 39536 ] {} <Debug> inc.dist_urls_in.DirectoryMonitor: Sending a batch of 10 files to i13.ch:9000 (0.00 rows, 0.00 B bytes).
... this is the DirectoryMonitor that was created during ATTACH ...
2022.12.07 12:12:35.802080 [ 39265 ] {} <Trace> inc.dist_urls_in.DirectoryMonitor: Files set to 173 (was 0)
2022.12.07 12:12:35.802107 [ 39265 ] {} <Trace> inc.dist_urls_in.DirectoryMonitor: Bytes set to 29750181 (was 0)
2022.12.07 12:12:35.834216 [ 39265 ] {} <Debug> inc.dist_urls_in.DirectoryMonitor: Sending a batch of 10 files to i13.ch:9000 (0.00 rows, 0.00 B bytes).
...
2022.12.07 12:12:38.532627 [ 39536 ] {} <Trace> inc.dist_urls_in.DirectoryMonitor: Sent a batch of 10 files (took 2976 ms).
...
2022.12.07 12:12:38.601051 [ 39265 ] {} <Error> inc.dist_urls_in.DirectoryMonitor: std::exception. Code: 1001, type: std::__1::__fs::filesystem::filesystem_error, e.what() = filesystem error: in file_size: No such file or directory ["/data6/clickhouse/data/inc/dist_urls_in/shard13_replica1/66827403.bin"], Stack trace (when copying this message, always include the lines below):
...
2022.12.07 12:12:54.132837 [ 4408 ] {20c761d3-a46d-417b-9fcd-89a8919dd1fe} <Debug> DDLWorker: Executed query: ATTACH TABLE inc.dist_urls_in
And eventually both monitors (for a short period of time, one replaces
another) are trying to process the same batch (current_batch.txt), and
one of them fails because such file had been already removed.
Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
2023-01-05 19:46:09 +00:00
|
|
|
node_data.connection_pool,
|
2021-01-09 12:26:37 +00:00
|
|
|
monitors_blocker,
|
2023-01-21 21:37:46 +00:00
|
|
|
getContext()->getDistributedSchedulePool());
|
2020-04-14 18:12:08 +00:00
|
|
|
}
|
Fix race in Distributed table startup
Before this patch it was possible to have multiple directory monitors
for the same directory, one from the INSERT context, another one on
storage startup().
Here is an example of logs for this scenario:
2022.12.07 12:12:27.552485 [ 39925 ] {a47fcb32-4f44-4dbd-94fe-0070d4ea0f6b} <Debug> DDLWorker: Executed query: DETACH TABLE inc.dist_urls_in
...
2022.12.07 12:12:33.228449 [ 4408 ] {20c761d3-a46d-417b-9fcd-89a8919dd1fe} <Debug> executeQuery: (from 0.0.0.0:0, user: ) /* ddl_entry=query-0000089229 */ ATTACH TABLE inc.dist_urls_in (stage: Complete)
... this is the DirectoryMonitor created from the context of INSERT for the old StoragePtr that had not been destroyed yet (because of "was 1" this can be done only from the context of INSERT) ...
2022.12.07 12:12:35.556048 [ 39536 ] {} <Trace> inc.dist_urls_in.DirectoryMonitor: Files set to 173 (was 1)
2022.12.07 12:12:35.556078 [ 39536 ] {} <Trace> inc.dist_urls_in.DirectoryMonitor: Bytes set to 29750181 (was 71004)
2022.12.07 12:12:35.562716 [ 39536 ] {} <Trace> Connection (i13.ch:9000): Connected to ClickHouse server version 22.10.1.
2022.12.07 12:12:35.562750 [ 39536 ] {} <Debug> inc.dist_urls_in.DirectoryMonitor: Sending a batch of 10 files to i13.ch:9000 (0.00 rows, 0.00 B bytes).
... this is the DirectoryMonitor that was created during ATTACH ...
2022.12.07 12:12:35.802080 [ 39265 ] {} <Trace> inc.dist_urls_in.DirectoryMonitor: Files set to 173 (was 0)
2022.12.07 12:12:35.802107 [ 39265 ] {} <Trace> inc.dist_urls_in.DirectoryMonitor: Bytes set to 29750181 (was 0)
2022.12.07 12:12:35.834216 [ 39265 ] {} <Debug> inc.dist_urls_in.DirectoryMonitor: Sending a batch of 10 files to i13.ch:9000 (0.00 rows, 0.00 B bytes).
...
2022.12.07 12:12:38.532627 [ 39536 ] {} <Trace> inc.dist_urls_in.DirectoryMonitor: Sent a batch of 10 files (took 2976 ms).
...
2022.12.07 12:12:38.601051 [ 39265 ] {} <Error> inc.dist_urls_in.DirectoryMonitor: std::exception. Code: 1001, type: std::__1::__fs::filesystem::filesystem_error, e.what() = filesystem error: in file_size: No such file or directory ["/data6/clickhouse/data/inc/dist_urls_in/shard13_replica1/66827403.bin"], Stack trace (when copying this message, always include the lines below):
...
2022.12.07 12:12:54.132837 [ 4408 ] {20c761d3-a46d-417b-9fcd-89a8919dd1fe} <Debug> DDLWorker: Executed query: ATTACH TABLE inc.dist_urls_in
And eventually both monitors (for a short period of time, one replaces
another) are trying to process the same batch (current_batch.txt), and
one of them fails because such file had been already removed.
Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
2023-01-05 19:46:09 +00:00
|
|
|
return *node_data.directory_monitor;
|
2014-08-19 08:04:13 +00:00
|
|
|
}
|
|
|
|
|
2020-06-03 23:50:47 +00:00
|
|
|
std::vector<StorageDistributedDirectoryMonitor::Status> StorageDistributed::getDirectoryMonitorsStatuses() const
|
2020-06-02 23:47:32 +00:00
|
|
|
{
|
2020-06-03 23:50:47 +00:00
|
|
|
std::vector<StorageDistributedDirectoryMonitor::Status> statuses;
|
|
|
|
std::lock_guard lock(cluster_nodes_mutex);
|
2020-06-06 15:57:52 +00:00
|
|
|
statuses.reserve(cluster_nodes_data.size());
|
2020-06-04 17:23:46 +00:00
|
|
|
for (const auto & node : cluster_nodes_data)
|
2020-06-03 23:50:47 +00:00
|
|
|
statuses.push_back(node.second.directory_monitor->getStatus());
|
|
|
|
return statuses;
|
2020-06-02 23:47:32 +00:00
|
|
|
}
|
|
|
|
|
2021-01-26 18:45:36 +00:00
|
|
|
std::optional<UInt64> StorageDistributed::totalBytes(const Settings &) const
|
|
|
|
{
|
|
|
|
UInt64 total_bytes = 0;
|
|
|
|
for (const auto & status : getDirectoryMonitorsStatuses())
|
|
|
|
total_bytes += status.bytes_count;
|
|
|
|
return total_bytes;
|
|
|
|
}
|
|
|
|
|
2015-09-18 13:36:10 +00:00
|
|
|
/// Number of shards in the cluster this table currently points at.
size_t StorageDistributed::getShardCount() const
{
    return getCluster()->getShardCount();
}
|
|
|
|
|
2020-01-20 17:54:52 +00:00
|
|
|
/// Resolves the cluster to use: an explicitly owned cluster (e.g. set by the
/// remote() table function) takes precedence over the configured cluster name.
ClusterPtr StorageDistributed::getCluster() const
{
    if (owned_cluster)
        return owned_cluster;
    return getContext()->getCluster(cluster_name);
}
|
|
|
|
|
2021-04-10 23:33:54 +00:00
|
|
|
/// Tries to shrink the cluster to only the shards the query can touch
/// (optimize_skip_unused_shards). Returns an empty ClusterPtr when no
/// optimization was possible; throws if the user *requires* pruning via
/// force_optimize_skip_unused_shards.
ClusterPtr StorageDistributed::getOptimizedCluster(
    ContextPtr local_context, const StorageSnapshotPtr & storage_snapshot, const ASTPtr & query_ptr) const
{
    ClusterPtr cluster = getCluster();
    const Settings & settings = local_context->getSettingsRef();

    /// A non-deterministic sharding key cannot be used for pruning unless the
    /// user explicitly opted in.
    const bool sharding_key_is_usable = settings.allow_nondeterministic_optimize_skip_unused_shards || sharding_key_is_deterministic;

    if (has_sharding_key && sharding_key_is_usable)
    {
        if (ClusterPtr optimized = skipUnusedShards(cluster, query_ptr, storage_snapshot, local_context))
            return optimized;
    }

    /// Pruning did not happen; decide whether that is an error.
    const UInt64 force = settings.force_optimize_skip_unused_shards;
    const bool pruning_required = force == FORCE_OPTIMIZE_SKIP_UNUSED_SHARDS_ALWAYS
        || (force == FORCE_OPTIMIZE_SKIP_UNUSED_SHARDS_HAS_SHARDING_KEY && has_sharding_key);
    if (pruning_required)
    {
        if (!has_sharding_key)
            throw Exception(ErrorCodes::UNABLE_TO_SKIP_UNUSED_SHARDS, "No sharding key");
        if (!sharding_key_is_usable)
            throw Exception(ErrorCodes::UNABLE_TO_SKIP_UNUSED_SHARDS, "Sharding key is not deterministic");
        throw Exception(ErrorCodes::UNABLE_TO_SKIP_UNUSED_SHARDS, "Sharding key {} is not used", sharding_key_column_name);
    }

    return {};
}
|
|
|
|
|
2020-05-01 08:31:05 +00:00
|
|
|
/// Maps each row of the evaluated sharding-key column to a shard number,
/// using the cluster's slot-to-shard table (weights are honored via slots).
/// Throws TYPE_MISMATCH if the sharding key is not an integer type.
IColumn::Selector StorageDistributed::createSelector(const ClusterPtr cluster, const ColumnWithTypeAndName & result)
{
    const auto & slot_to_shard = cluster->getSlotToShard();

// If result.type is DataTypeLowCardinality, do shard according to its dictionaryType
// (the column is materialized to its full representation first).
#define CREATE_FOR_TYPE(TYPE) \
    if (typeid_cast<const DataType##TYPE *>(result.type.get())) \
        return createBlockSelector<TYPE>(*result.column, slot_to_shard); \
    else if (auto * type_low_cardinality = typeid_cast<const DataTypeLowCardinality *>(result.type.get())) \
        if (typeid_cast<const DataType ## TYPE *>(type_low_cardinality->getDictionaryType().get())) \
            return createBlockSelector<TYPE>(*result.column->convertToFullColumnIfLowCardinality(), slot_to_shard);

    CREATE_FOR_TYPE(UInt8)
    CREATE_FOR_TYPE(UInt16)
    CREATE_FOR_TYPE(UInt32)
    CREATE_FOR_TYPE(UInt64)
    CREATE_FOR_TYPE(Int8)
    CREATE_FOR_TYPE(Int16)
    CREATE_FOR_TYPE(Int32)
    CREATE_FOR_TYPE(Int64)

#undef CREATE_FOR_TYPE

    throw Exception(ErrorCodes::TYPE_MISMATCH, "Sharding key expression does not evaluate to an integer type");
}
|
|
|
|
|
2018-12-19 12:38:13 +00:00
|
|
|
/// Returns a new cluster with fewer shards if constant folding for `sharding_key_expr` is possible
|
2019-08-19 20:28:24 +00:00
|
|
|
/// using constraints from "PREWHERE" and "WHERE" conditions, otherwise returns `nullptr`
|
2020-06-17 16:39:58 +00:00
|
|
|
ClusterPtr StorageDistributed::skipUnusedShards(
|
|
|
|
ClusterPtr cluster,
|
|
|
|
const ASTPtr & query_ptr,
|
2021-07-23 16:47:43 +00:00
|
|
|
const StorageSnapshotPtr & storage_snapshot,
|
2021-04-10 23:33:54 +00:00
|
|
|
ContextPtr local_context) const
|
2018-12-19 12:38:13 +00:00
|
|
|
{
|
2020-03-24 07:51:54 +00:00
|
|
|
const auto & select = query_ptr->as<ASTSelectQuery &>();
|
2018-12-19 12:38:13 +00:00
|
|
|
|
2019-08-19 20:28:24 +00:00
|
|
|
if (!select.prewhere() && !select.where())
|
|
|
|
{
|
2018-12-19 12:38:13 +00:00
|
|
|
return nullptr;
|
2019-08-19 20:28:24 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
ASTPtr condition_ast;
|
|
|
|
if (select.prewhere() && select.where())
|
|
|
|
{
|
|
|
|
condition_ast = makeASTFunction("and", select.prewhere()->clone(), select.where()->clone());
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
condition_ast = select.prewhere() ? select.prewhere()->clone() : select.where()->clone();
|
|
|
|
}
|
2018-12-19 12:38:13 +00:00
|
|
|
|
2021-07-23 16:47:43 +00:00
|
|
|
replaceConstantExpressions(condition_ast, local_context, storage_snapshot->metadata->getColumns().getAll(), shared_from_this(), storage_snapshot);
|
2018-12-19 12:38:13 +00:00
|
|
|
|
2021-04-10 23:33:54 +00:00
|
|
|
size_t limit = local_context->getSettingsRef().optimize_skip_unused_shards_limit;
|
2021-03-08 07:05:56 +00:00
|
|
|
if (!limit || limit > SSIZE_MAX)
|
|
|
|
{
|
2021-06-28 18:55:30 +00:00
|
|
|
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "optimize_skip_unused_shards_limit out of range (0, {}]", SSIZE_MAX);
|
2021-03-08 07:05:56 +00:00
|
|
|
}
|
|
|
|
// To interpret limit==0 as limit is reached
|
|
|
|
++limit;
|
|
|
|
const auto blocks = evaluateExpressionOverConstantCondition(condition_ast, sharding_key_expr, limit);
|
|
|
|
|
|
|
|
if (!limit)
|
2018-12-19 12:38:13 +00:00
|
|
|
{
|
2021-04-23 17:51:47 +00:00
|
|
|
LOG_DEBUG(log,
|
2021-03-08 07:05:56 +00:00
|
|
|
"Number of values for sharding key exceeds optimize_skip_unused_shards_limit={}, "
|
|
|
|
"try to increase it, but note that this may increase query processing time.",
|
2021-04-10 23:33:54 +00:00
|
|
|
local_context->getSettingsRef().optimize_skip_unused_shards_limit);
|
2018-12-19 12:38:13 +00:00
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Can't get definite answer if we can skip any shards
|
|
|
|
if (!blocks)
|
|
|
|
{
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
std::set<int> shards;
|
|
|
|
|
|
|
|
for (const auto & block : *blocks)
|
|
|
|
{
|
|
|
|
if (!block.has(sharding_key_column_name))
|
2023-01-23 21:13:58 +00:00
|
|
|
throw Exception(ErrorCodes::TOO_MANY_ROWS, "sharding_key_expr should evaluate as a single row");
|
2018-12-19 12:38:13 +00:00
|
|
|
|
2020-03-18 03:27:32 +00:00
|
|
|
const ColumnWithTypeAndName & result = block.getByName(sharding_key_column_name);
|
2018-12-19 12:38:13 +00:00
|
|
|
const auto selector = createSelector(cluster, result);
|
|
|
|
|
|
|
|
shards.insert(selector.begin(), selector.end());
|
|
|
|
}
|
|
|
|
|
|
|
|
return cluster->getClusterWithMultipleShards({shards.begin(), shards.end()});
|
|
|
|
}
|
|
|
|
|
2019-04-08 05:13:16 +00:00
|
|
|
/// SYSTEM STOP/START DISTRIBUTED SENDS support: only the sending of queued
/// async-INSERT batches can be blocked for this storage.
ActionLock StorageDistributed::getActionLock(StorageActionBlockType type)
{
    if (type != ActionLocks::DistributedSend)
        return {};
    return monitors_blocker.cancel();
}
|
|
|
|
|
2021-07-01 13:21:38 +00:00
|
|
|
/// Best-effort flush of pending async-INSERT data; failures are logged, not
/// propagated, so callers (e.g. shutdown/detach paths) are never interrupted.
void StorageDistributed::flush()
{
    try
    {
        flushClusterNodesAllData(getContext());
    }
    catch (...)
    {
        tryLogCurrentException(log, "Cannot flush");
    }
}
|
|
|
|
|
2021-04-10 23:33:54 +00:00
|
|
|
/// Forces every directory monitor to send all queued data now (SYSTEM FLUSH DISTRIBUTED).
void StorageDistributed::flushClusterNodesAllData(ContextPtr local_context)
{
    /// Sync SYSTEM FLUSH DISTRIBUTED with TRUNCATE
    auto table_lock = lockForShare(local_context->getCurrentQueryId(), local_context->getSettingsRef().lock_acquire_timeout);

    /// Copy monitor pointers out first so the (slow) flush runs without the map mutex held.
    std::vector<std::shared_ptr<StorageDistributedDirectoryMonitor>> monitors;
    {
        std::lock_guard lock(cluster_nodes_mutex);
        monitors.reserve(cluster_nodes_data.size());
        for (auto & [name, node_data] : cluster_nodes_data)
            monitors.push_back(node_data.directory_monitor);
    }

    /// TODO: Maybe it should be executed in parallel
    for (auto & monitor : monitors)
        monitor->flushAllData();
}
|
|
|
|
|
2020-04-07 14:05:51 +00:00
|
|
|
/// Renames the table: moves the on-disk async-INSERT queue (if any) and then
/// updates the in-memory StorageID.
void StorageDistributed::rename(const String & new_path_to_table_data, const StorageID & new_table_id)
{
    assert(relative_data_path != new_path_to_table_data);
    /// An empty relative_data_path means this table keeps no local queue on disk.
    if (!relative_data_path.empty())
        renameOnDisk(new_path_to_table_data);
    renameInMemory(new_table_id);
}
|
2020-07-23 14:10:48 +00:00
|
|
|
|
|
|
|
|
2020-12-23 16:04:05 +00:00
|
|
|
/// Picks a random shard index with probability proportional to each shard's weight.
size_t StorageDistributed::getRandomShardIndex(const Cluster::ShardsInfo & shards)
{
    UInt32 total_weight = 0;
    for (const auto & shard : shards)
        total_weight += shard.weight;

    assert(total_weight > 0);

    /// Draw a "ticket" in [0, total_weight) under the mutex guarding the shared RNG.
    size_t ticket;
    {
        std::lock_guard lock(rng_mutex);
        ticket = std::uniform_int_distribution<size_t>(0, total_weight - 1)(rng);
    }

    /// Walk the shards; each shard owns `weight` consecutive tickets.
    for (size_t i = 0; i < shards.size(); ++i)
    {
        if (ticket < shards[i].weight)
            return i;
        ticket -= shards[i].weight;
    }

    UNREACHABLE();
}
|
|
|
|
|
|
|
|
|
2020-01-20 17:54:52 +00:00
|
|
|
void StorageDistributed::renameOnDisk(const String & new_path_to_table_data)
|
|
|
|
{
|
2020-09-15 09:26:56 +00:00
|
|
|
for (const DiskPtr & disk : data_volume->getDisks())
|
2019-12-19 19:39:49 +00:00
|
|
|
{
|
2021-06-24 10:00:33 +00:00
|
|
|
disk->createDirectories(new_path_to_table_data);
|
2020-09-18 19:08:53 +00:00
|
|
|
disk->moveDirectory(relative_data_path, new_path_to_table_data);
|
2020-01-20 17:54:52 +00:00
|
|
|
|
2020-09-18 19:08:53 +00:00
|
|
|
auto new_path = disk->getPath() + new_path_to_table_data;
|
2020-05-23 22:24:01 +00:00
|
|
|
LOG_DEBUG(log, "Updating path to {}", new_path);
|
2020-01-20 17:54:52 +00:00
|
|
|
|
2019-12-19 19:39:49 +00:00
|
|
|
std::lock_guard lock(cluster_nodes_mutex);
|
|
|
|
for (auto & node : cluster_nodes_data)
|
2021-01-09 12:26:37 +00:00
|
|
|
node.second.directory_monitor->updatePath(new_path_to_table_data);
|
2019-12-19 19:39:49 +00:00
|
|
|
}
|
2020-01-20 17:54:52 +00:00
|
|
|
|
|
|
|
relative_data_path = new_path_to_table_data;
|
2019-12-19 19:39:49 +00:00
|
|
|
}
|
|
|
|
|
2021-01-27 18:43:41 +00:00
|
|
|
void StorageDistributed::delayInsertOrThrowIfNeeded() const
|
2021-01-26 18:45:37 +00:00
|
|
|
{
|
2021-01-27 18:43:41 +00:00
|
|
|
if (!distributed_settings.bytes_to_throw_insert &&
|
|
|
|
!distributed_settings.bytes_to_delay_insert)
|
2021-01-26 18:45:37 +00:00
|
|
|
return;
|
|
|
|
|
2021-04-10 23:33:54 +00:00
|
|
|
UInt64 total_bytes = *totalBytes(getContext()->getSettingsRef());
|
2021-01-27 18:43:41 +00:00
|
|
|
|
|
|
|
if (distributed_settings.bytes_to_throw_insert && total_bytes > distributed_settings.bytes_to_throw_insert)
|
2021-01-26 18:45:37 +00:00
|
|
|
{
|
2021-01-27 18:43:41 +00:00
|
|
|
ProfileEvents::increment(ProfileEvents::DistributedRejectedInserts);
|
2021-01-26 18:45:37 +00:00
|
|
|
throw Exception(ErrorCodes::DISTRIBUTED_TOO_MANY_PENDING_BYTES,
|
|
|
|
"Too many bytes pending for async INSERT: {} (bytes_to_throw_insert={})",
|
|
|
|
formatReadableSizeWithBinarySuffix(total_bytes),
|
|
|
|
formatReadableSizeWithBinarySuffix(distributed_settings.bytes_to_throw_insert));
|
|
|
|
}
|
2021-01-27 18:43:41 +00:00
|
|
|
|
|
|
|
if (distributed_settings.bytes_to_delay_insert && total_bytes > distributed_settings.bytes_to_delay_insert)
|
|
|
|
{
|
|
|
|
/// Step is 5% of the delay and minimal one second.
|
|
|
|
/// NOTE: max_delay_to_insert is in seconds, and step is in ms.
|
2022-09-11 01:21:34 +00:00
|
|
|
const size_t step_ms = static_cast<size_t>(std::min<double>(1., static_cast<double>(distributed_settings.max_delay_to_insert) * 1'000 * 0.05));
|
2021-01-27 18:43:41 +00:00
|
|
|
UInt64 delayed_ms = 0;
|
|
|
|
|
|
|
|
do {
|
|
|
|
delayed_ms += step_ms;
|
|
|
|
std::this_thread::sleep_for(std::chrono::milliseconds(step_ms));
|
2021-04-10 23:33:54 +00:00
|
|
|
} while (*totalBytes(getContext()->getSettingsRef()) > distributed_settings.bytes_to_delay_insert && delayed_ms < distributed_settings.max_delay_to_insert*1000);
|
2021-01-27 18:43:41 +00:00
|
|
|
|
|
|
|
ProfileEvents::increment(ProfileEvents::DistributedDelayedInserts);
|
|
|
|
ProfileEvents::increment(ProfileEvents::DistributedDelayedInsertsMilliseconds, delayed_ms);
|
|
|
|
|
2021-04-10 23:33:54 +00:00
|
|
|
UInt64 new_total_bytes = *totalBytes(getContext()->getSettingsRef());
|
2021-01-27 18:43:41 +00:00
|
|
|
LOG_INFO(log, "Too many bytes pending for async INSERT: was {}, now {}, INSERT was delayed to {} ms",
|
|
|
|
formatReadableSizeWithBinarySuffix(total_bytes),
|
|
|
|
formatReadableSizeWithBinarySuffix(new_total_bytes),
|
|
|
|
delayed_ms);
|
|
|
|
|
|
|
|
if (new_total_bytes > distributed_settings.bytes_to_delay_insert)
|
|
|
|
{
|
|
|
|
ProfileEvents::increment(ProfileEvents::DistributedRejectedInserts);
|
|
|
|
throw Exception(ErrorCodes::DISTRIBUTED_TOO_MANY_PENDING_BYTES,
|
|
|
|
"Too many bytes pending for async INSERT: {} (bytes_to_delay_insert={})",
|
|
|
|
formatReadableSizeWithBinarySuffix(new_total_bytes),
|
|
|
|
formatReadableSizeWithBinarySuffix(distributed_settings.bytes_to_delay_insert));
|
|
|
|
}
|
|
|
|
}
|
2021-01-26 18:45:37 +00:00
|
|
|
}
|
2017-12-30 00:36:06 +00:00
|
|
|
|
|
|
|
/// Registers the Distributed table engine with the storage factory.
/// Fixes the grammar of the max_delay_to_insert error message ("less then" -> "less than").
void registerStorageDistributed(StorageFactory & factory)
{
    factory.registerStorage("Distributed", [](const StorageFactory::Arguments & args)
    {
        /** Arguments of engine is following:
          * - name of cluster in configuration;
          * - name of remote database;
          * - name of remote table;
          * - policy to store data in;
          *
          * Remote database may be specified in following form:
          * - identifier;
          * - constant expression with string result, like currentDatabase();
          * -- string literal as specific case;
          * - empty string means 'use default database from cluster'.
          *
          * Distributed engine also supports SETTINGS clause.
          */

        ASTs & engine_args = args.engine_args;

        if (engine_args.size() < 3 || engine_args.size() > 5)
            throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
                            "Storage Distributed requires from 3 "
                            "to 5 parameters - name of configuration section with list "
                            "of remote servers, name of remote database, name "
                            "of remote table, sharding key expression (optional), policy to store data in (optional).");

        String cluster_name = getClusterNameAndMakeLiteral(engine_args[0]);

        const ContextPtr & context = args.getContext();
        const ContextPtr & local_context = args.getLocalContext();

        /// Database/table may be given as identifiers or constant expressions.
        engine_args[1] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[1], local_context);
        engine_args[2] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[2], local_context);

        String remote_database = checkAndGetLiteralArgument<String>(engine_args[1], "remote_database");
        String remote_table = checkAndGetLiteralArgument<String>(engine_args[2], "remote_table");

        const auto & sharding_key = engine_args.size() >= 4 ? engine_args[3] : nullptr;
        String storage_policy = "default";
        if (engine_args.size() >= 5)
        {
            engine_args[4] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[4], local_context);
            storage_policy = checkAndGetLiteralArgument<String>(engine_args[4], "storage_policy");
        }

        /// Check that sharding_key exists in the table and has numeric type.
        if (sharding_key)
        {
            auto sharding_expr = buildShardingKeyExpression(sharding_key, context, args.columns.getAllPhysical(), true);
            const Block & block = sharding_expr->getSampleBlock();

            if (block.columns() != 1)
                throw Exception(ErrorCodes::INCORRECT_NUMBER_OF_COLUMNS, "Sharding expression must return exactly one column");

            auto type = block.getByPosition(0).type;
            if (!type->isValueRepresentedByInteger())
                throw Exception(ErrorCodes::TYPE_MISMATCH, "Sharding expression has type {}, but should be one of integer type",
                    type->getName());
        }

        /// TODO: move some arguments from the arguments to the SETTINGS.
        DistributedSettings distributed_settings;
        if (args.storage_def->settings)
        {
            distributed_settings.loadFromQuery(*args.storage_def);
        }

        if (distributed_settings.max_delay_to_insert < 1)
            throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND,
                            "max_delay_to_insert cannot be less than 1");

        /// The throw limit is checked before the delay limit, so it must be the larger one.
        if (distributed_settings.bytes_to_throw_insert && distributed_settings.bytes_to_delay_insert &&
            distributed_settings.bytes_to_throw_insert <= distributed_settings.bytes_to_delay_insert)
        {
            throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND,
                            "bytes_to_throw_insert cannot be less or equal to bytes_to_delay_insert (since it is handled first)");
        }

        /// Set default values from the distributed_directory_monitor_* global context settings.
        if (!distributed_settings.monitor_batch_inserts.changed)
            distributed_settings.monitor_batch_inserts = context->getSettingsRef().distributed_directory_monitor_batch_inserts;
        if (!distributed_settings.monitor_split_batch_on_failure.changed)
            distributed_settings.monitor_split_batch_on_failure = context->getSettingsRef().distributed_directory_monitor_split_batch_on_failure;
        if (!distributed_settings.monitor_sleep_time_ms.changed)
            distributed_settings.monitor_sleep_time_ms = Poco::Timespan(context->getSettingsRef().distributed_directory_monitor_sleep_time_ms);
        if (!distributed_settings.monitor_max_sleep_time_ms.changed)
            distributed_settings.monitor_max_sleep_time_ms = Poco::Timespan(context->getSettingsRef().distributed_directory_monitor_max_sleep_time_ms);

        return std::make_shared<StorageDistributed>(
            args.table_id,
            args.columns,
            args.constraints,
            args.comment,
            remote_database,
            remote_table,
            cluster_name,
            context,
            sharding_key,
            storage_policy,
            args.relative_data_path,
            distributed_settings,
            args.attach);
    },
    {
        .supports_settings = true,
        .supports_parallel_insert = true,
        .supports_schema_inference = true,
        .source_access_type = AccessType::REMOTE,
    });
}
|
|
|
|
|
2012-05-21 20:38:34 +00:00
|
|
|
}
|