2017-04-01 09:19:00 +00:00
|
|
|
#include <Storages/IStorage.h>
|
|
|
|
#include <Parsers/ASTOptimizeQuery.h>
|
2017-05-23 18:24:43 +00:00
|
|
|
#include <Interpreters/Context.h>
|
2020-11-03 13:47:26 +00:00
|
|
|
#include <Interpreters/executeDDLQueryOnCluster.h>
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Interpreters/InterpreterOptimizeQuery.h>
|
2021-10-31 08:51:20 +00:00
|
|
|
#include <Access/Common/AccessRightsElement.h>
|
2017-07-13 20:58:19 +00:00
|
|
|
#include <Common/typeid_cast.h>
|
2020-12-01 09:10:12 +00:00
|
|
|
#include <Parsers/ASTExpressionList.h>
|
2022-10-01 22:44:46 +00:00
|
|
|
#include <Storages/MergeTree/MergeTreeData.h>
|
2016-12-12 07:24:56 +00:00
|
|
|
|
2020-12-01 09:10:12 +00:00
|
|
|
#include <Interpreters/processColumnTransformers.h>
|
|
|
|
|
|
|
|
#include <memory>
|
2016-12-12 07:24:56 +00:00
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2017-04-08 01:32:05 +00:00
|
|
|
namespace ErrorCodes
|
|
|
|
{
|
2020-12-01 09:10:12 +00:00
|
|
|
extern const int THERE_IS_NO_COLUMN;
|
2017-04-08 01:32:05 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2016-12-12 07:24:56 +00:00
|
|
|
/// Runs an OPTIMIZE query.
///
/// If the query names a cluster, it is handed over to executeDDLQueryOnCluster() together with
/// the access rights that have to be checked, and that call's result is returned.
/// Otherwise access is checked on the current context, the target table is looked up,
/// the optional DEDUPLICATE BY column list is expanded and validated, and the table's
/// optimize() method is called. The local path returns an empty BlockIO.
///
/// Throws THERE_IS_NO_COLUMN if an explicit DEDUPLICATE BY list leaves out a column
/// required by the table's sorting key or partition key.
BlockIO InterpreterOptimizeQuery::execute()
{
    const auto & optimize_query = query_ptr->as<ASTOptimizeQuery &>();

    /// ON CLUSTER: delegate to the distributed DDL path; nothing is executed locally here.
    if (!optimize_query.cluster.empty())
    {
        DDLQueryOnClusterParams cluster_params;
        cluster_params.access_to_check = getRequiredAccess();
        return executeDDLQueryOnCluster(query_ptr, getContext(), cluster_params);
    }

    getContext()->checkAccess(getRequiredAccess());

    auto storage_id = getContext()->resolveStorageID(optimize_query);
    StoragePtr storage = DatabaseCatalog::instance().getTable(storage_id, getContext());
    checkStorageSupportsTransactionsIfNeeded(storage, getContext());
    auto metadata = storage->getInMemoryMetadataPtr();
    auto snapshot = storage->getStorageSnapshot(metadata, getContext());

    /// An empty list means "deduplicate by all columns"; the user may instead name the columns explicitly.
    Names dedup_columns;
    if (optimize_query.deduplicate_by_columns)
    {
        /// The user supplied a custom column set, possibly via column transformer expressions.
        {
            /// Resolve asterisks, column transformers, etc. into a flat list of column names.
            const auto expanded = processColumnTransformers(
                getContext()->getCurrentDatabase(), storage, metadata, optimize_query.deduplicate_by_columns);
            for (const auto & child : expanded->children)
                dedup_columns.emplace_back(child->getColumnName());
        }

        snapshot->check(dedup_columns);

        /// Columns required by the sorting key, followed by those required by the partition key.
        Names key_columns = metadata->getColumnsRequiredForSortingKey();
        {
            const auto partition_key_columns = metadata->getColumnsRequiredForPartitionKey();
            key_columns.reserve(key_columns.size() + partition_key_columns.size());
            key_columns.insert(key_columns.end(), partition_key_columns.begin(), partition_key_columns.end());
        }

        for (const auto & key_column : key_columns)
        {
            /// Only adjacent rows of a block are deduplicated, and rows in a block follow the
            /// sorting key order within one partition. Sorting and partition key columns are
            /// therefore always taken into account implicitly; requiring them in the list
            /// makes that limitation explicit and avoids confusion.
            const bool is_listed
                = std::find(dedup_columns.begin(), dedup_columns.end(), key_column) != dedup_columns.end();
            if (is_listed)
                continue;

            throw Exception(ErrorCodes::THERE_IS_NO_COLUMN,
                "DEDUPLICATE BY expression must include all columns used in table's"
                " ORDER BY, PRIMARY KEY, or PARTITION BY but '{}' is missing."
                " Expanded DEDUPLICATE BY columns expression: ['{}']",
                key_column, fmt::join(dedup_columns, "', '"));
        }
    }

    /// For MergeTree snapshot data, reset the list of parts stored in the snapshot before optimizing.
    /// NOTE(review): presumably so the snapshot does not keep parts alive during the merge - confirm.
    if (auto * merge_tree_snapshot = dynamic_cast<MergeTreeData::SnapshotData *>(snapshot->data.get()))
        merge_tree_snapshot->parts = {};

    storage->optimize(
        query_ptr,
        metadata,
        optimize_query.partition,
        optimize_query.final,
        optimize_query.deduplicate,
        dedup_columns,
        optimize_query.cleanup,
        getContext());

    return {};
}
|
|
|
|
|
2020-01-24 16:20:36 +00:00
|
|
|
|
|
|
|
/// Builds the access rights needed for this query: a single AccessType::OPTIMIZE element
/// for the database and table named in the OPTIMIZE query.
AccessRightsElements InterpreterOptimizeQuery::getRequiredAccess() const
{
    const auto & optimize_query = query_ptr->as<const ASTOptimizeQuery &>();

    AccessRightsElements access_elements;
    access_elements.emplace_back(
        AccessType::OPTIMIZE,
        optimize_query.getDatabase(),
        optimize_query.getTable());
    return access_elements;
}
|
|
|
|
|
2016-12-12 07:24:56 +00:00
|
|
|
}
|