#include #include #include #include #include #include #include #include #include #include namespace DB { namespace ErrorCodes { extern const int THERE_IS_NO_COLUMN; } BlockIO InterpreterOptimizeQuery::execute() { const auto & ast = query_ptr->as(); if (!ast.cluster.empty()) return executeDDLQueryOnCluster(query_ptr, context, getRequiredAccess()); context.checkAccess(getRequiredAccess()); auto table_id = context.resolveStorageID(ast, Context::ResolveOrdinary); StoragePtr table = DatabaseCatalog::instance().getTable(table_id, context); auto metadata_snapshot = table->getInMemoryMetadataPtr(); // Empty list of names means we deduplicate by all columns, but user can explicitly state which columns to use. Names column_names; if (ast.deduplicate_by_columns) { // User requested custom set of columns for deduplication, possibly with Column Transformer expression. { // Expand asterisk, column transformers, etc into list of column names. const auto cols = processColumnTransformers(context.getCurrentDatabase(), table, metadata_snapshot, ast.deduplicate_by_columns); for (const auto & col : cols->children) column_names.emplace_back(col->getColumnName()); } metadata_snapshot->check(column_names, NamesAndTypesList{}, table_id); Names required_columns; { required_columns = metadata_snapshot->getColumnsRequiredForSortingKey(); const auto partitioning_cols = metadata_snapshot->getColumnsRequiredForPartitionKey(); required_columns.reserve(required_columns.size() + partitioning_cols.size()); required_columns.insert(required_columns.end(), partitioning_cols.begin(), partitioning_cols.end()); } for (const auto & required_col : required_columns) { // Deduplication is performed only for adjacent rows in a block, // and all rows in block are in the sorting key order within a single partition, // hence deduplication always implicitly takes sorting keys and partition keys in account. // So we just explicitly state that limitation in order to avoid confusion. if (std::find(column_names.begin(), column_names.end(), required_col) == column_names.end()) throw Exception(ErrorCodes::THERE_IS_NO_COLUMN, "DEDUPLICATE BY expression must include all columns used in table's" " ORDER BY, PRIMARY KEY, or PARTITION BY but '{}' is missing." " Expanded DEDUPLICATE BY columns expression: ['{}']", required_col, fmt::join(column_names, "', '")); } } table->optimize(query_ptr, metadata_snapshot, ast.partition, ast.final, ast.deduplicate, column_names, context); return {}; } AccessRightsElements InterpreterOptimizeQuery::getRequiredAccess() const { const auto & optimize = query_ptr->as(); AccessRightsElements required_access; required_access.emplace_back(AccessType::OPTIMIZE, optimize.database, optimize.table); return required_access; } }