ClickHouse/src/Interpreters/InterpreterOptimizeQuery.cpp
youennL-cs 6526c2a8ab
[RFC] Replacing merge tree new engine (#41005)
* Add new engine to ReplacingMergeTree corresponding to the ReplacingCollapsingMergeTree

* Add new test for the new ReplacingMergeTree engine

* Limit sign value to -1/1

* Add new engine to ReplacingMergeTree corresponding to the ReplacingCollapsingMergeTree

* Add new test for the new ReplacingMergeTree engine

* Limit sign value to -1/1

* Replace sign column(Int8) by is_deleted(UInt8)

* Add new engine to ReplacingMergeTree corresponding to the ReplacingCollapsingMergeTree

* Add new test for the new ReplacingMergeTree engine

* Limit sign value to -1/1

* Replace sign column(Int8) by is_deleted(UInt8)

* Add new engine to ReplacingMergeTree corresponding to the ReplacingCollapsingMergeTree

* Add new test for the new ReplacingMergeTree engine

* Limit sign value to -1/1

* Replace sign column(Int8) by is_deleted(UInt8)

* Add keyword 'CLEANUP' when OPTIMIZE

* Cleanup uniquely when it's a replacingMergeTree

* Propagate CLEANUP information and change from 'with_cleanup' to 'cleanup'

* Cleanup data flagged as 'is_deleted'

* Fix merge when optimize and add a test

* Fix OPTIMIZE and INSERT + add tests

* New fix for cleanup at the merge

* Cleanup debug logs

* Add the SETTINGS option 'clean_deleted_rows' that can be 'never' or 'always'

* Fix regression bug; Now REplicatedMergeTree can be called as before without 'is_deleted'

* Add Replicated tests

* Disable tag 'long' for our test and cleanup some white spaces

* Update tests

* Fix tests and remove additional useless whitespace

* Fix replica test

* Style clean && add condition check for is_deleted values

* clean_deleted_rows settings is nom an enum

* Add valid default value to the clean_deleted_rows settings

* Update cleanup checkers to use the enum and fix typos in the test

* Fix submodule contrib/AMQP-CPP pointer

* Add missing messages in test reference and remove a print with non derterministic order

* fix replica test reference

* Fix edge case

* Fix a typo for the spell checker

* Fix reference

* Fix a condition to raise an error if is_deleted differ from 0/1 and cleanup

* Change tests file name and update number

* This should fix the ReplacingMergeTree parameter set

* Fix replicated parameters

* Disable allow_deprecated_syntax_for_merge_tree for our new column

* Fix a test

* Remove non deterministic order print in the test

* Test on replicas

* Remove a condition, when checking optional parameters, that should not be sueful since we disabled the deprected_syntaxe

* Revert "Remove a condition, when checking optional parameters, that should not be useful since we disabled the deprected_syntaxe"

This reverts commit b65d64c05e.

* Fix replica management and limit the number of argument to two maximum, due to the possiblity of deprecated table create/attach failing otherwise

* Test a fix for replicated log information error

* Try to add sync to have consistent results

* Change path of replicas that should cause one issue and add few prints in case it's not that

* Get cleanup info on replicas only if information found

* Fix style issues

* Try to avoid replication error 'cannot select parts...' and and replica read/write field order

* Cleanup according to PR reviews
 and add tests on error raised.

* Update src/Storages/MergeTree/registerStorageMergeTree.cpp

Co-authored-by: Alexander Tokmakov <tavplubix@gmail.com>

* Select ... FINAL don't show rows with is_deleted = true

* Update and fix SELECT ... FINAL merge parameter

* Remove is_deleted rows only on the version inserted when merge

* Fix (master) updates issues

* Revert changes that should not be commited

* Add changes according to review

* Revert changes that should not be commited - part 2

---------

Co-authored-by: Alexander Tokmakov <tavplubix@gmail.com>
2023-02-16 16:03:16 +03:00

97 lines
3.9 KiB
C++

#include <Storages/IStorage.h>
#include <Parsers/ASTOptimizeQuery.h>
#include <Interpreters/Context.h>
#include <Interpreters/executeDDLQueryOnCluster.h>
#include <Interpreters/InterpreterOptimizeQuery.h>
#include <Access/Common/AccessRightsElement.h>
#include <Common/typeid_cast.h>
#include <Parsers/ASTExpressionList.h>
#include <Storages/MergeTree/MergeTreeData.h>
#include <Interpreters/processColumnTransformers.h>
#include <memory>
namespace DB
{
namespace ErrorCodes
{
extern const int THERE_IS_NO_COLUMN;
}
BlockIO InterpreterOptimizeQuery::execute()
{
const auto & ast = query_ptr->as<ASTOptimizeQuery &>();
if (!ast.cluster.empty())
{
DDLQueryOnClusterParams params;
params.access_to_check = getRequiredAccess();
return executeDDLQueryOnCluster(query_ptr, getContext(), params);
}
getContext()->checkAccess(getRequiredAccess());
auto table_id = getContext()->resolveStorageID(ast, Context::ResolveOrdinary);
StoragePtr table = DatabaseCatalog::instance().getTable(table_id, getContext());
checkStorageSupportsTransactionsIfNeeded(table, getContext());
auto metadata_snapshot = table->getInMemoryMetadataPtr();
auto storage_snapshot = table->getStorageSnapshot(metadata_snapshot, getContext());
// Empty list of names means we deduplicate by all columns, but user can explicitly state which columns to use.
Names column_names;
if (ast.deduplicate_by_columns)
{
// User requested custom set of columns for deduplication, possibly with Column Transformer expression.
{
// Expand asterisk, column transformers, etc into list of column names.
const auto cols
= processColumnTransformers(getContext()->getCurrentDatabase(), table, metadata_snapshot, ast.deduplicate_by_columns);
for (const auto & col : cols->children)
column_names.emplace_back(col->getColumnName());
}
storage_snapshot->check(column_names);
Names required_columns;
{
required_columns = metadata_snapshot->getColumnsRequiredForSortingKey();
const auto partitioning_cols = metadata_snapshot->getColumnsRequiredForPartitionKey();
required_columns.reserve(required_columns.size() + partitioning_cols.size());
required_columns.insert(required_columns.end(), partitioning_cols.begin(), partitioning_cols.end());
}
for (const auto & required_col : required_columns)
{
// Deduplication is performed only for adjacent rows in a block,
// and all rows in block are in the sorting key order within a single partition,
// hence deduplication always implicitly takes sorting keys and partition keys in account.
// So we just explicitly state that limitation in order to avoid confusion.
if (std::find(column_names.begin(), column_names.end(), required_col) == column_names.end())
throw Exception(ErrorCodes::THERE_IS_NO_COLUMN,
"DEDUPLICATE BY expression must include all columns used in table's"
" ORDER BY, PRIMARY KEY, or PARTITION BY but '{}' is missing."
" Expanded DEDUPLICATE BY columns expression: ['{}']",
required_col, fmt::join(column_names, "', '"));
}
}
if (auto * snapshot_data = dynamic_cast<MergeTreeData::SnapshotData *>(storage_snapshot->data.get()))
snapshot_data->parts = {};
table->optimize(query_ptr, metadata_snapshot, ast.partition, ast.final, ast.deduplicate, column_names, ast.cleanup, getContext());
return {};
}
AccessRightsElements InterpreterOptimizeQuery::getRequiredAccess() const
{
const auto & optimize = query_ptr->as<const ASTOptimizeQuery &>();
AccessRightsElements required_access;
required_access.emplace_back(AccessType::OPTIMIZE, optimize.getDatabase(), optimize.getTable());
return required_access;
}
}