2018-11-01 13:37:36 +00:00
|
|
|
#include <Storages/MergeTree/ReplicatedMergeTreeAlterThread.h>
|
|
|
|
#include <Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h>
|
2018-12-11 13:30:20 +00:00
|
|
|
#include <Storages/MergeTree/ReplicatedMergeTreePartHeader.h>
|
2018-11-01 13:37:36 +00:00
|
|
|
#include <Storages/ColumnsDescription.h>
|
|
|
|
#include <Storages/StorageReplicatedMergeTree.h>
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Common/setThreadName.h>
|
2018-04-03 17:35:48 +00:00
|
|
|
#include <Common/ZooKeeper/KeeperException.h>
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Interpreters/InterpreterAlterQuery.h>
|
|
|
|
#include <Databases/IDatabase.h>
|
2016-04-09 04:22:11 +00:00
|
|
|
|
2018-11-01 13:37:36 +00:00
|
|
|
#include <memory>
|
|
|
|
|
2016-04-09 04:22:11 +00:00
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2018-11-01 13:37:36 +00:00
|
|
|
/// Error codes used in this file. They are only declared here (`extern`);
/// the definitions live in another translation unit.
namespace ErrorCodes
{
    extern const int NOT_FOUND_NODE;
}
|
|
|
|
|
2016-04-09 04:22:11 +00:00
|
|
|
/// Delay, in milliseconds, passed to task->scheduleAfter() when an iteration of run() fails with an exception.
static constexpr auto ALTER_ERROR_SLEEP_MS = 10 * 1000;
|
|
|
|
|
|
|
|
|
2018-05-31 13:05:05 +00:00
|
|
|
/** Binds the alter thread to the given replicated table.
  *
  * - zk_node_cache is constructed with a callback that returns storage.getZooKeeper().
  * - log_name is "<database>.<table> (ReplicatedMergeTreeAlterThread)"; it is used both for the logger
  *   and as the name of the background task.
  * - A task that invokes run() is created in the schedule pool of the storage's global context.
  *   The task is only created here; this constructor does not schedule it.
  */
ReplicatedMergeTreeAlterThread::ReplicatedMergeTreeAlterThread(StorageReplicatedMergeTree & storage_)
    : storage(storage_)
    , zk_node_cache([&] { return storage.getZooKeeper(); })
    , log_name(storage.database_name + "." + storage.table_name + " (ReplicatedMergeTreeAlterThread)")
    , log(&Logger::get(log_name))
{
    task = storage_.global_context.getSchedulePool().createTask(log_name, [this]{ run(); });
}
|
2016-04-09 04:22:11 +00:00
|
|
|
|
|
|
|
void ReplicatedMergeTreeAlterThread::run()
|
|
|
|
{
|
2017-12-29 22:32:04 +00:00
|
|
|
try
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
2017-12-29 22:32:04 +00:00
|
|
|
/** We have a description of columns in ZooKeeper, common for all replicas (Example: /clickhouse/tables/02-06/visits/columns),
|
2019-05-03 02:00:57 +00:00
|
|
|
* as well as a description of columns in local file with metadata (storage.getColumnsList()).
|
2017-12-29 22:32:04 +00:00
|
|
|
*
|
|
|
|
* If these descriptions are different - you need to do ALTER.
|
|
|
|
*
|
|
|
|
* If stored version of the node (columns_version) differs from the version in ZK,
|
|
|
|
* then the description of the columns in ZK does not necessarily differ from the local
|
|
|
|
* - this can happen with a loop from ALTER-s, which as a whole, does not change anything.
|
|
|
|
* In this case, you need to update the stored version number,
|
|
|
|
* and also check the structure of parts, and, if necessary, make ALTER.
|
|
|
|
*
|
|
|
|
* Recorded version number needs to be updated after updating the metadata, under lock.
|
|
|
|
* This version number is checked against the current one for INSERT.
|
|
|
|
* That is, we make sure to insert blocks with the correct structure.
|
|
|
|
*
|
|
|
|
* When the server starts, previous ALTER might not have been completed.
|
|
|
|
* Therefore, for the first time, regardless of the changes, we check the structure of all parts,
|
|
|
|
* (Example: /clickhouse/tables/02-06/visits/replicas/example02-06-1.yandex.ru/parts/20140806_20140831_131664_134988_3296/columns)
|
|
|
|
* and do ALTER if necessary.
|
|
|
|
*
|
|
|
|
* TODO: Too complicated, rewrite everything.
|
|
|
|
*/
|
|
|
|
|
|
|
|
auto zookeeper = storage.getZooKeeper();
|
|
|
|
|
2018-11-01 13:37:36 +00:00
|
|
|
String columns_path = storage.zookeeper_path + "/columns";
|
2018-11-28 11:40:59 +00:00
|
|
|
auto columns_znode = zk_node_cache.get(columns_path, task->getWatchCallback());
|
|
|
|
if (!columns_znode.exists)
|
2018-11-01 13:37:36 +00:00
|
|
|
throw Exception(columns_path + " doesn't exist", ErrorCodes::NOT_FOUND_NODE);
|
2018-11-28 11:40:59 +00:00
|
|
|
int32_t columns_version = columns_znode.stat.version;
|
2018-11-01 13:37:36 +00:00
|
|
|
|
|
|
|
String metadata_path = storage.zookeeper_path + "/metadata";
|
2018-11-28 11:40:59 +00:00
|
|
|
auto metadata_znode = zk_node_cache.get(metadata_path, task->getWatchCallback());
|
|
|
|
if (!metadata_znode.exists)
|
2018-11-01 13:37:36 +00:00
|
|
|
throw Exception(metadata_path + " doesn't exist", ErrorCodes::NOT_FOUND_NODE);
|
2018-11-28 11:40:59 +00:00
|
|
|
int32_t metadata_version = metadata_znode.stat.version;
|
2018-11-01 13:37:36 +00:00
|
|
|
|
|
|
|
const bool changed_columns_version = (columns_version != storage.columns_version);
|
|
|
|
const bool changed_metadata_version = (metadata_version != storage.metadata_version);
|
|
|
|
|
|
|
|
if (!(changed_columns_version || changed_metadata_version || force_recheck_parts))
|
|
|
|
return;
|
|
|
|
|
2018-11-28 11:40:59 +00:00
|
|
|
const String & columns_str = columns_znode.contents;
|
2017-12-29 22:32:04 +00:00
|
|
|
auto columns_in_zk = ColumnsDescription::parse(columns_str);
|
|
|
|
|
2018-11-28 11:40:59 +00:00
|
|
|
const String & metadata_str = metadata_znode.contents;
|
2018-11-02 15:39:19 +00:00
|
|
|
auto metadata_in_zk = ReplicatedMergeTreeTableMetadata::parse(metadata_str);
|
2019-05-03 02:00:57 +00:00
|
|
|
auto metadata_diff = ReplicatedMergeTreeTableMetadata(storage).checkAndFindDiff(metadata_in_zk, /* allow_alter = */ true);
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2019-09-04 13:24:55 +00:00
|
|
|
/// If you need to lock table structure, then suspend merges and moves.
|
2019-08-01 15:36:12 +00:00
|
|
|
ActionLock merge_blocker = storage.merger_mutator.merges_blocker.cancel();
|
2019-09-04 13:24:55 +00:00
|
|
|
ActionLock moves_blocker = storage.parts_mover.moves_blocker.cancel();
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2018-11-01 13:37:36 +00:00
|
|
|
MergeTreeData::DataParts parts;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2018-11-01 13:37:36 +00:00
|
|
|
/// If metadata nodes have changed, we will update table structure locally.
|
|
|
|
if (changed_columns_version || changed_metadata_version)
|
|
|
|
{
|
|
|
|
/// Temporarily cancel part checks to avoid locking for long time.
|
|
|
|
auto temporarily_stop_part_checks = storage.part_check_thread.temporarilyStop();
|
2017-11-17 08:58:35 +00:00
|
|
|
|
2018-11-01 13:37:36 +00:00
|
|
|
/// Temporarily cancel parts sending
|
|
|
|
ActionLock data_parts_exchange_blocker;
|
|
|
|
if (storage.data_parts_exchange_endpoint_holder)
|
|
|
|
data_parts_exchange_blocker = storage.data_parts_exchange_endpoint_holder->getBlocker().cancel();
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2018-11-01 13:37:36 +00:00
|
|
|
/// Temporarily cancel part fetches
|
|
|
|
auto fetches_blocker = storage.fetcher.blocker.cancel();
|
2017-10-06 16:53:55 +00:00
|
|
|
|
2018-11-01 13:37:36 +00:00
|
|
|
LOG_INFO(log, "Version of metadata nodes in ZooKeeper changed. Waiting for structure write lock.");
|
2017-10-06 16:53:55 +00:00
|
|
|
|
2019-03-05 10:12:20 +00:00
|
|
|
auto table_lock = storage.lockExclusively(RWLockImpl::NO_QUERY);
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2018-11-02 15:39:19 +00:00
|
|
|
if (columns_in_zk == storage.getColumns() && metadata_diff.empty())
|
2018-11-01 13:37:36 +00:00
|
|
|
{
|
|
|
|
LOG_INFO(log, "Metadata nodes changed in ZooKeeper, but their contents didn't change. "
|
|
|
|
"Most probably it is a cyclic ALTER.");
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
LOG_INFO(log, "Metadata changed in ZooKeeper. Applying changes locally.");
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2018-11-02 15:39:19 +00:00
|
|
|
storage.setTableStructure(std::move(columns_in_zk), metadata_diff);
|
2017-11-17 08:58:35 +00:00
|
|
|
|
2018-11-01 13:37:36 +00:00
|
|
|
LOG_INFO(log, "Applied changes to the metadata of the table.");
|
|
|
|
}
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2018-11-01 13:37:36 +00:00
|
|
|
/// You need to get a list of parts under table lock to avoid race condition with merge.
|
2019-05-03 02:00:57 +00:00
|
|
|
parts = storage.getDataParts();
|
2017-05-14 23:14:21 +00:00
|
|
|
|
2018-11-01 13:37:36 +00:00
|
|
|
storage.columns_version = columns_version;
|
|
|
|
storage.metadata_version = metadata_version;
|
|
|
|
}
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2018-11-01 13:37:36 +00:00
|
|
|
/// Update parts.
|
|
|
|
if (changed_columns_version || force_recheck_parts)
|
|
|
|
{
|
2019-03-07 18:04:47 +00:00
|
|
|
auto table_lock = storage.lockStructureForShare(false, RWLockImpl::NO_QUERY);
|
2017-11-17 08:58:35 +00:00
|
|
|
|
2018-11-01 13:37:36 +00:00
|
|
|
if (changed_columns_version)
|
|
|
|
LOG_INFO(log, "ALTER-ing parts");
|
2017-11-17 08:58:35 +00:00
|
|
|
|
2018-11-01 13:37:36 +00:00
|
|
|
int changed_parts = 0;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2018-11-01 13:37:36 +00:00
|
|
|
if (!changed_columns_version)
|
2019-05-03 02:00:57 +00:00
|
|
|
parts = storage.getDataParts();
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2018-11-01 13:37:36 +00:00
|
|
|
const auto columns_for_parts = storage.getColumns().getAllPhysical();
|
2019-05-02 16:07:23 +00:00
|
|
|
const auto indices_for_parts = storage.getIndices();
|
2017-11-17 08:58:35 +00:00
|
|
|
|
2018-11-01 13:37:36 +00:00
|
|
|
for (const MergeTreeData::DataPartPtr & part : parts)
|
|
|
|
{
|
|
|
|
/// Update the part and write result to temporary files.
|
|
|
|
/// TODO: You can skip checking for too large changes if ZooKeeper has, for example,
|
|
|
|
/// node /flags/force_alter.
|
2019-04-30 23:46:19 +00:00
|
|
|
MergeTreeData::AlterDataPartTransactionPtr transaction(new MergeTreeData::AlterDataPartTransaction(part));
|
2019-05-07 20:46:08 +00:00
|
|
|
storage.alterDataPart(columns_for_parts, indices_for_parts.indices, false, transaction);
|
2019-04-30 23:46:19 +00:00
|
|
|
if (!transaction->isValid())
|
2018-11-01 13:37:36 +00:00
|
|
|
continue;
|
|
|
|
|
2018-12-11 13:30:20 +00:00
|
|
|
storage.updatePartHeaderInZooKeeperAndCommit(zookeeper, *transaction);
|
2018-11-01 13:37:36 +00:00
|
|
|
|
2018-12-11 13:30:20 +00:00
|
|
|
++changed_parts;
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
|
|
|
|
2018-11-01 13:37:36 +00:00
|
|
|
/// Columns sizes could be quietly changed in case of MODIFY/ADD COLUMN
|
2019-05-03 02:00:57 +00:00
|
|
|
storage.recalculateColumnSizes();
|
2018-11-01 13:37:36 +00:00
|
|
|
|
|
|
|
if (changed_columns_version)
|
|
|
|
{
|
|
|
|
if (changed_parts != 0)
|
|
|
|
LOG_INFO(log, "ALTER-ed " << changed_parts << " parts");
|
|
|
|
else
|
|
|
|
LOG_INFO(log, "No parts ALTER-ed");
|
|
|
|
}
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
2018-11-01 13:37:36 +00:00
|
|
|
|
|
|
|
/// Update metadata ZK nodes for a specific replica.
|
|
|
|
if (changed_columns_version || force_recheck_parts)
|
|
|
|
zookeeper->set(storage.replica_path + "/columns", columns_str);
|
|
|
|
if (changed_metadata_version || force_recheck_parts)
|
|
|
|
zookeeper->set(storage.replica_path + "/metadata", metadata_str);
|
|
|
|
|
|
|
|
force_recheck_parts = false;
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
2018-08-25 01:58:14 +00:00
|
|
|
catch (const Coordination::Exception & e)
|
2017-12-29 22:32:04 +00:00
|
|
|
{
|
2018-04-10 13:20:14 +00:00
|
|
|
tryLogCurrentException(log, __PRETTY_FUNCTION__);
|
2016-04-09 04:22:11 +00:00
|
|
|
|
2018-08-25 01:58:14 +00:00
|
|
|
if (e.code == Coordination::ZSESSIONEXPIRED)
|
2018-04-24 17:11:59 +00:00
|
|
|
return;
|
2018-04-19 18:16:18 +00:00
|
|
|
|
2017-12-29 22:32:04 +00:00
|
|
|
force_recheck_parts = true;
|
2018-05-31 13:05:05 +00:00
|
|
|
task->scheduleAfter(ALTER_ERROR_SLEEP_MS);
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
2018-04-24 17:11:59 +00:00
|
|
|
catch (...)
|
|
|
|
{
|
|
|
|
tryLogCurrentException(log, __PRETTY_FUNCTION__);
|
2016-04-09 04:22:11 +00:00
|
|
|
|
2018-04-24 17:11:59 +00:00
|
|
|
force_recheck_parts = true;
|
2018-05-31 13:05:05 +00:00
|
|
|
task->scheduleAfter(ALTER_ERROR_SLEEP_MS);
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
2016-04-09 04:22:11 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
}
|