diff --git a/dbms/programs/copier/ClusterCopier.cpp b/dbms/programs/copier/ClusterCopier.cpp index 779ef5612e6..35c112da4d1 100644 --- a/dbms/programs/copier/ClusterCopier.cpp +++ b/dbms/programs/copier/ClusterCopier.cpp @@ -6,6 +6,7 @@ #include namespace DB +{ namespace ErrorCodes { @@ -14,7 +15,7 @@ namespace ErrorCodes extern const int UNFINISHED; extern const int BAD_ARGUMENTS; } -{ + void ClusterCopier::init() { diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index 433bf55cbdb..315a6e2d6f5 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -126,42 +126,6 @@ static const auto QUEUE_UPDATE_ERROR_SLEEP_MS = 1 * 1000; static const auto MERGE_SELECTING_SLEEP_MS = 5 * 1000; static const auto MUTATIONS_FINALIZING_SLEEP_MS = 1 * 1000; -/** There are three places for each part, where it should be - * 1. In the RAM, data_parts, all_data_parts. - * 2. In the filesystem (FS), the directory with the data of the table. - * 3. in ZooKeeper (ZK). - * - * When adding a part, it must be added immediately to these three places. - * This is done like this - * - [FS] first write the part into a temporary directory on the filesystem; - * - [FS] rename the temporary part to the result on the filesystem; - * - [RAM] immediately afterwards add it to the `data_parts`, and remove from `data_parts` any parts covered by this one; - * - [RAM] also set the `Transaction` object, which in case of an exception (in next point), - * rolls back the changes in `data_parts` (from the previous point) back; - * - [ZK] then send a transaction (multi) to add a part to ZooKeeper (and some more actions); - * - [FS, ZK] by the way, removing the covered (old) parts from filesystem, from ZooKeeper and from `all_data_parts` - * is delayed, after a few minutes. - * - * There is no atomicity here. - * It could be possible to achieve atomicity using undo/redo logs and a flag in `DataPart` when it is completely ready. - * But it would be inconvenient - I would have to write undo/redo logs for each `Part` in ZK, and this would increase already large number of interactions. - * - * Instead, we are forced to work in a situation where at any time - * (from another thread, or after server restart), there may be an unfinished transaction. - * (note - for this the part should be in RAM) - * From these cases the most frequent one is when the part is already in the data_parts, but it's not yet in ZooKeeper. - * This case must be distinguished from the case where such a situation is achieved due to some kind of damage to the state. - * - * Do this with the threshold for the time. - * If the part is young enough, its lack in ZooKeeper will be perceived optimistically - as if it just did not have time to be added there - * - as if the transaction has not yet been executed, but will soon be executed. - * And if the part is old, its absence in ZooKeeper will be perceived as an unfinished transaction that needs to be rolled back. - * - * PS. Perhaps it would be better to add a flag to the DataPart that a part is inserted into ZK. - * But here it's too easy to get confused with the consistency of this flag. - */ -extern const int MAX_AGE_OF_LOCAL_PART_THAT_WASNT_ADDED_TO_ZOOKEEPER = 5 * 60; - void StorageReplicatedMergeTree::setZooKeeper(zkutil::ZooKeeperPtr zookeeper) { diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.h b/dbms/src/Storages/StorageReplicatedMergeTree.h index a6b1ac7d73c..58789e48dc5 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.h +++ b/dbms/src/Storages/StorageReplicatedMergeTree.h @@ -555,5 +555,40 @@ protected: }; +/** There are three places for each part, where it should be + * 1. In the RAM, data_parts, all_data_parts. + * 2. In the filesystem (FS), the directory with the data of the table. + * 3. in ZooKeeper (ZK). + * + * When adding a part, it must be added immediately to these three places. + * This is done like this + * - [FS] first write the part into a temporary directory on the filesystem; + * - [FS] rename the temporary part to the result on the filesystem; + * - [RAM] immediately afterwards add it to the `data_parts`, and remove from `data_parts` any parts covered by this one; + * - [RAM] also set the `Transaction` object, which in case of an exception (in next point), + * rolls back the changes in `data_parts` (from the previous point) back; + * - [ZK] then send a transaction (multi) to add a part to ZooKeeper (and some more actions); + * - [FS, ZK] by the way, removing the covered (old) parts from filesystem, from ZooKeeper and from `all_data_parts` + * is delayed, after a few minutes. + * + * There is no atomicity here. + * It could be possible to achieve atomicity using undo/redo logs and a flag in `DataPart` when it is completely ready. + * But it would be inconvenient - I would have to write undo/redo logs for each `Part` in ZK, and this would increase already large number of interactions. + * + * Instead, we are forced to work in a situation where at any time + * (from another thread, or after server restart), there may be an unfinished transaction. + * (note - for this the part should be in RAM) + * From these cases the most frequent one is when the part is already in the data_parts, but it's not yet in ZooKeeper. + * This case must be distinguished from the case where such a situation is achieved due to some kind of damage to the state. + * + * Do this with the threshold for the time. + * If the part is young enough, its lack in ZooKeeper will be perceived optimistically - as if it just did not have time to be added there + * - as if the transaction has not yet been executed, but will soon be executed. + * And if the part is old, its absence in ZooKeeper will be perceived as an unfinished transaction that needs to be rolled back. + * + * PS. Perhaps it would be better to add a flag to the DataPart that a part is inserted into ZK. + * But here it's too easy to get confused with the consistency of this flag. + */ +#define MAX_AGE_OF_LOCAL_PART_THAT_WASNT_ADDED_TO_ZOOKEEPER (5 * 60) }