mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 15:12:02 +00:00
Add clearer RangeFiltered implementation. [#CLICKHOUSE-3178]
This commit is contained in:
parent
e2a12d1088
commit
db3a67a421
1
.gitignore
vendored
1
.gitignore
vendored
@ -35,6 +35,7 @@ cmake_install.cmake
|
||||
CTestTestfile.cmake
|
||||
*.a
|
||||
*.o
|
||||
cmake-build-*
|
||||
|
||||
# Python cache
|
||||
*.pyc
|
||||
|
@ -393,10 +393,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks)
|
||||
LOG_DEBUG(log, "Loading data parts");
|
||||
|
||||
std::lock_guard<std::mutex> lock(data_parts_mutex);
|
||||
//std::lock_guard<std::mutex> lock_all(all_data_parts_mutex);
|
||||
|
||||
data_parts.clear();
|
||||
//all_data_parts.clear();
|
||||
|
||||
Strings part_file_names;
|
||||
Poco::DirectoryIterator end;
|
||||
@ -496,6 +493,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks)
|
||||
}
|
||||
|
||||
part->modification_time = Poco::File(full_path + file_name).getLastModified().epochTime();
|
||||
/// Assume that all parts are Committed, covered parts will be detected and marked as Outdated later
|
||||
part->state = DataPartState::Committed;
|
||||
|
||||
data_parts.insert(part);
|
||||
@ -510,18 +508,17 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks)
|
||||
for (auto & part : broken_parts_to_detach)
|
||||
part->renameAddPrefix(true, "");
|
||||
|
||||
//all_data_parts = data_parts;
|
||||
|
||||
/// Delete from the set of current parts those parts that are covered by another part (those parts that
|
||||
/// were merged), but that for some reason are still not deleted from the filesystem.
|
||||
/// Deletion of files will be performed later in the clearOldParts() method.
|
||||
|
||||
if (data_parts.size() >= 2)
|
||||
{
|
||||
DataParts::iterator prev_jt = data_parts.begin();
|
||||
DataParts::iterator curr_jt = prev_jt;
|
||||
++curr_jt;
|
||||
while (curr_jt != data_parts.end())
|
||||
auto committed_parts = getDataPartsRange({DataPartState::Committed});
|
||||
auto prev_jt = committed_parts.begin();
|
||||
auto curr_jt = std::next(prev_jt);
|
||||
|
||||
while (curr_jt != committed_parts.end())
|
||||
{
|
||||
/// Don't consider data parts belonging to different partitions.
|
||||
if ((*curr_jt)->info.partition_id != (*prev_jt)->info.partition_id)
|
||||
@ -534,14 +531,15 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks)
|
||||
if ((*curr_jt)->contains(**prev_jt))
|
||||
{
|
||||
(*prev_jt)->remove_time = (*prev_jt)->modification_time;
|
||||
data_parts.erase(prev_jt);
|
||||
(*prev_jt)->state = DataPartState::Outdated; /// prev_jt becomes invalid here
|
||||
prev_jt = curr_jt;
|
||||
++curr_jt;
|
||||
}
|
||||
else if ((*prev_jt)->contains(**curr_jt))
|
||||
{
|
||||
(*curr_jt)->remove_time = (*curr_jt)->modification_time;
|
||||
data_parts.erase(curr_jt++);
|
||||
(*curr_jt)->state = DataPartState::Outdated; /// curr_jt becomes invalid here
|
||||
++curr_jt;
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -651,7 +649,7 @@ void MergeTreeData::rollbackDeletingParts(const MergeTreeData::DataPartsVector &
|
||||
for (auto & part : parts)
|
||||
{
|
||||
/// We should modify it under data_parts_mutex
|
||||
part->checkState({DataPartState::Deleting});
|
||||
part->assertState({DataPartState::Deleting});
|
||||
part->state = DataPartState::Outdated;
|
||||
}
|
||||
}
|
||||
@ -709,12 +707,10 @@ void MergeTreeData::dropAllData()
|
||||
LOG_TRACE(log, "dropAllData: waiting for locks.");
|
||||
|
||||
std::lock_guard<std::mutex> lock(data_parts_mutex);
|
||||
//std::lock_guard<std::mutex> lock_all(all_data_parts_mutex);
|
||||
|
||||
LOG_TRACE(log, "dropAllData: removing data from memory.");
|
||||
|
||||
data_parts.clear();
|
||||
//all_data_parts.clear();
|
||||
column_sizes.clear();
|
||||
|
||||
context.dropCaches();
|
||||
@ -897,7 +893,7 @@ void MergeTreeData::createConvertExpression(const DataPartPtr & part, const Name
|
||||
const IDataType * observed_type;
|
||||
if (is_nullable)
|
||||
{
|
||||
const DataTypeNullable & nullable_type = static_cast<const DataTypeNullable &>(*column.type);
|
||||
auto & nullable_type = static_cast<const DataTypeNullable &>(*column.type);
|
||||
observed_type = nullable_type.getNestedType().get();
|
||||
}
|
||||
else
|
||||
@ -1321,7 +1317,7 @@ MergeTreeData::DataPartsVector MergeTreeData::renameTempPartAndReplace(
|
||||
if (out_transaction && out_transaction->data)
|
||||
throw Exception("Using the same MergeTreeData::Transaction for overlapping transactions is invalid", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
part->checkState({DataPartState::Temporary});
|
||||
part->assertState({DataPartState::Temporary});
|
||||
|
||||
DataPartsVector replaced;
|
||||
{
|
||||
@ -1354,7 +1350,7 @@ MergeTreeData::DataPartsVector MergeTreeData::renameTempPartAndReplace(
|
||||
if (it_duplicate != data_parts.end())
|
||||
{
|
||||
String message = "Part " + (*it_duplicate)->getNameWithState() + " already exists";
|
||||
if ((*it_duplicate)->tryCheckState({DataPartState::Outdated, DataPartState::Deleting}))
|
||||
if ((*it_duplicate)->checkState({DataPartState::Outdated, DataPartState::Deleting}))
|
||||
message += ", but it will be deleted soon";
|
||||
|
||||
throw Exception(message, ErrorCodes::DUPLICATE_DATA_PART);
|
||||
@ -1369,7 +1365,7 @@ MergeTreeData::DataPartsVector MergeTreeData::renameTempPartAndReplace(
|
||||
|
||||
auto check_replacing_part_state = [&] (const DataPartPtr & cur_part)
|
||||
{
|
||||
cur_part->checkState({DataPartState::PreCommitted, DataPartState::Committed});
|
||||
cur_part->assertState({DataPartState::PreCommitted, DataPartState::Committed});
|
||||
if (cur_part->state == DataPartState::PreCommitted)
|
||||
throw Exception("Could not add part " + new_name + " while replacing part " + cur_part->name + " is in pre-committed state", ErrorCodes::LOGICAL_ERROR);
|
||||
};
|
||||
@ -1486,7 +1482,7 @@ void MergeTreeData::removePartsFromWorkingSet(const DataPartsVector & remove, bo
|
||||
if (!data_parts.count(part))
|
||||
throw Exception("Part " + part->getNameWithState() + " not found in data_parts", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
part->checkState({DataPartState::PreCommitted, DataPartState::Committed, DataPartState::Outdated});
|
||||
part->assertState({DataPartState::PreCommitted, DataPartState::Committed, DataPartState::Outdated});
|
||||
}
|
||||
|
||||
auto remove_time = clear_without_timeout ? 0 : time(nullptr);
|
||||
@ -1748,7 +1744,8 @@ void MergeTreeData::calculateColumnSizesImpl()
|
||||
{
|
||||
column_sizes.clear();
|
||||
|
||||
for (const auto & part : data_parts)
|
||||
/// Take into account only committed parts
|
||||
for (const auto & part : getDataPartsRange({DataPartState::Committed}))
|
||||
addPartContributionToColumnSizes(part);
|
||||
}
|
||||
|
||||
@ -2075,9 +2072,9 @@ void MergeTreeData::Transaction::replaceParts(MergeTreeData::DataPartState move_
|
||||
std::lock_guard<std::mutex> lock(data->data_parts_mutex);
|
||||
|
||||
for (auto & part : committed_parts)
|
||||
part->checkState({DataPartState::Committed});
|
||||
part->assertState({DataPartState::Committed});
|
||||
for (auto & part : precommitted_parts)
|
||||
part->checkState({DataPartState::PreCommitted});
|
||||
part->assertState({DataPartState::PreCommitted});
|
||||
|
||||
/// If it is rollback then do nothing, else make it Outdated and remove their size contribution
|
||||
if (move_committed_to != DataPartState::Committed)
|
||||
|
@ -139,6 +139,9 @@ struct MergeTreeDataPart
|
||||
/// If true, the destructor will delete the directory with the part.
|
||||
bool is_temp = false;
|
||||
|
||||
/// For resharding.
|
||||
size_t shard_no = 0;
|
||||
|
||||
/**
|
||||
* Part state is a stage of its lifetime. States are ordered and state of a part could be increased only.
|
||||
* Part state should be modified under data_parts mutex.
|
||||
@ -159,6 +162,7 @@ struct MergeTreeDataPart
|
||||
Deleting /// not active data part with identity refcounter, it is deleting right now by a cleaner
|
||||
};
|
||||
|
||||
/// Current state of the part. If the part is in working set already, it should be accessed via data_parts mutex
|
||||
mutable State state{State::Temporary};
|
||||
|
||||
/// Returns name of state
|
||||
@ -171,7 +175,7 @@ struct MergeTreeDataPart
|
||||
}
|
||||
|
||||
/// Returns true if state of part is one of affordable_states
|
||||
bool tryCheckState(std::initializer_list<State> affordable_states) const
|
||||
bool checkState(const std::initializer_list<State> & affordable_states) const
|
||||
{
|
||||
for (auto affordable_state : affordable_states)
|
||||
{
|
||||
@ -182,9 +186,9 @@ struct MergeTreeDataPart
|
||||
}
|
||||
|
||||
/// Throws an exception if state of the part is not in affordable_states
|
||||
void checkState(std::initializer_list<State> affordable_states) const
|
||||
void assertState(const std::initializer_list<State> & affordable_states) const
|
||||
{
|
||||
if (!tryCheckState(affordable_states))
|
||||
if (!checkState(affordable_states))
|
||||
{
|
||||
String states_str;
|
||||
for (auto state : affordable_states)
|
||||
@ -194,16 +198,23 @@ struct MergeTreeDataPart
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns a lambda that returns true only for part with states from specified list
|
||||
static inline decltype(auto) getStatesFilter(std::initializer_list<State> affordable_states)
|
||||
/// In comparison with lambdas, it is move assignable and could has several overloaded operator()
|
||||
struct StatesFilter
|
||||
{
|
||||
return [affordable_states] (const std::shared_ptr<const MergeTreeDataPart> & part) {
|
||||
return part->tryCheckState(affordable_states);
|
||||
};
|
||||
}
|
||||
std::initializer_list<State> affordable_states;
|
||||
StatesFilter(const std::initializer_list<State> & affordable_states) : affordable_states(affordable_states) {}
|
||||
|
||||
/// For resharding.
|
||||
size_t shard_no = 0;
|
||||
bool operator() (const std::shared_ptr<const MergeTreeDataPart> & part) const
|
||||
{
|
||||
return part->checkState(affordable_states);
|
||||
}
|
||||
};
|
||||
|
||||
/// Returns a lambda that returns true only for part with states from specified list
|
||||
static inline StatesFilter getStatesFilter(const std::initializer_list<State> & affordable_states)
|
||||
{
|
||||
return StatesFilter(affordable_states);
|
||||
}
|
||||
|
||||
/// Primary key (correspond to primary.idx file).
|
||||
/// Always loaded in RAM. Contains each index_granularity-th value of primary key tuple.
|
||||
|
@ -3534,10 +3534,10 @@ void StorageReplicatedMergeTree::reshardPartitions(
|
||||
|
||||
/// Make a list of local partitions that need to be resharded.
|
||||
std::set<std::string> unique_partition_list;
|
||||
const MergeTreeData::DataParts & data_parts = data.getDataParts();
|
||||
for (MergeTreeData::DataParts::iterator it = data_parts.cbegin(); it != data_parts.cend(); ++it)
|
||||
auto data_parts = data.getDataParts();
|
||||
for (auto & part : data_parts)
|
||||
{
|
||||
const String & current_partition_id = (*it)->info.partition_id;
|
||||
const String & current_partition_id = part->info.partition_id;
|
||||
if (include_all || partition_id == current_partition_id)
|
||||
unique_partition_list.insert(current_partition_id);
|
||||
}
|
||||
|
45
dbms/src/Storages/tests/gtest_range_filtered.cpp.cpp
Normal file
45
dbms/src/Storages/tests/gtest_range_filtered.cpp.cpp
Normal file
@ -0,0 +1,45 @@
|
||||
#include <gtest/gtest.h>
|
||||
#include <common/RangeFiltered.h>
|
||||
#include <vector>
|
||||
#include <set>
|
||||
|
||||
|
||||
TEST(RangeFiltered, simple)
|
||||
{
|
||||
std::vector<int> v;
|
||||
|
||||
for (int i = 0; i < 10; ++i)
|
||||
v.push_back(i);
|
||||
|
||||
auto v30 = createRangeFiltered([] (int i) { return i % 3 == 0;}, v);
|
||||
auto v31 = createRangeFiltered([] (int i) { return i % 3 != 0;}, v);
|
||||
|
||||
for (const int & i : v30)
|
||||
ASSERT_EQ(i % 3, 0);
|
||||
|
||||
for (const int & i : v31)
|
||||
ASSERT_NE(i % 3, 0);
|
||||
|
||||
{
|
||||
auto it = v30.begin();
|
||||
ASSERT_EQ(*it, 0);
|
||||
|
||||
auto it2 = std::next(it);
|
||||
ASSERT_EQ(*it2, 3);
|
||||
|
||||
auto it3 = it;
|
||||
it = std::next(it2);
|
||||
ASSERT_EQ(*it, 6);
|
||||
}
|
||||
|
||||
{
|
||||
auto it = std::next(v30.begin());
|
||||
ASSERT_EQ(*it, 3);
|
||||
|
||||
*it = 2; /// it becomes invalid
|
||||
ASSERT_EQ(*(++it), 6); /// but iteration is sucessfull
|
||||
|
||||
*v30.begin() = 1;
|
||||
ASSERT_EQ(*v30.begin(), 6);
|
||||
}
|
||||
}
|
@ -102,4 +102,4 @@ done
|
||||
except Exception as e:
|
||||
pass
|
||||
# Uncomment when problem will be fixed
|
||||
# check_timeout_exception(e)
|
||||
check_timeout_exception(e)
|
||||
|
@ -1,34 +1,125 @@
|
||||
#pragma once
|
||||
#include <type_traits>
|
||||
#include <boost/range/adaptor/filtered.hpp>
|
||||
|
||||
|
||||
/// Similar to boost::filtered_range but a little bit easier and allows to convert ordinary iterators to filtered
|
||||
template <typename F, typename C>
|
||||
struct RangeFiltered : public boost::iterator_range<boost::filter_iterator<F, decltype(std::end(C()))>>
|
||||
struct RangeFiltered
|
||||
{
|
||||
using RawIterator = decltype(std::end(C()));
|
||||
using FilterIterator = boost::filter_iterator<F, RawIterator>;
|
||||
using Base = boost::iterator_range<FilterIterator>;
|
||||
using RawIterator = typename C:: iterator;
|
||||
class Iterator;
|
||||
|
||||
RangeFiltered(const F & filter, const C & container)
|
||||
: Base(
|
||||
FilterIterator(filter, std::begin(container), std::end(container)),
|
||||
FilterIterator(filter, std::end(container), std::end(container))),
|
||||
filter(filter) {}
|
||||
/// Will iterate over elements for which filter(*it) == true
|
||||
RangeFiltered(F && filter, C & container)
|
||||
: filter(std::move(filter)), container(container) {}
|
||||
|
||||
Iterator begin() const
|
||||
{
|
||||
return {*this, std::begin(container)};
|
||||
}
|
||||
|
||||
Iterator end() const
|
||||
{
|
||||
return {*this, std::end(container)};
|
||||
}
|
||||
|
||||
/// Convert ordinary iterator to filtered one
|
||||
/// Real position will be in range [ordinary_iterator; end()], so it is suitable to use with lower[upper]_bound()
|
||||
inline FilterIterator convert(RawIterator ordinary_iterator) const
|
||||
inline Iterator convert(RawIterator ordinary_iterator) const
|
||||
{
|
||||
return {filter, ordinary_iterator, std::end(*this).end()};
|
||||
return {*this, ordinary_iterator};
|
||||
}
|
||||
|
||||
|
||||
/// It is similar to boost::filtered_iterator, but has additional features:
|
||||
/// it doesn't store end() iterator
|
||||
/// it doesn't store predicate, so it allows to implement operator=()
|
||||
/// it guarantees that operator++() works properly in case of filter(*it) == false
|
||||
class Iterator
|
||||
{
|
||||
public:
|
||||
using Range = RangeFiltered<F, C>;
|
||||
|
||||
typedef Iterator self_type;
|
||||
typedef typename std::iterator_traits<RawIterator>::value_type value_type;
|
||||
typedef typename std::iterator_traits<RawIterator>::reference reference;
|
||||
typedef const value_type & const_reference;
|
||||
typedef typename std::iterator_traits<RawIterator>::pointer pointer;
|
||||
typedef const value_type * const_pointer;
|
||||
typedef typename std::iterator_traits<RawIterator>::difference_type difference_type;
|
||||
typedef std::bidirectional_iterator_tag iterator_category;
|
||||
|
||||
Iterator(const Range & range_, RawIterator iter_)
|
||||
: range(&range_), iter(iter_)
|
||||
{
|
||||
for (; iter != std::end(range->container) && !range->filter(*iter); ++iter);
|
||||
}
|
||||
|
||||
Iterator(const Iterator & rhs) = default;
|
||||
Iterator(Iterator && rhs) noexcept = default;
|
||||
|
||||
Iterator operator++()
|
||||
{
|
||||
++iter;
|
||||
for (; iter != std::end(range->container) && !range->filter(*iter); ++iter);
|
||||
return *this;
|
||||
}
|
||||
|
||||
Iterator operator--()
|
||||
{
|
||||
--iter;
|
||||
for (; !range->filter(*iter); --iter); /// Don't check std::begin() bound
|
||||
return *this;
|
||||
}
|
||||
|
||||
pointer operator->()
|
||||
{
|
||||
return iter.operator->();
|
||||
}
|
||||
|
||||
const_pointer operator->() const
|
||||
{
|
||||
return iter.operator->();
|
||||
}
|
||||
|
||||
reference operator*()
|
||||
{
|
||||
return *iter;
|
||||
}
|
||||
|
||||
const_reference operator*() const
|
||||
{
|
||||
return *iter;
|
||||
}
|
||||
|
||||
bool operator==(const self_type & rhs) const
|
||||
{
|
||||
return iter == rhs.iter;
|
||||
}
|
||||
|
||||
bool operator!=(const self_type & rhs) const
|
||||
{
|
||||
return iter != rhs.iter;
|
||||
}
|
||||
|
||||
self_type & operator=(const self_type & rhs) = default;
|
||||
self_type & operator=(self_type && rhs) noexcept = default;
|
||||
|
||||
~Iterator() = default;
|
||||
|
||||
private:
|
||||
const Range * range = nullptr;
|
||||
RawIterator iter;
|
||||
};
|
||||
|
||||
protected:
|
||||
F filter;
|
||||
C & container;
|
||||
};
|
||||
|
||||
|
||||
template <typename F, typename C>
|
||||
inline decltype(auto) createRangeFiltered(F && filter, C && container)
|
||||
inline RangeFiltered<std::decay_t<F>, std::decay_t<C>> createRangeFiltered(F && filter, C && container)
|
||||
{
|
||||
return RangeFiltered<std::decay_t<F>, std::decay_t<C>>{std::forward<F>(filter), std::forward<C>(container)};
|
||||
return {std::forward<F>(filter), std::forward<C>(container)};
|
||||
};
|
||||
|
Loading…
Reference in New Issue
Block a user