Add clearer RangeFiltered implementation. [#CLICKHOUSE-3178]

This commit is contained in:
Vitaliy Lyudvichenko 2017-09-22 00:51:17 +03:00
parent e2a12d1088
commit db3a67a421
7 changed files with 196 additions and 51 deletions

1
.gitignore vendored
View File

@ -35,6 +35,7 @@ cmake_install.cmake
CTestTestfile.cmake
*.a
*.o
cmake-build-*
# Python cache
*.pyc

View File

@ -393,10 +393,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks)
LOG_DEBUG(log, "Loading data parts");
std::lock_guard<std::mutex> lock(data_parts_mutex);
//std::lock_guard<std::mutex> lock_all(all_data_parts_mutex);
data_parts.clear();
//all_data_parts.clear();
Strings part_file_names;
Poco::DirectoryIterator end;
@ -496,6 +493,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks)
}
part->modification_time = Poco::File(full_path + file_name).getLastModified().epochTime();
/// Assume that all parts are Committed, covered parts will be detected and marked as Outdated later
part->state = DataPartState::Committed;
data_parts.insert(part);
@ -510,18 +508,17 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks)
for (auto & part : broken_parts_to_detach)
part->renameAddPrefix(true, "");
//all_data_parts = data_parts;
/// Delete from the set of current parts those parts that are covered by another part (those parts that
/// were merged), but that for some reason are still not deleted from the filesystem.
/// Deletion of files will be performed later in the clearOldParts() method.
if (data_parts.size() >= 2)
{
DataParts::iterator prev_jt = data_parts.begin();
DataParts::iterator curr_jt = prev_jt;
++curr_jt;
while (curr_jt != data_parts.end())
auto committed_parts = getDataPartsRange({DataPartState::Committed});
auto prev_jt = committed_parts.begin();
auto curr_jt = std::next(prev_jt);
while (curr_jt != committed_parts.end())
{
/// Don't consider data parts belonging to different partitions.
if ((*curr_jt)->info.partition_id != (*prev_jt)->info.partition_id)
@ -534,14 +531,15 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks)
if ((*curr_jt)->contains(**prev_jt))
{
(*prev_jt)->remove_time = (*prev_jt)->modification_time;
data_parts.erase(prev_jt);
(*prev_jt)->state = DataPartState::Outdated; /// prev_jt becomes invalid here
prev_jt = curr_jt;
++curr_jt;
}
else if ((*prev_jt)->contains(**curr_jt))
{
(*curr_jt)->remove_time = (*curr_jt)->modification_time;
data_parts.erase(curr_jt++);
(*curr_jt)->state = DataPartState::Outdated; /// curr_jt becomes invalid here
++curr_jt;
}
else
{
@ -651,7 +649,7 @@ void MergeTreeData::rollbackDeletingParts(const MergeTreeData::DataPartsVector &
for (auto & part : parts)
{
/// We should modify it under data_parts_mutex
part->checkState({DataPartState::Deleting});
part->assertState({DataPartState::Deleting});
part->state = DataPartState::Outdated;
}
}
@ -709,12 +707,10 @@ void MergeTreeData::dropAllData()
LOG_TRACE(log, "dropAllData: waiting for locks.");
std::lock_guard<std::mutex> lock(data_parts_mutex);
//std::lock_guard<std::mutex> lock_all(all_data_parts_mutex);
LOG_TRACE(log, "dropAllData: removing data from memory.");
data_parts.clear();
//all_data_parts.clear();
column_sizes.clear();
context.dropCaches();
@ -897,7 +893,7 @@ void MergeTreeData::createConvertExpression(const DataPartPtr & part, const Name
const IDataType * observed_type;
if (is_nullable)
{
const DataTypeNullable & nullable_type = static_cast<const DataTypeNullable &>(*column.type);
auto & nullable_type = static_cast<const DataTypeNullable &>(*column.type);
observed_type = nullable_type.getNestedType().get();
}
else
@ -1321,7 +1317,7 @@ MergeTreeData::DataPartsVector MergeTreeData::renameTempPartAndReplace(
if (out_transaction && out_transaction->data)
throw Exception("Using the same MergeTreeData::Transaction for overlapping transactions is invalid", ErrorCodes::LOGICAL_ERROR);
part->checkState({DataPartState::Temporary});
part->assertState({DataPartState::Temporary});
DataPartsVector replaced;
{
@ -1354,7 +1350,7 @@ MergeTreeData::DataPartsVector MergeTreeData::renameTempPartAndReplace(
if (it_duplicate != data_parts.end())
{
String message = "Part " + (*it_duplicate)->getNameWithState() + " already exists";
if ((*it_duplicate)->tryCheckState({DataPartState::Outdated, DataPartState::Deleting}))
if ((*it_duplicate)->checkState({DataPartState::Outdated, DataPartState::Deleting}))
message += ", but it will be deleted soon";
throw Exception(message, ErrorCodes::DUPLICATE_DATA_PART);
@ -1369,7 +1365,7 @@ MergeTreeData::DataPartsVector MergeTreeData::renameTempPartAndReplace(
auto check_replacing_part_state = [&] (const DataPartPtr & cur_part)
{
cur_part->checkState({DataPartState::PreCommitted, DataPartState::Committed});
cur_part->assertState({DataPartState::PreCommitted, DataPartState::Committed});
if (cur_part->state == DataPartState::PreCommitted)
throw Exception("Could not add part " + new_name + " while replacing part " + cur_part->name + " is in pre-committed state", ErrorCodes::LOGICAL_ERROR);
};
@ -1486,7 +1482,7 @@ void MergeTreeData::removePartsFromWorkingSet(const DataPartsVector & remove, bo
if (!data_parts.count(part))
throw Exception("Part " + part->getNameWithState() + " not found in data_parts", ErrorCodes::LOGICAL_ERROR);
part->checkState({DataPartState::PreCommitted, DataPartState::Committed, DataPartState::Outdated});
part->assertState({DataPartState::PreCommitted, DataPartState::Committed, DataPartState::Outdated});
}
auto remove_time = clear_without_timeout ? 0 : time(nullptr);
@ -1748,7 +1744,8 @@ void MergeTreeData::calculateColumnSizesImpl()
{
column_sizes.clear();
for (const auto & part : data_parts)
/// Take into account only committed parts
for (const auto & part : getDataPartsRange({DataPartState::Committed}))
addPartContributionToColumnSizes(part);
}
@ -2075,9 +2072,9 @@ void MergeTreeData::Transaction::replaceParts(MergeTreeData::DataPartState move_
std::lock_guard<std::mutex> lock(data->data_parts_mutex);
for (auto & part : committed_parts)
part->checkState({DataPartState::Committed});
part->assertState({DataPartState::Committed});
for (auto & part : precommitted_parts)
part->checkState({DataPartState::PreCommitted});
part->assertState({DataPartState::PreCommitted});
/// If it is rollback then do nothing, else make it Outdated and remove their size contribution
if (move_committed_to != DataPartState::Committed)

View File

@ -139,6 +139,9 @@ struct MergeTreeDataPart
/// If true, the destructor will delete the directory with the part.
bool is_temp = false;
/// For resharding.
size_t shard_no = 0;
/**
* Part state is a stage of its lifetime. States are ordered and state of a part could be increased only.
* Part state should be modified under data_parts mutex.
@ -159,6 +162,7 @@ struct MergeTreeDataPart
Deleting /// not active data part with identity refcounter, it is deleting right now by a cleaner
};
/// Current state of the part. If the part is in working set already, it should be accessed via data_parts mutex
mutable State state{State::Temporary};
/// Returns name of state
@ -171,7 +175,7 @@ struct MergeTreeDataPart
}
/// Returns true if state of part is one of affordable_states
bool tryCheckState(std::initializer_list<State> affordable_states) const
bool checkState(const std::initializer_list<State> & affordable_states) const
{
for (auto affordable_state : affordable_states)
{
@ -182,9 +186,9 @@ struct MergeTreeDataPart
}
/// Throws an exception if state of the part is not in affordable_states
void checkState(std::initializer_list<State> affordable_states) const
void assertState(const std::initializer_list<State> & affordable_states) const
{
if (!tryCheckState(affordable_states))
if (!checkState(affordable_states))
{
String states_str;
for (auto state : affordable_states)
@ -194,16 +198,23 @@ struct MergeTreeDataPart
}
}
/// Returns a lambda that returns true only for part with states from specified list
static inline decltype(auto) getStatesFilter(std::initializer_list<State> affordable_states)
/// In comparison with lambdas, it is move assignable and could has several overloaded operator()
struct StatesFilter
{
return [affordable_states] (const std::shared_ptr<const MergeTreeDataPart> & part) {
return part->tryCheckState(affordable_states);
};
}
std::initializer_list<State> affordable_states;
StatesFilter(const std::initializer_list<State> & affordable_states) : affordable_states(affordable_states) {}
/// For resharding.
size_t shard_no = 0;
bool operator() (const std::shared_ptr<const MergeTreeDataPart> & part) const
{
return part->checkState(affordable_states);
}
};
/// Returns a lambda that returns true only for part with states from specified list
static inline StatesFilter getStatesFilter(const std::initializer_list<State> & affordable_states)
{
return StatesFilter(affordable_states);
}
/// Primary key (correspond to primary.idx file).
/// Always loaded in RAM. Contains each index_granularity-th value of primary key tuple.

View File

@ -3534,10 +3534,10 @@ void StorageReplicatedMergeTree::reshardPartitions(
/// Make a list of local partitions that need to be resharded.
std::set<std::string> unique_partition_list;
const MergeTreeData::DataParts & data_parts = data.getDataParts();
for (MergeTreeData::DataParts::iterator it = data_parts.cbegin(); it != data_parts.cend(); ++it)
auto data_parts = data.getDataParts();
for (auto & part : data_parts)
{
const String & current_partition_id = (*it)->info.partition_id;
const String & current_partition_id = part->info.partition_id;
if (include_all || partition_id == current_partition_id)
unique_partition_list.insert(current_partition_id);
}

View File

@ -0,0 +1,45 @@
#include <gtest/gtest.h>
#include <common/RangeFiltered.h>
#include <vector>
#include <set>
TEST(RangeFiltered, simple)
{
std::vector<int> v;
for (int i = 0; i < 10; ++i)
v.push_back(i);
auto v30 = createRangeFiltered([] (int i) { return i % 3 == 0;}, v);
auto v31 = createRangeFiltered([] (int i) { return i % 3 != 0;}, v);
for (const int & i : v30)
ASSERT_EQ(i % 3, 0);
for (const int & i : v31)
ASSERT_NE(i % 3, 0);
{
auto it = v30.begin();
ASSERT_EQ(*it, 0);
auto it2 = std::next(it);
ASSERT_EQ(*it2, 3);
auto it3 = it;
it = std::next(it2);
ASSERT_EQ(*it, 6);
}
{
auto it = std::next(v30.begin());
ASSERT_EQ(*it, 3);
*it = 2; /// it becomes invalid
ASSERT_EQ(*(++it), 6); /// but iteration is sucessfull
*v30.begin() = 1;
ASSERT_EQ(*v30.begin(), 6);
}
}

View File

@ -102,4 +102,4 @@ done
except Exception as e:
pass
# Uncomment when problem will be fixed
# check_timeout_exception(e)
check_timeout_exception(e)

View File

@ -1,34 +1,125 @@
#pragma once
#include <type_traits>
#include <boost/range/adaptor/filtered.hpp>
/// Similar to boost::filtered_range but a little bit easier and allows to convert ordinary iterators to filtered
template <typename F, typename C>
struct RangeFiltered : public boost::iterator_range<boost::filter_iterator<F, decltype(std::end(C()))>>
struct RangeFiltered
{
using RawIterator = decltype(std::end(C()));
using FilterIterator = boost::filter_iterator<F, RawIterator>;
using Base = boost::iterator_range<FilterIterator>;
using RawIterator = typename C:: iterator;
class Iterator;
RangeFiltered(const F & filter, const C & container)
: Base(
FilterIterator(filter, std::begin(container), std::end(container)),
FilterIterator(filter, std::end(container), std::end(container))),
filter(filter) {}
/// Will iterate over elements for which filter(*it) == true
RangeFiltered(F && filter, C & container)
: filter(std::move(filter)), container(container) {}
Iterator begin() const
{
return {*this, std::begin(container)};
}
Iterator end() const
{
return {*this, std::end(container)};
}
/// Convert ordinary iterator to filtered one
/// Real position will be in range [ordinary_iterator; end()], so it is suitable to use with lower[upper]_bound()
inline FilterIterator convert(RawIterator ordinary_iterator) const
inline Iterator convert(RawIterator ordinary_iterator) const
{
return {filter, ordinary_iterator, std::end(*this).end()};
return {*this, ordinary_iterator};
}
/// It is similar to boost::filtered_iterator, but has additional features:
/// it doesn't store end() iterator
/// it doesn't store predicate, so it allows to implement operator=()
/// it guarantees that operator++() works properly in case of filter(*it) == false
class Iterator
{
public:
using Range = RangeFiltered<F, C>;
typedef Iterator self_type;
typedef typename std::iterator_traits<RawIterator>::value_type value_type;
typedef typename std::iterator_traits<RawIterator>::reference reference;
typedef const value_type & const_reference;
typedef typename std::iterator_traits<RawIterator>::pointer pointer;
typedef const value_type * const_pointer;
typedef typename std::iterator_traits<RawIterator>::difference_type difference_type;
typedef std::bidirectional_iterator_tag iterator_category;
Iterator(const Range & range_, RawIterator iter_)
: range(&range_), iter(iter_)
{
for (; iter != std::end(range->container) && !range->filter(*iter); ++iter);
}
Iterator(const Iterator & rhs) = default;
Iterator(Iterator && rhs) noexcept = default;
Iterator operator++()
{
++iter;
for (; iter != std::end(range->container) && !range->filter(*iter); ++iter);
return *this;
}
Iterator operator--()
{
--iter;
for (; !range->filter(*iter); --iter); /// Don't check std::begin() bound
return *this;
}
pointer operator->()
{
return iter.operator->();
}
const_pointer operator->() const
{
return iter.operator->();
}
reference operator*()
{
return *iter;
}
const_reference operator*() const
{
return *iter;
}
bool operator==(const self_type & rhs) const
{
return iter == rhs.iter;
}
bool operator!=(const self_type & rhs) const
{
return iter != rhs.iter;
}
self_type & operator=(const self_type & rhs) = default;
self_type & operator=(self_type && rhs) noexcept = default;
~Iterator() = default;
private:
const Range * range = nullptr;
RawIterator iter;
};
protected:
F filter;
C & container;
};
template <typename F, typename C>
inline decltype(auto) createRangeFiltered(F && filter, C && container)
inline RangeFiltered<std::decay_t<F>, std::decay_t<C>> createRangeFiltered(F && filter, C && container)
{
return RangeFiltered<std::decay_t<F>, std::decay_t<C>>{std::forward<F>(filter), std::forward<C>(container)};
return {std::forward<F>(filter), std::forward<C>(container)};
};