dbms: Server: Various optimizations. [#METR-17276]

This commit is contained in:
Alexey Arno 2015-07-24 19:46:00 +03:00
parent 4bf6fe2070
commit b3bcc4f668
5 changed files with 163 additions and 43 deletions

View File

@ -2,7 +2,7 @@
#include <DB/Common/HashTable/SmallTable.h>
#include <DB/Common/HashTable/HashSet.h>
#include <DB/Common/HyperLogLogWithSmallSetOptimization.h>
#include <statdaemons/HyperLogLogCounter.h>
#include <DB/Core/Defines.h>
@ -16,7 +16,7 @@ enum class ContainerType : UInt8 { SMALL = 1, MEDIUM = 2, LARGE = 3 };
/// Returns whichever of the two container types has the greater underlying integer value
/// (SMALL < MEDIUM < LARGE per the enum declaration).
static inline ContainerType max(const ContainerType & lhs, const ContainerType & rhs)
{
/// NOTE(review): the two declarations of `res` below look like the before/after lines of a single
/// diff hunk (unsigned int vs. UInt8) - only one of them should exist in the real file.
unsigned int res = std::max(static_cast<unsigned int>(lhs), static_cast<unsigned int>(rhs));
UInt8 res = std::max(static_cast<UInt8>(lhs), static_cast<UInt8>(rhs));
return static_cast<ContainerType>(res);
}
@ -26,16 +26,25 @@ static inline ContainerType max(const ContainerType & lhs, const ContainerType &
* Для среднего - выделяется HashSet.
* Для большого - выделяется HyperLogLog.
*/
template <typename Key, typename HashContainer, UInt8 small_set_size_max, UInt8 medium_set_power2_max, UInt8 K>
template
<
typename Key,
typename HashContainer,
UInt8 small_set_size_max,
UInt8 medium_set_power2_max,
UInt8 K,
typename Hash = IntHash32<Key>,
typename DenominatorType = float
>
class CombinedCardinalityEstimator
{
public:
using Self = CombinedCardinalityEstimator<Key, HashContainer, small_set_size_max, medium_set_power2_max, K>;
using Self = CombinedCardinalityEstimator<Key, HashContainer, small_set_size_max, medium_set_power2_max, K, Hash, DenominatorType>;
private:
using Small = SmallSet<Key, small_set_size_max>;
using Medium = HashContainer;
using Large = HyperLogLogWithSmallSetOptimization<Key, small_set_size_max, K>;
using Large = HyperLogLogCounter<K, Hash, DenominatorType>;
public:
CombinedCardinalityEstimator()
@ -109,41 +118,18 @@ public:
toLarge();
}
if (container_type == details::ContainerType::SMALL)
if (rhs.getContainerType() == details::ContainerType::SMALL)
{
for (const auto & x : rhs.small)
insert(x);
}
else if (container_type == details::ContainerType::MEDIUM)
else if (rhs.getContainerType() == details::ContainerType::MEDIUM)
{
if (rhs.getContainerType() == details::ContainerType::SMALL)
{
for (const auto & x : rhs.small)
insert(x);
}
else if (rhs.getContainerType() == details::ContainerType::MEDIUM)
{
for (const auto & x : rhs.getContainer<Medium>())
insert(x);
}
for (const auto & x : rhs.getContainer<Medium>())
insert(x);
}
else if (container_type == details::ContainerType::LARGE)
{
if (rhs.getContainerType() == details::ContainerType::SMALL)
{
for (const auto & x : rhs.small)
insert(x);
}
else if (rhs.getContainerType() == details::ContainerType::MEDIUM)
{
for (const auto & x : rhs.getContainer<Medium>())
insert(x);
}
else if (rhs.getContainerType() == details::ContainerType::LARGE)
getContainer<Large>().merge(rhs.getContainer<Large>());
}
else
throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR);
else if (rhs.getContainerType() == details::ContainerType::LARGE)
getContainer<Large>().merge(rhs.getContainer<Large>());
}
/// Можно вызывать только для пустого объекта.
@ -171,9 +157,36 @@ public:
/// Reads a serialized estimator state from `in` and merges it into this object.
void readAndMerge(DB::ReadBuffer & in)
{
/// NOTE(review): the next three statements (deserialize into a temporary, then merge) appear to be
/// the pre-change body shown next to its replacement in this diff view - confirm against the real file.
Self other;
other.read(in);
merge(other);
auto container_type = getContainerType();
/// The first serialized byte is interpreted as the container type of the incoming state.
UInt8 v;
readBinary(v, in);
auto rhs_container_type = static_cast<details::ContainerType>(v);
/// If the incoming state uses a larger representation than this object, upgrade this object first.
auto max_container_type = details::max(container_type, rhs_container_type);
if (container_type != max_container_type)
{
if (max_container_type == details::ContainerType::MEDIUM)
toMedium();
else if (max_container_type == details::ContainerType::LARGE)
toLarge();
}
/// SMALL and MEDIUM incoming states are streamed element by element through the container's
/// Reader and inserted, without materializing a temporary estimator.
if (rhs_container_type == details::ContainerType::SMALL)
{
typename Small::Reader reader(in);
while (reader.next())
insert(reader.get());
}
else if (rhs_container_type == details::ContainerType::MEDIUM)
{
typename Medium::Reader reader(in);
while (reader.next())
insert(reader.get());
}
/// For a LARGE incoming state the merge is delegated to the Large container;
/// toLarge() above is expected to have made this object LARGE as well.
else if (rhs_container_type == details::ContainerType::LARGE)
getContainer<Large>().readAndMerge(in);
}
void write(DB::WriteBuffer & out) const
@ -275,13 +288,13 @@ private:
}
/// Returns a reference to the container of type T located at `address & mask`.
/// NOTE(review): presumably the bits cleared by `mask` carry a type tag - confirm against the
/// definitions of `address` and `mask`.
/// NOTE(review): the two signature lines below look like the before/after lines of one diff hunk
/// (the change adds `inline`).
template<typename T>
T & getContainer()
inline T & getContainer()
{
return *reinterpret_cast<T *>(address & mask);
}
/// Const overload: returns a const reference to the container of type T located at `address & mask`.
/// NOTE(review): the two signature lines below look like the before/after lines of one diff hunk
/// (the change adds `inline`).
template<typename T>
const T & getContainer() const
inline const T & getContainer() const
{
return *reinterpret_cast<T *>(address & mask);
}

View File

@ -251,6 +251,7 @@ class HashTable :
protected:
friend class const_iterator;
friend class iterator;
friend class Reader;
template <typename, typename, typename, typename, typename, typename, size_t>
friend class TwoLevelHashTable;
@ -429,6 +430,51 @@ public:
free();
}
class Reader final : private Cell::State
{
public:
Reader(DB::ReadBuffer & in_)
: in(in_)
{
}
Reader(const Reader &) = delete;
Reader & operator=(const Reader &) = delete;
bool next()
{
if (read_count == size)
{
is_eof = true;
return false;
}
else if (read_count == 0)
{
Cell::State::read(in);
DB::readVarUInt(size, in);
}
cell.read(in);
++read_count;
return true;
}
inline const value_type & get() const
{
if ((read_count == 0) || is_eof)
throw DB::Exception("No available data", DB::ErrorCodes::NO_AVAILABLE_DATA);
return Cell::getKey(cell.getValue());
}
private:
DB::ReadBuffer in;
Cell cell;
size_t read_count = 0;
size_t size;
bool is_eof = false;
};
class iterator
{

View File

@ -27,6 +27,7 @@ class SmallTable :
protected:
friend class const_iterator;
friend class iterator;
friend class Reader;
typedef SmallTable<Key, Cell, capacity> Self;
typedef Cell cell_type;
@ -66,6 +67,55 @@ public:
typedef typename Cell::value_type value_type;
class Reader final : private Cell::State
{
public:
Reader(DB::ReadBuffer & in_)
: in(in_)
{
}
Reader(const Reader &) = delete;
Reader & operator=(const Reader &) = delete;
bool next()
{
if (read_count == size)
{
is_eof = true;
return false;
}
else if (read_count == 0)
{
Cell::State::read(in);
DB::readVarUInt(size, in);
if (size > capacity)
throw DB::Exception("Illegal size");
}
cell.read(in);
++read_count;
return true;
}
inline const value_type & get() const
{
if ((read_count == 0) || is_eof)
throw DB::Exception("No available data", DB::ErrorCodes::NO_AVAILABLE_DATA);
return Cell::getKey(cell.getValue());
}
private:
DB::ReadBuffer in;
Cell cell;
size_t read_count = 0;
size_t size;
bool is_eof = false;
};
class iterator
{
Self * container;

View File

@ -114,10 +114,20 @@ public:
/// Reads a serialized state from `in` and merges it into this object.
void readAndMerge(DB::ReadBuffer & in)
{
/// Slightly suboptimal.
/// NOTE(review): the next three statements (deserialize into a temporary, then merge) appear to be
/// the pre-change body shown next to its replacement in this diff view - confirm against the real file.
HyperLogLogWithSmallSetOptimization other;
other.read(in);
merge(other);
/// The first serialized value is a flag telling whether the incoming state is in the large form.
bool is_rhs_large;
readBinary(is_rhs_large, in);
/// Switch this object to the large form first if the incoming state is large and this one is not.
if (!isLarge() && is_rhs_large)
toLarge();
if (!is_rhs_large)
{
/// Small incoming state: stream its elements through Small::Reader and insert them one by one.
typename Small::Reader reader(in);
while (reader.next())
insert(reader.get());
}
else
large->readAndMerge(in);
}
void write(DB::WriteBuffer & out) const

View File

@ -283,6 +283,7 @@ namespace ErrorCodes
INDEX_NOT_USED = 277,
LEADERSHIP_LOST = 278,
ALL_CONNECTION_TRIES_FAILED = 279,
NO_AVAILABLE_DATA = 280,
KEEPER_EXCEPTION = 999,
POCO_EXCEPTION = 1000,