Merge branch 'master' into decimal_hex_function

This commit is contained in:
Mikhail Korotov 2019-11-06 18:20:05 +03:00
commit 317af908fd
69 changed files with 710 additions and 496 deletions

View File

@ -579,7 +579,7 @@ public:
{ {
for (auto & elem : table) for (auto & elem : table)
{ {
Histogram & histogram = elem.getSecond(); Histogram & histogram = elem.getMapped();
if (histogram.buckets.size() < params.num_buckets_cutoff) if (histogram.buckets.size() < params.num_buckets_cutoff)
{ {
@ -593,7 +593,7 @@ public:
{ {
for (auto & elem : table) for (auto & elem : table)
{ {
Histogram & histogram = elem.getSecond(); Histogram & histogram = elem.getMapped();
if (!histogram.total) if (!histogram.total)
continue; continue;
@ -625,7 +625,7 @@ public:
{ {
for (auto & elem : table) for (auto & elem : table)
{ {
Histogram & histogram = elem.getSecond(); Histogram & histogram = elem.getMapped();
if (!histogram.total) if (!histogram.total)
continue; continue;
@ -641,7 +641,7 @@ public:
{ {
for (auto & elem : table) for (auto & elem : table)
{ {
Histogram & histogram = elem.getSecond(); Histogram & histogram = elem.getMapped();
if (!histogram.total) if (!histogram.total)
continue; continue;
@ -676,7 +676,7 @@ public:
while (true) while (true)
{ {
it = table.find(hashContext(code_points.data() + code_points.size() - context_size, code_points.data() + code_points.size())); it = table.find(hashContext(code_points.data() + code_points.size() - context_size, code_points.data() + code_points.size()));
if (it && lookupResultGetMapped(it)->total + lookupResultGetMapped(it)->count_end != 0) if (it && it->getMapped().total + it->getMapped().count_end != 0)
break; break;
if (context_size == 0) if (context_size == 0)
@ -710,7 +710,7 @@ public:
if (num_bytes_after_desired_size > 0) if (num_bytes_after_desired_size > 0)
end_probability_multiplier = std::pow(1.25, num_bytes_after_desired_size); end_probability_multiplier = std::pow(1.25, num_bytes_after_desired_size);
CodePoint code = lookupResultGetMapped(it)->sample(determinator, end_probability_multiplier); CodePoint code = it->getMapped().sample(determinator, end_probability_multiplier);
if (code == END) if (code == END)
break; break;

View File

@ -55,7 +55,7 @@ struct EntropyData
void merge(const EntropyData & rhs) void merge(const EntropyData & rhs)
{ {
for (const auto & pair : rhs.map) for (const auto & pair : rhs.map)
map[pair.getFirst()] += pair.getSecond(); map[pair.getKey()] += pair.getMapped();
} }
void serialize(WriteBuffer & buf) const void serialize(WriteBuffer & buf) const
@ -77,12 +77,12 @@ struct EntropyData
{ {
UInt64 total_value = 0; UInt64 total_value = 0;
for (const auto & pair : map) for (const auto & pair : map)
total_value += pair.getSecond(); total_value += pair.getMapped();
Float64 shannon_entropy = 0; Float64 shannon_entropy = 0;
for (const auto & pair : map) for (const auto & pair : map)
{ {
Float64 frequency = Float64(pair.getSecond()) / total_value; Float64 frequency = Float64(pair.getMapped()) / total_value;
shannon_entropy -= frequency * log2(frequency); shannon_entropy -= frequency * log2(frequency);
} }

View File

@ -58,7 +58,7 @@ struct QuantileExactWeighted
void merge(const QuantileExactWeighted & rhs) void merge(const QuantileExactWeighted & rhs)
{ {
for (const auto & pair : rhs.map) for (const auto & pair : rhs.map)
map[pair.getFirst()] += pair.getSecond(); map[pair.getKey()] += pair.getMapped();
} }
void serialize(WriteBuffer & buf) const void serialize(WriteBuffer & buf) const
@ -93,7 +93,7 @@ struct QuantileExactWeighted
UInt64 sum_weight = 0; UInt64 sum_weight = 0;
for (const auto & pair : map) for (const auto & pair : map)
{ {
sum_weight += pair.getSecond(); sum_weight += pair.getMapped();
array[i] = pair.getValue(); array[i] = pair.getValue();
++i; ++i;
} }
@ -143,7 +143,7 @@ struct QuantileExactWeighted
UInt64 sum_weight = 0; UInt64 sum_weight = 0;
for (const auto & pair : map) for (const auto & pair : map)
{ {
sum_weight += pair.getSecond(); sum_weight += pair.getMapped();
array[i] = pair.getValue(); array[i] = pair.getValue();
++i; ++i;
} }

View File

@ -35,7 +35,7 @@ namespace
data.resize(hash_map.size()); data.resize(hash_map.size());
for (const auto & val : hash_map) for (const auto & val : hash_map)
data[val.getSecond()] = val.getFirst(); data[val.getMapped()] = val.getKey();
for (auto & ind : index) for (auto & ind : index)
ind = hash_map[ind]; ind = hash_map[ind];

View File

@ -359,7 +359,7 @@ struct HashMethodSingleLowCardinalityColumn : public SingleColumnMethod
if constexpr (has_mapped) if constexpr (has_mapped)
{ {
auto & mapped = *lookupResultGetMapped(it); auto & mapped = it->getMapped();
if (inserted) if (inserted)
{ {
new (&mapped) Mapped(); new (&mapped) Mapped();

View File

@ -174,13 +174,13 @@ protected:
[[maybe_unused]] Mapped * cached = nullptr; [[maybe_unused]] Mapped * cached = nullptr;
if constexpr (has_mapped) if constexpr (has_mapped)
cached = lookupResultGetMapped(it); cached = &it->getMapped();
if (inserted) if (inserted)
{ {
if constexpr (has_mapped) if constexpr (has_mapped)
{ {
new(lookupResultGetMapped(it)) Mapped(); new (&it->getMapped()) Mapped();
} }
} }
@ -191,18 +191,18 @@ protected:
if constexpr (has_mapped) if constexpr (has_mapped)
{ {
cache.value.first = *lookupResultGetKey(it); cache.value.first = it->getKey();
cache.value.second = *lookupResultGetMapped(it); cache.value.second = it->getMapped();
cached = &cache.value.second; cached = &cache.value.second;
} }
else else
{ {
cache.value = *lookupResultGetKey(it); cache.value = it->getKey();
} }
} }
if constexpr (has_mapped) if constexpr (has_mapped)
return EmplaceResult(*lookupResultGetMapped(it), *cached, inserted); return EmplaceResult(it->getMapped(), *cached, inserted);
else else
return EmplaceResult(inserted); return EmplaceResult(inserted);
} }
@ -233,7 +233,7 @@ protected:
cache.value.first = key; cache.value.first = key;
if (it) if (it)
{ {
cache.value.second = *lookupResultGetMapped(it); cache.value.second = it->getMapped();
} }
} }
else else
@ -243,7 +243,7 @@ protected:
} }
if constexpr (has_mapped) if constexpr (has_mapped)
return FindResult(it ? lookupResultGetMapped(it) : nullptr, it != nullptr); return FindResult(it ? &it->getMapped() : nullptr, it != nullptr);
else else
return FindResult(it != nullptr); return FindResult(it != nullptr);
} }

View File

@ -14,12 +14,6 @@ struct ClearableHashMapCell : public ClearableHashTableCell<Key, HashMapCell<Key
: Base::BaseCell(value_, state), Base::version(state.version) {} : Base::BaseCell(value_, state), Base::version(state.version) {}
}; };
template<typename Key, typename Mapped, typename Hash>
ALWAYS_INLINE inline auto lookupResultGetKey(ClearableHashMapCell<Key, Mapped, Hash> * cell) { return &cell->getFirst(); }
template<typename Key, typename Mapped, typename Hash>
ALWAYS_INLINE inline auto lookupResultGetMapped(ClearableHashMapCell<Key, Mapped, Hash> * cell) { return &cell->getSecond(); }
template template
< <
typename Key, typename Key,
@ -31,20 +25,16 @@ template
class ClearableHashMap : public HashTable<Key, ClearableHashMapCell<Key, Mapped, Hash>, Hash, Grower, Allocator> class ClearableHashMap : public HashTable<Key, ClearableHashMapCell<Key, Mapped, Hash>, Hash, Grower, Allocator>
{ {
public: public:
using key_type = Key; Mapped & operator[](const Key & x)
using mapped_type = Mapped;
using value_type = typename ClearableHashMap::cell_type::value_type;
mapped_type & operator[](Key x)
{ {
typename ClearableHashMap::LookupResult it; typename ClearableHashMap::LookupResult it;
bool inserted; bool inserted;
this->emplace(x, it, inserted); this->emplace(x, it, inserted);
if (inserted) if (inserted)
new(lookupResultGetMapped(it)) mapped_type(); new (&it->getMapped()) Mapped();
return *lookupResultGetMapped(it); return it->getMapped();
} }
void clear() void clear()

View File

@ -48,12 +48,6 @@ struct ClearableHashTableCell : public BaseCell
ClearableHashTableCell(const Key & key_, const State & state) : BaseCell(key_, state), version(state.version) {} ClearableHashTableCell(const Key & key_, const State & state) : BaseCell(key_, state), version(state.version) {}
}; };
template<typename Key, typename BaseCell>
ALWAYS_INLINE inline auto lookupResultGetKey(ClearableHashTableCell<Key, BaseCell> * cell) { return &cell->key; }
template<typename Key, typename BaseCell>
ALWAYS_INLINE inline void * lookupResultGetMapped(ClearableHashTableCell<Key, BaseCell> *) { return nullptr; }
template template
< <
typename Key, typename Key,
@ -64,9 +58,6 @@ template
class ClearableHashSet : public HashTable<Key, ClearableHashTableCell<Key, HashTableCell<Key, Hash, ClearableHashSetState>>, Hash, Grower, Allocator> class ClearableHashSet : public HashTable<Key, ClearableHashTableCell<Key, HashTableCell<Key, Hash, ClearableHashSetState>>, Hash, Grower, Allocator>
{ {
public: public:
using key_type = Key;
using value_type = typename ClearableHashSet::cell_type::value_type;
using Base = HashTable<Key, ClearableHashTableCell<Key, HashTableCell<Key, Hash, ClearableHashSetState>>, Hash, Grower, Allocator>; using Base = HashTable<Key, ClearableHashTableCell<Key, HashTableCell<Key, Hash, ClearableHashSetState>>, Hash, Grower, Allocator>;
using typename Base::LookupResult; using typename Base::LookupResult;
@ -87,9 +78,6 @@ template
class ClearableHashSetWithSavedHash: public HashTable<Key, ClearableHashTableCell<Key, HashSetCellWithSavedHash<Key, Hash, ClearableHashSetState>>, Hash, Grower, Allocator> class ClearableHashSetWithSavedHash: public HashTable<Key, ClearableHashTableCell<Key, HashSetCellWithSavedHash<Key, Hash, ClearableHashSetState>>, Hash, Grower, Allocator>
{ {
public: public:
using key_type = Key;
using value_type = typename ClearableHashSetWithSavedHash::cell_type::value_type;
void clear() void clear()
{ {
++this->version; ++this->version;

View File

@ -11,6 +11,8 @@ struct FixedClearableHashMapCell
using State = ClearableHashSetState; using State = ClearableHashSetState;
using value_type = PairNoInit<Key, Mapped>; using value_type = PairNoInit<Key, Mapped>;
using mapped_type = Mapped;
UInt32 version; UInt32 version;
Mapped mapped; Mapped mapped;
@ -18,11 +20,12 @@ struct FixedClearableHashMapCell
FixedClearableHashMapCell(const Key &, const State & state) : version(state.version) {} FixedClearableHashMapCell(const Key &, const State & state) : version(state.version) {}
FixedClearableHashMapCell(const value_type & value_, const State & state) : version(state.version), mapped(value_.second) {} FixedClearableHashMapCell(const value_type & value_, const State & state) : version(state.version), mapped(value_.second) {}
Mapped & getSecond() { return mapped; } const VoidKey getKey() const { return {}; }
const Mapped & getSecond() const { return mapped; } Mapped & getMapped() { return mapped; }
const Mapped & getMapped() const { return mapped; }
bool isZero(const State & state) const { return version != state.version; } bool isZero(const State & state) const { return version != state.version; }
void setZero() { version = 0; } void setZero() { version = 0; }
static constexpr bool need_zero_value_storage = false;
struct CellExt struct CellExt
{ {
@ -35,32 +38,33 @@ struct FixedClearableHashMapCell
} }
Key key; Key key;
FixedClearableHashMapCell * ptr; FixedClearableHashMapCell * ptr;
const Key & getFirst() const { return key; } const Key & getKey() const { return key; }
Mapped & getSecond() { return ptr->mapped; } Mapped & getMapped() { return ptr->mapped; }
const Mapped & getSecond() const { return *ptr->mapped; } const Mapped & getMapped() const { return *ptr->mapped; }
const value_type getValue() const { return {key, *ptr->mapped}; } const value_type getValue() const { return {key, *ptr->mapped}; }
}; };
}; };
template <typename Key, typename Mapped, typename Allocator = HashTableAllocator> template <typename Key, typename Mapped, typename Allocator = HashTableAllocator>
class FixedClearableHashMap : public FixedHashMap<Key, FixedClearableHashMapCell<Key, Mapped>, Allocator> class FixedClearableHashMap : public FixedHashMap<Key, Mapped, FixedClearableHashMapCell<Key, Mapped>, Allocator>
{ {
public: public:
using key_type = Key; using Base = FixedHashMap<Key, Mapped, FixedClearableHashMapCell<Key, Mapped>, Allocator>;
using mapped_type = Mapped; using Self = FixedClearableHashMap;
using value_type = typename FixedClearableHashMap::cell_type::value_type; using LookupResult = typename Base::LookupResult;
mapped_type & operator[](Key x) using Base::Base;
Mapped & operator[](const Key & x)
{ {
typename FixedClearableHashMap::iterator it; LookupResult it;
bool inserted; bool inserted;
this->emplace(x, it, inserted); this->emplace(x, it, inserted);
if (inserted) if (inserted)
new (&it->second) mapped_type(); new (&it->getMapped()) Mapped();
return it->second; return it->getMapped();
} }
void clear() void clear()

View File

@ -10,19 +10,23 @@ struct FixedClearableHashTableCell
using State = ClearableHashSetState; using State = ClearableHashSetState;
using value_type = Key; using value_type = Key;
using mapped_type = void; using mapped_type = VoidMapped;
UInt32 version; UInt32 version;
FixedClearableHashTableCell() {} FixedClearableHashTableCell() {}
FixedClearableHashTableCell(const Key &, const State & state) : version(state.version) {} FixedClearableHashTableCell(const Key &, const State & state) : version(state.version) {}
const VoidKey getKey() const { return {}; }
VoidMapped getMapped() const { return {}; }
bool isZero(const State & state) const { return version != state.version; } bool isZero(const State & state) const { return version != state.version; }
void setZero() { version = 0; } void setZero() { version = 0; }
static constexpr bool need_zero_value_storage = false;
struct CellExt struct CellExt
{ {
Key key; Key key;
const VoidKey getKey() const { return {}; }
VoidMapped getMapped() const { return {}; }
const value_type & getValue() const { return key; } const value_type & getValue() const { return key; }
void update(Key && key_, FixedClearableHashTableCell *) { key = key_; } void update(Key && key_, FixedClearableHashTableCell *) { key = key_; }
}; };
@ -34,8 +38,6 @@ class FixedClearableHashSet : public FixedHashTable<Key, FixedClearableHashTable
{ {
public: public:
using Base = FixedHashTable<Key, FixedClearableHashTableCell<Key>, Allocator>; using Base = FixedHashTable<Key, FixedClearableHashTableCell<Key>, Allocator>;
using key_type = Key;
using value_type = typename FixedClearableHashSet::cell_type::value_type;
using LookupResult = typename Base::LookupResult; using LookupResult = typename Base::LookupResult;
void clear() void clear()

View File

@ -13,18 +13,19 @@ struct FixedHashMapCell
using value_type = PairNoInit<Key, Mapped>; using value_type = PairNoInit<Key, Mapped>;
using mapped_type = TMapped; using mapped_type = TMapped;
Mapped mapped;
bool full; bool full;
Mapped mapped;
FixedHashMapCell() {} FixedHashMapCell() {}
FixedHashMapCell(const Key &, const State &) : full(true) {} FixedHashMapCell(const Key &, const State &) : full(true) {}
FixedHashMapCell(const value_type & value_, const State &) : full(true), mapped(value_.second) {} FixedHashMapCell(const value_type & value_, const State &) : full(true), mapped(value_.second) {}
Mapped & getSecond() { return mapped; } const VoidKey getKey() const { return {}; }
const Mapped & getSecond() const { return mapped; } Mapped & getMapped() { return mapped; }
const Mapped & getMapped() const { return mapped; }
bool isZero(const State &) const { return !full; } bool isZero(const State &) const { return !full; }
void setZero() { full = false; } void setZero() { full = false; }
static constexpr bool need_zero_value_storage = false;
/// Similar to FixedHashSetCell except that we need to contain a pointer to the Mapped field. /// Similar to FixedHashSetCell except that we need to contain a pointer to the Mapped field.
/// Note that we have to assemble a continuous layout for the value_type on each call of getValue(). /// Note that we have to assemble a continuous layout for the value_type on each call of getValue().
@ -40,36 +41,23 @@ struct FixedHashMapCell
Key key; Key key;
FixedHashMapCell * ptr; FixedHashMapCell * ptr;
const Key & getFirst() const { return key; } const Key & getKey() const { return key; }
Mapped & getSecond() { return ptr->mapped; } Mapped & getMapped() { return ptr->mapped; }
const Mapped & getSecond() const { return ptr->mapped; } const Mapped & getMapped() const { return ptr->mapped; }
const value_type getValue() const { return {key, ptr->mapped}; } const value_type getValue() const { return {key, ptr->mapped}; }
}; };
}; };
template<typename Key, typename Mapped, typename State> template <typename Key, typename Mapped, typename Cell = FixedHashMapCell<Key, Mapped>, typename Allocator = HashTableAllocator>
ALWAYS_INLINE inline void * lookupResultGetKey(FixedHashMapCell<Key, Mapped, State> *) class FixedHashMap : public FixedHashTable<Key, Cell, Allocator>
{ return nullptr; }
template<typename Key, typename Mapped, typename State>
ALWAYS_INLINE inline auto lookupResultGetMapped(FixedHashMapCell<Key, Mapped, State> * cell)
{ return &cell->getSecond(); }
template <typename Key, typename Mapped, typename Allocator = HashTableAllocator>
class FixedHashMap : public FixedHashTable<Key, FixedHashMapCell<Key, Mapped>, Allocator>
{ {
public: public:
using Base = FixedHashTable<Key, FixedHashMapCell<Key, Mapped>, Allocator>; using Base = FixedHashTable<Key, Cell, Allocator>;
using Self = FixedHashMap; using Self = FixedHashMap;
using key_type = Key; using LookupResult = typename Base::LookupResult;
using Cell = typename Base::cell_type;
using value_type = typename Cell::value_type;
using mapped_type = typename Cell::Mapped;
using Base::Base; using Base::Base;
using LookupResult = typename Base::LookupResult;
template <typename Func> template <typename Func>
void ALWAYS_INLINE mergeToViaEmplace(Self & that, Func && func) void ALWAYS_INLINE mergeToViaEmplace(Self & that, Func && func)
{ {
@ -77,8 +65,8 @@ public:
{ {
typename Self::LookupResult res_it; typename Self::LookupResult res_it;
bool inserted; bool inserted;
that.emplace(it->getFirst(), res_it, inserted, it.getHash()); that.emplace(it->getKey(), res_it, inserted, it.getHash());
func(*lookupResultGetMapped(res_it), it->getSecond(), inserted); func(res_it->getMapped(), it->getMapped(), inserted);
} }
} }
@ -87,11 +75,11 @@ public:
{ {
for (auto it = this->begin(), end = this->end(); it != end; ++it) for (auto it = this->begin(), end = this->end(); it != end; ++it)
{ {
auto res_it = that.find(it->getFirst(), it.getHash()); auto res_it = that.find(it->getKey(), it.getHash());
if (!res_it) if (!res_it)
func(it->getSecond(), it->getSecond(), false); func(it->getMapped(), it->getMapped(), false);
else else
func(*lookupResultGetMapped(res_it), it->getSecond(), true); func(res_it->getMapped(), it->getMapped(), true);
} }
} }
@ -99,24 +87,24 @@ public:
void forEachValue(Func && func) void forEachValue(Func && func)
{ {
for (auto & v : *this) for (auto & v : *this)
func(v.getFirst(), v.getSecond()); func(v.getKey(), v.getMapped());
} }
template <typename Func> template <typename Func>
void forEachMapped(Func && func) void forEachMapped(Func && func)
{ {
for (auto & v : *this) for (auto & v : *this)
func(v.getSecond()); func(v.getMapped());
} }
mapped_type & ALWAYS_INLINE operator[](Key x) Mapped & ALWAYS_INLINE operator[](const Key & x)
{ {
typename Base::LookupResult it; LookupResult it;
bool inserted; bool inserted;
this->emplace(x, it, inserted); this->emplace(x, it, inserted);
if (inserted) if (inserted)
new (it) mapped_type(); new (&it->getMapped()) Mapped();
return it; return it->getMapped();
} }
}; };

View File

@ -6,14 +6,15 @@ template <typename Key, typename Allocator = HashTableAllocator>
class FixedHashSet : public FixedHashTable<Key, FixedHashTableCell<Key>, Allocator> class FixedHashSet : public FixedHashTable<Key, FixedHashTableCell<Key>, Allocator>
{ {
public: public:
using Base = FixedHashTable<Key, FixedHashTableCell<Key>, Allocator>; using Cell = FixedHashTableCell<Key>;
using Base = FixedHashTable<Key, Cell, Allocator>;
using Self = FixedHashSet; using Self = FixedHashSet;
void merge(const Self & rhs) void merge(const Self & rhs)
{ {
for (size_t i = 0; i < Base::BUFFER_SIZE; ++i) for (size_t i = 0; i < Base::BUFFER_SIZE; ++i)
if (Base::buf[i].isZero(*this) && !rhs.buf[i].isZero(*this)) if (Base::buf[i].isZero(*this) && !rhs.buf[i].isZero(*this))
Base::buf[i] = rhs.buf[i]; new (&Base::buf[i]) Cell(rhs.buf[i]);
} }
/// NOTE: Currently this method isn't used. When it does, the ReadBuffer should /// NOTE: Currently this method isn't used. When it does, the ReadBuffer should

View File

@ -8,12 +8,15 @@ struct FixedHashTableCell
using State = TState; using State = TState;
using value_type = Key; using value_type = Key;
using mapped_type = void; using mapped_type = VoidMapped;
bool full; bool full;
FixedHashTableCell() {} FixedHashTableCell() {}
FixedHashTableCell(const Key &, const State &) : full(true) {} FixedHashTableCell(const Key &, const State &) : full(true) {}
const VoidKey getKey() const { return {}; }
VoidMapped getMapped() const { return {}; }
bool isZero(const State &) const { return !full; } bool isZero(const State &) const { return !full; }
void setZero() { full = false; } void setZero() { full = false; }
static constexpr bool need_zero_value_storage = false; static constexpr bool need_zero_value_storage = false;
@ -28,6 +31,8 @@ struct FixedHashTableCell
{ {
Key key; Key key;
const VoidKey getKey() const { return {}; }
VoidMapped getMapped() const { return {}; }
const value_type & getValue() const { return key; } const value_type & getValue() const { return key; }
void update(Key && key_, FixedHashTableCell *) { key = key_; } void update(Key && key_, FixedHashTableCell *) { key = key_; }
}; };
@ -53,7 +58,7 @@ struct FixedHashTableCell
template <typename Key, typename Cell, typename Allocator> template <typename Key, typename Cell, typename Allocator>
class FixedHashTable : private boost::noncopyable, protected Allocator, protected Cell::State class FixedHashTable : private boost::noncopyable, protected Allocator, protected Cell::State
{ {
static constexpr size_t BUFFER_SIZE = 1ULL << (sizeof(Key) * 8); static constexpr size_t NUM_CELLS = 1ULL << (sizeof(Key) * 8);
protected: protected:
friend class const_iterator; friend class const_iterator;
@ -61,12 +66,11 @@ protected:
friend class Reader; friend class Reader;
using Self = FixedHashTable; using Self = FixedHashTable;
using cell_type = Cell;
size_t m_size = 0; /// Amount of elements size_t m_size = 0; /// Amount of elements
Cell * buf; /// A piece of memory for all elements except the element with zero key. Cell * buf; /// A piece of memory for all elements.
void alloc() { buf = reinterpret_cast<Cell *>(Allocator::alloc(BUFFER_SIZE * sizeof(Cell))); } void alloc() { buf = reinterpret_cast<Cell *>(Allocator::alloc(NUM_CELLS * sizeof(Cell))); }
void free() void free()
{ {
@ -111,7 +115,7 @@ protected:
++ptr; ++ptr;
/// Skip empty cells in the main buffer. /// Skip empty cells in the main buffer.
auto buf_end = container->buf + container->BUFFER_SIZE; auto buf_end = container->buf + container->NUM_CELLS;
while (ptr < buf_end && ptr->isZero(*container)) while (ptr < buf_end && ptr->isZero(*container))
++ptr; ++ptr;
@ -140,8 +144,9 @@ protected:
public: public:
using key_type = Key; using key_type = Key;
using value_type = typename Cell::value_type;
using mapped_type = typename Cell::mapped_type; using mapped_type = typename Cell::mapped_type;
using value_type = typename Cell::value_type;
using cell_type = Cell;
using LookupResult = Cell *; using LookupResult = Cell *;
using ConstLookupResult = const Cell *; using ConstLookupResult = const Cell *;
@ -239,7 +244,7 @@ public:
return end(); return end();
const Cell * ptr = buf; const Cell * ptr = buf;
auto buf_end = buf + BUFFER_SIZE; auto buf_end = buf + NUM_CELLS;
while (ptr < buf_end && ptr->isZero(*this)) while (ptr < buf_end && ptr->isZero(*this))
++ptr; ++ptr;
@ -254,21 +259,21 @@ public:
return end(); return end();
Cell * ptr = buf; Cell * ptr = buf;
auto buf_end = buf + BUFFER_SIZE; auto buf_end = buf + NUM_CELLS;
while (ptr < buf_end && ptr->isZero(*this)) while (ptr < buf_end && ptr->isZero(*this))
++ptr; ++ptr;
return iterator(this, ptr); return iterator(this, ptr);
} }
const_iterator end() const { return const_iterator(this, buf + BUFFER_SIZE); } const_iterator end() const { return const_iterator(this, buf + NUM_CELLS); }
const_iterator cend() const { return end(); } const_iterator cend() const { return end(); }
iterator end() { return iterator(this, buf + BUFFER_SIZE); } iterator end() { return iterator(this, buf + NUM_CELLS); }
public: public:
/// The last parameter is unused but exists for compatibility with HashTable interface. /// The last parameter is unused but exists for compatibility with HashTable interface.
void ALWAYS_INLINE emplace(Key x, LookupResult & it, bool & inserted, size_t /* hash */ = 0) void ALWAYS_INLINE emplace(const Key & x, LookupResult & it, bool & inserted, size_t /* hash */ = 0)
{ {
it = &buf[x]; it = &buf[x];
@ -288,40 +293,31 @@ public:
std::pair<LookupResult, bool> res; std::pair<LookupResult, bool> res;
emplace(Cell::getKey(x), res.first, res.second); emplace(Cell::getKey(x), res.first, res.second);
if (res.second) if (res.second)
insertSetMapped(lookupResultGetMapped(res.first), x); insertSetMapped(res.first->getMapped(), x);
return res; return res;
} }
LookupResult ALWAYS_INLINE find(Key x) LookupResult ALWAYS_INLINE find(const Key & x) { return !buf[x].isZero(*this) ? &buf[x] : nullptr; }
{
return !buf[x].isZero(*this) ? &buf[x] : nullptr;
}
ConstLookupResult ALWAYS_INLINE find(Key x) const ConstLookupResult ALWAYS_INLINE find(const Key & x) const { return const_cast<std::decay_t<decltype(*this)> *>(this)->find(x); }
{
return const_cast<std::decay_t<decltype(*this)> *>(this)->find(x);
}
LookupResult ALWAYS_INLINE find(Key, size_t hash_value) LookupResult ALWAYS_INLINE find(const Key &, size_t hash_value) { return !buf[hash_value].isZero(*this) ? &buf[hash_value] : nullptr; }
{
return !buf[hash_value].isZero(*this) ? &buf[hash_value] : nullptr;
}
ConstLookupResult ALWAYS_INLINE find(Key key, size_t hash_value) const ConstLookupResult ALWAYS_INLINE find(const Key & key, size_t hash_value) const
{ {
return const_cast<std::decay_t<decltype(*this)> *>(this)->find(key, hash_value); return const_cast<std::decay_t<decltype(*this)> *>(this)->find(key, hash_value);
} }
bool ALWAYS_INLINE has(Key x) const { return !buf[x].isZero(*this); } bool ALWAYS_INLINE has(const Key & x) const { return !buf[x].isZero(*this); }
bool ALWAYS_INLINE has(Key, size_t hash_value) const { return !buf[hash_value].isZero(*this); } bool ALWAYS_INLINE has(const Key &, size_t hash_value) const { return !buf[hash_value].isZero(*this); }
void write(DB::WriteBuffer & wb) const void write(DB::WriteBuffer & wb) const
{ {
Cell::State::write(wb); Cell::State::write(wb);
DB::writeVarUInt(m_size, wb); DB::writeVarUInt(m_size, wb);
for (auto ptr = buf, buf_end = buf + BUFFER_SIZE; ptr < buf_end; ++ptr) for (auto ptr = buf, buf_end = buf + NUM_CELLS; ptr < buf_end; ++ptr)
if (!ptr->isZero(*this)) if (!ptr->isZero(*this))
{ {
DB::writeVarUInt(ptr - buf); DB::writeVarUInt(ptr - buf);
@ -334,7 +330,7 @@ public:
Cell::State::writeText(wb); Cell::State::writeText(wb);
DB::writeText(m_size, wb); DB::writeText(m_size, wb);
for (auto ptr = buf, buf_end = buf + BUFFER_SIZE; ptr < buf_end; ++ptr) for (auto ptr = buf, buf_end = buf + NUM_CELLS; ptr < buf_end; ++ptr)
{ {
if (!ptr->isZero(*this)) if (!ptr->isZero(*this))
{ {
@ -393,7 +389,7 @@ public:
destroyElements(); destroyElements();
m_size = 0; m_size = 0;
memset(static_cast<void *>(buf), 0, BUFFER_SIZE * sizeof(*buf)); memset(static_cast<void *>(buf), 0, NUM_CELLS * sizeof(*buf));
} }
/// After executing this function, the table can only be destroyed, /// After executing this function, the table can only be destroyed,
@ -405,9 +401,9 @@ public:
free(); free();
} }
size_t getBufferSizeInBytes() const { return BUFFER_SIZE * sizeof(Cell); } size_t getBufferSizeInBytes() const { return NUM_CELLS * sizeof(Cell); }
size_t getBufferSizeInCells() const { return BUFFER_SIZE; } size_t getBufferSizeInCells() const { return NUM_CELLS; }
#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS #ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
size_t getCollisions() const { return 0; } size_t getCollisions() const { return 0; }

View File

@ -52,12 +52,13 @@ struct HashMapCell
HashMapCell(const Key & key_, const State &) : value(key_, NoInitTag()) {} HashMapCell(const Key & key_, const State &) : value(key_, NoInitTag()) {}
HashMapCell(const value_type & value_, const State &) : value(value_) {} HashMapCell(const value_type & value_, const State &) : value(value_) {}
const Key & getFirst() const { return value.first; } /// Get the key (externally).
Mapped & getSecond() { return value.second; } const Key & getKey() const { return value.first; }
const Mapped & getSecond() const { return value.second; } Mapped & getMapped() { return value.second; }
const Mapped & getMapped() const { return value.second; }
const value_type & getValue() const { return value; } const value_type & getValue() const { return value; }
/// Get the key (internally).
static const Key & getKey(const value_type & value) { return value.first; } static const Key & getKey(const value_type & value) { return value.first; }
bool keyEquals(const Key & key_) const { return value.first == key_; } bool keyEquals(const Key & key_) const { return value.first == key_; }
@ -110,15 +111,6 @@ struct HashMapCell
} }
}; };
template<typename Key, typename Mapped, typename Hash, typename State>
ALWAYS_INLINE inline auto lookupResultGetKey(HashMapCell<Key, Mapped, Hash, State> * cell)
{ return &cell->getFirst(); }
template<typename Key, typename Mapped, typename Hash, typename State>
ALWAYS_INLINE inline auto lookupResultGetMapped(HashMapCell<Key, Mapped, Hash, State> * cell)
{ return &cell->getSecond(); }
template <typename Key, typename TMapped, typename Hash, typename TState = HashTableNoState> template <typename Key, typename TMapped, typename Hash, typename TState = HashTableNoState>
struct HashMapCellWithSavedHash : public HashMapCell<Key, TMapped, Hash, TState> struct HashMapCellWithSavedHash : public HashMapCell<Key, TMapped, Hash, TState>
{ {
@ -136,15 +128,6 @@ struct HashMapCellWithSavedHash : public HashMapCell<Key, TMapped, Hash, TState>
size_t getHash(const Hash & /*hash_function*/) const { return saved_hash; } size_t getHash(const Hash & /*hash_function*/) const { return saved_hash; }
}; };
template<typename Key, typename Mapped, typename Hash, typename State>
ALWAYS_INLINE inline auto lookupResultGetKey(HashMapCellWithSavedHash<Key, Mapped, Hash, State> * cell)
{ return &cell->getFirst(); }
template<typename Key, typename Mapped, typename Hash, typename State>
ALWAYS_INLINE inline auto lookupResultGetMapped(HashMapCellWithSavedHash<Key, Mapped, Hash, State> * cell)
{ return &cell->getSecond(); }
template < template <
typename Key, typename Key,
typename Cell, typename Cell,
@ -156,14 +139,9 @@ class HashMapTable : public HashTable<Key, Cell, Hash, Grower, Allocator>
public: public:
using Self = HashMapTable; using Self = HashMapTable;
using Base = HashTable<Key, Cell, Hash, Grower, Allocator>; using Base = HashTable<Key, Cell, Hash, Grower, Allocator>;
using key_type = Key;
using value_type = typename Cell::value_type;
using mapped_type = typename Cell::Mapped;
using LookupResult = typename Base::LookupResult; using LookupResult = typename Base::LookupResult;
using HashTable<Key, Cell, Hash, Grower, Allocator>::HashTable; using Base::Base;
/// Merge every cell's value of current map into the destination map via emplace. /// Merge every cell's value of current map into the destination map via emplace.
/// Func should have signature void(Mapped & dst, Mapped & src, bool emplaced). /// Func should have signature void(Mapped & dst, Mapped & src, bool emplaced).
@ -178,8 +156,8 @@ public:
{ {
typename Self::LookupResult res_it; typename Self::LookupResult res_it;
bool inserted; bool inserted;
that.emplace(it->getFirst(), res_it, inserted, it.getHash()); that.emplace(Cell::getKey(it->getValue()), res_it, inserted, it.getHash());
func(*lookupResultGetMapped(res_it), it->getSecond(), inserted); func(res_it->getMapped(), it->getMapped(), inserted);
} }
} }
@ -193,11 +171,11 @@ public:
{ {
for (auto it = this->begin(), end = this->end(); it != end; ++it) for (auto it = this->begin(), end = this->end(); it != end; ++it)
{ {
auto res_it = that.find(it->getFirst(), it.getHash()); auto res_it = that.find(Cell::getKey(it->getValue()), it.getHash());
if (!res_it) if (!res_it)
func(it->getSecond(), it->getSecond(), false); func(it->getMapped(), it->getMapped(), false);
else else
func(*lookupResultGetMapped(res_it), it->getSecond(), true); func(res_it->getMapped(), it->getMapped(), true);
} }
} }
@ -206,7 +184,7 @@ public:
void forEachValue(Func && func) void forEachValue(Func && func)
{ {
for (auto & v : *this) for (auto & v : *this)
func(v.getFirst(), v.getSecond()); func(v.getKey(), v.getMapped());
} }
/// Call func(Mapped &) for each hash map element. /// Call func(Mapped &) for each hash map element.
@ -214,12 +192,12 @@ public:
void forEachMapped(Func && func) void forEachMapped(Func && func)
{ {
for (auto & v : *this) for (auto & v : *this)
func(v.getSecond()); func(v.getMapped());
} }
mapped_type & ALWAYS_INLINE operator[](Key x) typename Cell::Mapped & ALWAYS_INLINE operator[](const Key & x)
{ {
typename HashMapTable::LookupResult it; LookupResult it;
bool inserted; bool inserted;
this->emplace(x, it, inserted); this->emplace(x, it, inserted);
@ -238,9 +216,9 @@ public:
* the compiler can not guess about this, and generates the `load`, `increment`, `store` code. * the compiler can not guess about this, and generates the `load`, `increment`, `store` code.
*/ */
if (inserted) if (inserted)
new(lookupResultGetMapped(it)) mapped_type(); new (&it->getMapped()) typename Cell::Mapped();
return *lookupResultGetMapped(it); return it->getMapped();
} }
}; };

View File

@ -84,14 +84,6 @@ struct HashSetCellWithSavedHash : public HashTableCell<Key, Hash, TState>
size_t getHash(const Hash & /*hash_function*/) const { return saved_hash; } size_t getHash(const Hash & /*hash_function*/) const { return saved_hash; }
}; };
template<typename Key, typename Hash, typename State>
ALWAYS_INLINE inline auto lookupResultGetKey(HashSetCellWithSavedHash<Key, Hash, State> * cell)
{ return &cell->key; }
template<typename Key, typename Hash, typename State>
ALWAYS_INLINE inline void * lookupResultGetMapped(HashSetCellWithSavedHash<Key, Hash, State> *)
{ return nullptr; }
template template
< <
typename Key, typename Key,

View File

@ -78,66 +78,48 @@ void set(T & x) { x = 0; }
} }
/** /**
* lookupResultGetKey/Mapped -- functions to get key/"mapped" values from the * getKey/Mapped -- methods to get key/"mapped" values from the LookupResult returned by find() and
* LookupResult returned by find() and emplace() methods of HashTable. * emplace() methods of HashTable. Must not be called for a null LookupResult.
* Must not be called for a null LookupResult.
* *
* We don't use iterators for lookup result to avoid creating temporary * We don't use iterators for lookup result. Instead, LookupResult is a pointer of some kind. There
* objects. Instead, LookupResult is a pointer of some kind. There are global * are methods getKey/Mapped, that return references or values to key/"mapped" values.
* functions lookupResultGetKey/Mapped, overloaded for this pointer type, that
* return pointers to key/"mapped" values. They are implemented as global
* functions and not as methods, because they have to be overloaded for POD
* types, e.g. in StringHashTable where different components have different
* Cell format.
* *
* Different hash table implementations support this interface to a varying * Different hash table implementations support this interface to a varying degree:
* degree:
* *
* 1) Hash tables that store neither the key in its original form, nor a * 1) Hash tables that store neither the key in its original form, nor a "mapped" value:
* "mapped" value: FixedHashTable or StringHashTable. * FixedHashTable or StringHashTable. Neither GetKey nor GetMapped are supported, the only valid
* Neither GetKey nor GetMapped are supported, the only valid operation is * operation is checking LookupResult for null.
* checking LookupResult for null.
* *
* 2) Hash maps that do not store the key, e.g. FixedHashMap or StringHashMap. * 2) Hash maps that do not store the key, e.g. FixedHashMap or StringHashMap. Only GetMapped is
* Only GetMapped is supported. * supported.
* *
* 3) Hash tables that store the key and do not have a "mapped" value, e.g. the * 3) Hash tables that store the key and do not have a "mapped" value, e.g. the normal HashTable.
* normal HashTable. * GetKey returns the key, and GetMapped returns an empty VoidMapped object. This simplifies generic
* GetKey returns the key, and GetMapped returns a zero void pointer. This * code that works with mapped values: it can overload on the return type of GetMapped(), and
* simplifies generic code that works with mapped values: it can overload * doesn't need other parameters. One example is insertSetMapped() function.
* on the return type of GetMapped(), and doesn't need other parameters. One
* example is insertSetMapped() function.
* *
* 4) Hash tables that store both the key and the "mapped" value, e.g. HashMap. * 4) Hash tables that store both the key and the "mapped" value, e.g. HashMap. Both GetKey and
* Both GetKey and GetMapped are supported. * GetMapped are supported.
* *
* The implementation side goes as follows: * The implementation side goes as follows:
* for (1), LookupResult = void *, no getters; *
* for (2), LookupResult = Mapped *, GetMapped is a default implementation that * for (1), LookupResult->getKey = const VoidKey, LookupResult->getMapped = VoidMapped;
* takes any pointer-like object; *
* for (3) and (4), LookupResult = Cell *, and both getters are implemented. * for (2), LookupResult->getKey = const VoidKey, LookupResult->getMapped = Mapped &;
* They have to be specialized for each particular Cell class to supersede the *
* default version that takes a generic pointer-like object.
* VoidKey and VoidMapped may have specialized function overloads for generic code.
*/ */
/** struct VoidKey {};
* The default implementation of GetMapped that is used for the above case (2). struct VoidMapped
*/
template<typename PointerLike>
ALWAYS_INLINE inline auto lookupResultGetMapped(PointerLike && ptr) { return &*ptr; }
/**
* Generic const wrapper for lookupResultGetMapped, that calls a non-const
* version. Should be safe, given that these functions only do pointer
* arithmetics.
*/
template<typename T>
ALWAYS_INLINE inline auto lookupResultGetMapped(const T * obj)
{ {
auto mapped_ptr = lookupResultGetMapped(const_cast<T *>(obj)); template <typename T>
const auto const_mapped_ptr = mapped_ptr; auto & operator=(const T &)
return const_mapped_ptr; {
} return *this;
}
};
/** Compile-time interface for cell of the hash table. /** Compile-time interface for cell of the hash table.
* Different cell types are used to implement different hash tables. * Different cell types are used to implement different hash tables.
@ -152,7 +134,7 @@ struct HashTableCell
using key_type = Key; using key_type = Key;
using value_type = Key; using value_type = Key;
using mapped_type = void; using mapped_type = VoidMapped;
Key key; Key key;
@ -161,10 +143,12 @@ struct HashTableCell
/// Create a cell with the given key / key and value. /// Create a cell with the given key / key and value.
HashTableCell(const Key & key_, const State &) : key(key_) {} HashTableCell(const Key & key_, const State &) : key(key_) {}
/// Get what the value_type of the container will be. /// Get the key (externally).
const Key & getKey() const { return key; }
VoidMapped getMapped() const { return {}; }
const value_type & getValue() const { return key; } const value_type & getValue() const { return key; }
/// Get the key. /// Get the key (internally).
static const Key & getKey(const value_type & value) { return value; } static const Key & getKey(const value_type & value) { return value; }
/// Are the keys at the cells equal? /// Are the keys at the cells equal?
@ -207,23 +191,15 @@ struct HashTableCell
void readText(DB::ReadBuffer & rb) { DB::readDoubleQuoted(key, rb); } void readText(DB::ReadBuffer & rb) { DB::readDoubleQuoted(key, rb); }
}; };
template<typename Key, typename Hash, typename State>
ALWAYS_INLINE inline auto lookupResultGetKey(HashTableCell<Key, Hash, State> * cell)
{ return &cell->key; }
template<typename Key, typename Hash, typename State>
ALWAYS_INLINE inline void * lookupResultGetMapped(HashTableCell<Key, Hash, State> *)
{ return nullptr; }
/** /**
* A helper function for HashTable::insert() to set the "mapped" value. * A helper function for HashTable::insert() to set the "mapped" value.
* Overloaded on the mapped type, does nothing if it's void. * Overloaded on the mapped type, does nothing if it's VoidMapped.
*/ */
template <typename ValueType> template <typename ValueType>
void insertSetMapped(void * /* dest */, const ValueType & /* src */) {} void insertSetMapped(VoidMapped /* dest */, const ValueType & /* src */) {}
template <typename MappedType, typename ValueType> template <typename MappedType, typename ValueType>
void insertSetMapped(MappedType * dest, const ValueType & src) { *dest = src.second; } void insertSetMapped(MappedType & dest, const ValueType & src) { dest = src.second; }
/** Determines the size of the hash table, and when and how much it should be resized. /** Determines the size of the hash table, and when and how much it should be resized.
@ -276,7 +252,7 @@ struct HashTableGrower
/** When used as a Grower, it turns a hash table into something like a lookup table. /** When used as a Grower, it turns a hash table into something like a lookup table.
* It remains non-optimal - the cells store the keys. * It remains non-optimal - the cells store the keys.
* Also, the compiler can not completely remove the code of passing through the collision resolution chain, although it is not needed. * Also, the compiler can not completely remove the code of passing through the collision resolution chain, although it is not needed.
* TODO Make a proper lookup table. * NOTE: Better to use FixedHashTable instead.
*/ */
template <size_t key_bits> template <size_t key_bits>
struct HashTableFixedGrower struct HashTableFixedGrower
@ -366,7 +342,6 @@ protected:
using HashValue = size_t; using HashValue = size_t;
using Self = HashTable; using Self = HashTable;
using cell_type = Cell;
size_t m_size = 0; /// Amount of elements size_t m_size = 0; /// Amount of elements
Cell * buf; /// A piece of memory for all elements except the element with zero key. Cell * buf; /// A piece of memory for all elements except the element with zero key.
@ -586,9 +561,10 @@ protected:
public: public:
using key_type = Key; using key_type = Key;
using mapped_type = typename Cell::mapped_type;
using value_type = typename Cell::value_type; using value_type = typename Cell::value_type;
using cell_type = Cell;
// Use lookupResultGetMapped/Key to work with these values.
using LookupResult = Cell *; using LookupResult = Cell *;
using ConstLookupResult = const Cell *; using ConstLookupResult = const Cell *;
@ -751,7 +727,7 @@ protected:
/// If the key is zero, insert it into a special place and return true. /// If the key is zero, insert it into a special place and return true.
/// We don't have to persist a zero key, because it's not actually inserted. /// We don't have to persist a zero key, because it's not actually inserted.
/// That's why we just take a Key by value, and not a key holder. /// That's why we just take a Key by value, and not a key holder.
bool ALWAYS_INLINE emplaceIfZero(Key x, LookupResult & it, bool & inserted, size_t hash_value) bool ALWAYS_INLINE emplaceIfZero(const Key & x, LookupResult & it, bool & inserted, size_t hash_value)
{ {
/// If it is claimed that the zero key can not be inserted into the table. /// If it is claimed that the zero key can not be inserted into the table.
if (!Cell::need_zero_value_storage) if (!Cell::need_zero_value_storage)
@ -793,7 +769,7 @@ protected:
keyHolderPersistKey(key_holder); keyHolderPersistKey(key_holder);
const auto & key = keyHolderGetKey(key_holder); const auto & key = keyHolderGetKey(key_holder);
new(&buf[place_value]) Cell(key, *this); new (&buf[place_value]) Cell(key, *this);
buf[place_value].setHash(hash_value); buf[place_value].setHash(hash_value);
inserted = true; inserted = true;
++m_size; ++m_size;
@ -846,7 +822,7 @@ public:
} }
if (res.second) if (res.second)
insertSetMapped(lookupResultGetMapped(res.first), x); insertSetMapped(res.first->getMapped(), x);
return res; return res;
} }
@ -869,11 +845,11 @@ public:
* *
* Example usage: * Example usage:
* *
* Map::iterator it; * Map::LookupResult it;
* bool inserted; * bool inserted;
* map.emplace(key, it, inserted); * map.emplace(key, it, inserted);
* if (inserted) * if (inserted)
* new(&it->second) Mapped(value); * new (&it->getMapped()) Mapped(value);
*/ */
template <typename KeyHolder> template <typename KeyHolder>
void ALWAYS_INLINE emplace(KeyHolder && key_holder, LookupResult & it, bool & inserted) void ALWAYS_INLINE emplace(KeyHolder && key_holder, LookupResult & it, bool & inserted)
@ -903,7 +879,7 @@ public:
resize(); resize();
} }
LookupResult ALWAYS_INLINE find(Key x) LookupResult ALWAYS_INLINE find(const Key & x)
{ {
if (Cell::isZero(x, *this)) if (Cell::isZero(x, *this))
return this->hasZero() ? this->zeroValue() : nullptr; return this->hasZero() ? this->zeroValue() : nullptr;
@ -913,12 +889,12 @@ public:
return !buf[place_value].isZero(*this) ? &buf[place_value] : nullptr; return !buf[place_value].isZero(*this) ? &buf[place_value] : nullptr;
} }
ConstLookupResult ALWAYS_INLINE find(Key x) const ConstLookupResult ALWAYS_INLINE find(const Key & x) const
{ {
return const_cast<std::decay_t<decltype(*this)> *>(this)->find(x); return const_cast<std::decay_t<decltype(*this)> *>(this)->find(x);
} }
LookupResult ALWAYS_INLINE find(Key x, size_t hash_value) LookupResult ALWAYS_INLINE find(const Key & x, size_t hash_value)
{ {
if (Cell::isZero(x, *this)) if (Cell::isZero(x, *this))
return this->hasZero() ? this->zeroValue() : nullptr; return this->hasZero() ? this->zeroValue() : nullptr;
@ -927,7 +903,12 @@ public:
return !buf[place_value].isZero(*this) ? &buf[place_value] : nullptr; return !buf[place_value].isZero(*this) ? &buf[place_value] : nullptr;
} }
bool ALWAYS_INLINE has(Key x) const ConstLookupResult ALWAYS_INLINE find(const Key & x, size_t hash_value) const
{
return const_cast<std::decay_t<decltype(*this)> *>(this)->find(x, hash_value);
}
bool ALWAYS_INLINE has(const Key & x) const
{ {
if (Cell::isZero(x, *this)) if (Cell::isZero(x, *this))
return this->hasZero(); return this->hasZero();
@ -938,7 +919,7 @@ public:
} }
bool ALWAYS_INLINE has(Key x, size_t hash_value) const bool ALWAYS_INLINE has(const Key & x, size_t hash_value) const
{ {
if (Cell::isZero(x, *this)) if (Cell::isZero(x, *this))
return this->hasZero(); return this->hasZero();

View File

@ -38,7 +38,6 @@ protected:
friend class Reader; friend class Reader;
using Self = SmallTable; using Self = SmallTable;
using cell_type = Cell;
size_t m_size = 0; /// Amount of elements. size_t m_size = 0; /// Amount of elements.
Cell buf[capacity]; /// A piece of memory for all elements. Cell buf[capacity]; /// A piece of memory for all elements.
@ -72,8 +71,9 @@ protected:
public: public:
using key_type = Key; using key_type = Key;
using mapped_type = typename Cell::mapped_type;
using value_type = typename Cell::value_type; using value_type = typename Cell::value_type;
using cell_type = Cell;
class Reader final : private Cell::State class Reader final : private Cell::State
{ {
@ -391,16 +391,17 @@ class SmallMapTable : public SmallTable<Key, Cell, capacity>
{ {
public: public:
using key_type = Key; using key_type = Key;
using mapped_type = typename Cell::Mapped; using mapped_type = typename Cell::mapped_type;
using value_type = typename Cell::value_type; using value_type = typename Cell::value_type;
using cell_type = Cell;
mapped_type & ALWAYS_INLINE operator[](Key x) mapped_type & ALWAYS_INLINE operator[](Key x)
{ {
typename SmallMapTable::iterator it; typename SmallMapTable::iterator it;
bool inserted; bool inserted;
this->emplace(x, it, inserted); this->emplace(x, it, inserted);
new(&it->getSecond()) mapped_type(); new (&it->getMapped()) mapped_type();
return it->getSecond(); return it->getMapped();
} }
}; };

View File

@ -8,43 +8,60 @@ template <typename Key, typename TMapped>
struct StringHashMapCell : public HashMapCell<Key, TMapped, StringHashTableHash, HashTableNoState> struct StringHashMapCell : public HashMapCell<Key, TMapped, StringHashTableHash, HashTableNoState>
{ {
using Base = HashMapCell<Key, TMapped, StringHashTableHash, HashTableNoState>; using Base = HashMapCell<Key, TMapped, StringHashTableHash, HashTableNoState>;
using value_type = typename Base::value_type;
using Base::Base; using Base::Base;
static constexpr bool need_zero_value_storage = false; static constexpr bool need_zero_value_storage = false;
// external
const StringRef getKey() const { return toStringRef(this->value.first); }
// internal
static const Key & getKey(const value_type & value_) { return value_.first; }
}; };
template<typename Key, typename Mapped>
auto lookupResultGetMapped(StringHashMapCell<Key, Mapped> * cell) { return &cell->getSecond(); }
template <typename TMapped> template <typename TMapped>
struct StringHashMapCell<StringKey16, TMapped> : public HashMapCell<StringKey16, TMapped, StringHashTableHash, HashTableNoState> struct StringHashMapCell<StringKey16, TMapped> : public HashMapCell<StringKey16, TMapped, StringHashTableHash, HashTableNoState>
{ {
using Base = HashMapCell<StringKey16, TMapped, StringHashTableHash, HashTableNoState>; using Base = HashMapCell<StringKey16, TMapped, StringHashTableHash, HashTableNoState>;
using value_type = typename Base::value_type;
using Base::Base; using Base::Base;
static constexpr bool need_zero_value_storage = false; static constexpr bool need_zero_value_storage = false;
bool isZero(const HashTableNoState & state) const { return isZero(this->value.first, state); } bool isZero(const HashTableNoState & state) const { return isZero(this->value.first, state); }
// Assuming String does not contain zero bytes. NOTE: Cannot be used in serialized method // Assuming String does not contain zero bytes. NOTE: Cannot be used in serialized method
static bool isZero(const StringKey16 & key, const HashTableNoState & /*state*/) { return key.low == 0; } static bool isZero(const StringKey16 & key, const HashTableNoState & /*state*/) { return key.low == 0; }
void setZero() { this->value.first.low = 0; } void setZero() { this->value.first.low = 0; }
// external
const StringRef getKey() const { return toStringRef(this->value.first); }
// internal
static const StringKey16 & getKey(const value_type & value_) { return value_.first; }
}; };
template <typename TMapped> template <typename TMapped>
struct StringHashMapCell<StringKey24, TMapped> : public HashMapCell<StringKey24, TMapped, StringHashTableHash, HashTableNoState> struct StringHashMapCell<StringKey24, TMapped> : public HashMapCell<StringKey24, TMapped, StringHashTableHash, HashTableNoState>
{ {
using Base = HashMapCell<StringKey24, TMapped, StringHashTableHash, HashTableNoState>; using Base = HashMapCell<StringKey24, TMapped, StringHashTableHash, HashTableNoState>;
using value_type = typename Base::value_type;
using Base::Base; using Base::Base;
static constexpr bool need_zero_value_storage = false; static constexpr bool need_zero_value_storage = false;
bool isZero(const HashTableNoState & state) const { return isZero(this->value.first, state); } bool isZero(const HashTableNoState & state) const { return isZero(this->value.first, state); }
// Assuming String does not contain zero bytes. NOTE: Cannot be used in serialized method // Assuming String does not contain zero bytes. NOTE: Cannot be used in serialized method
static bool isZero(const StringKey24 & key, const HashTableNoState & /*state*/) { return key.a == 0; } static bool isZero(const StringKey24 & key, const HashTableNoState & /*state*/) { return key.a == 0; }
void setZero() { this->value.first.a = 0; } void setZero() { this->value.first.a = 0; }
// external
const StringRef getKey() const { return toStringRef(this->value.first); }
// internal
static const StringKey24 & getKey(const value_type & value_) { return value_.first; }
}; };
template <typename TMapped> template <typename TMapped>
struct StringHashMapCell<StringRef, TMapped> : public HashMapCellWithSavedHash<StringRef, TMapped, StringHashTableHash, HashTableNoState> struct StringHashMapCell<StringRef, TMapped> : public HashMapCellWithSavedHash<StringRef, TMapped, StringHashTableHash, HashTableNoState>
{ {
using Base = HashMapCellWithSavedHash<StringRef, TMapped, StringHashTableHash, HashTableNoState>; using Base = HashMapCellWithSavedHash<StringRef, TMapped, StringHashTableHash, HashTableNoState>;
using value_type = typename Base::value_type;
using Base::Base; using Base::Base;
static constexpr bool need_zero_value_storage = false; static constexpr bool need_zero_value_storage = false;
// external
using Base::getKey;
// internal
static const StringRef & getKey(const value_type & value_) { return value_.first; }
}; };
template <typename TMapped, typename Allocator> template <typename TMapped, typename Allocator>
@ -61,13 +78,10 @@ template <typename TMapped, typename Allocator = HashTableAllocator>
class StringHashMap : public StringHashTable<StringHashMapSubMaps<TMapped, Allocator>> class StringHashMap : public StringHashTable<StringHashMapSubMaps<TMapped, Allocator>>
{ {
public: public:
using Key = StringRef;
using Base = StringHashTable<StringHashMapSubMaps<TMapped, Allocator>>; using Base = StringHashTable<StringHashMapSubMaps<TMapped, Allocator>>;
using Self = StringHashMap; using Self = StringHashMap;
using Key = StringRef; using LookupResult = typename Base::LookupResult;
using key_type = StringRef;
using mapped_type = TMapped;
using value_type = typename Base::Ts::value_type;
using LookupResult = mapped_type *;
using Base::Base; using Base::Base;
@ -80,18 +94,13 @@ public:
template <typename Func> template <typename Func>
void ALWAYS_INLINE mergeToViaEmplace(Self & that, Func && func) void ALWAYS_INLINE mergeToViaEmplace(Self & that, Func && func)
{ {
if (this->m0.hasZero()) if (this->m0.hasZero() && that.m0.hasZero())
func(that.m0.zeroValue()->getMapped(), this->m0.zeroValue()->getMapped(), false);
else if (this->m0.hasZero())
{ {
const bool emplace_new_zero = !that.m0.hasZero(); that.m0.setHasZero();
if (emplace_new_zero) func(that.m0.zeroValue()->getMapped(), this->m0.zeroValue()->getMapped(), true);
{
that.m0.setHasZero();
}
func(that.m0.zeroValue()->getSecond(), this->m0.zeroValue()->getSecond(),
emplace_new_zero);
} }
this->m1.mergeToViaEmplace(that.m1, func); this->m1.mergeToViaEmplace(that.m1, func);
this->m2.mergeToViaEmplace(that.m2, func); this->m2.mergeToViaEmplace(that.m2, func);
this->m3.mergeToViaEmplace(that.m3, func); this->m3.mergeToViaEmplace(that.m3, func);
@ -106,32 +115,25 @@ public:
template <typename Func> template <typename Func>
void ALWAYS_INLINE mergeToViaFind(Self & that, Func && func) void ALWAYS_INLINE mergeToViaFind(Self & that, Func && func)
{ {
if (this->m0.hasZero()) if (this->m0.size() && that.m0.size())
{ func(that.m0.zeroValue()->getMapped(), this->m0.zeroValue()->getMapped(), true);
if (that.m0.hasZero()) else if (this->m0.size())
{ func(this->m0.zeroValue()->getMapped(), this->m0.zeroValue()->getMapped(), false);
func(that.m0.zeroValue()->getSecond(), this->m0.zeroValue()->getSecond(), true);
}
else
{
func(this->m0.zeroValue()->getSecond(), this->m0.zeroValue()->getSecond(), false);
}
}
this->m1.mergeToViaFind(that.m1, func); this->m1.mergeToViaFind(that.m1, func);
this->m2.mergeToViaFind(that.m2, func); this->m2.mergeToViaFind(that.m2, func);
this->m3.mergeToViaFind(that.m3, func); this->m3.mergeToViaFind(that.m3, func);
this->ms.mergeToViaFind(that.ms, func); this->ms.mergeToViaFind(that.ms, func);
} }
mapped_type & ALWAYS_INLINE operator[](Key x) TMapped & ALWAYS_INLINE operator[](const Key & x)
{ {
LookupResult it;
bool inserted; bool inserted;
LookupResult it = nullptr; this->emplace(x, it, inserted);
emplace(x, it, inserted);
if (inserted) if (inserted)
new (it) mapped_type(); new (&it->getMapped()) TMapped();
return *it;
return it->getMapped();
} }
template <typename Func> template <typename Func>
@ -139,27 +141,27 @@ public:
{ {
if (this->m0.size()) if (this->m0.size())
{ {
func(StringRef{}, this->m0.zeroValue()->getSecond()); func(StringRef{}, this->m0.zeroValue()->getMapped());
} }
for (auto & v : this->m1) for (auto & v : this->m1)
{ {
func(toStringRef(v.getFirst()), v.getSecond()); func(v.getKey(), v.getMapped());
} }
for (auto & v : this->m2) for (auto & v : this->m2)
{ {
func(toStringRef(v.getFirst()), v.getSecond()); func(v.getKey(), v.getMapped());
} }
for (auto & v : this->m3) for (auto & v : this->m3)
{ {
func(toStringRef(v.getFirst()), v.getSecond()); func(v.getKey(), v.getMapped());
} }
for (auto & v : this->ms) for (auto & v : this->ms)
{ {
func(v.getFirst(), v.getSecond()); func(v.getKey(), v.getMapped());
} }
} }
@ -167,14 +169,14 @@ public:
void ALWAYS_INLINE forEachMapped(Func && func) void ALWAYS_INLINE forEachMapped(Func && func)
{ {
if (this->m0.size()) if (this->m0.size())
func(this->m0.zeroValue()->getSecond()); func(this->m0.zeroValue()->getMapped());
for (auto & v : this->m1) for (auto & v : this->m1)
func(v.getSecond()); func(v.getMapped());
for (auto & v : this->m2) for (auto & v : this->m2)
func(v.getSecond()); func(v.getMapped());
for (auto & v : this->m3) for (auto & v : this->m3)
func(v.getSecond()); func(v.getMapped());
for (auto & v : this->ms) for (auto & v : this->ms)
func(v.getSecond()); func(v.getMapped());
} }
}; };

View File

@ -3,9 +3,7 @@
#include <Common/HashTable/HashMap.h> #include <Common/HashTable/HashMap.h>
#include <Common/HashTable/HashTable.h> #include <Common/HashTable/HashTable.h>
struct StringKey0 #include <variant>
{
};
using StringKey8 = UInt64; using StringKey8 = UInt64;
using StringKey16 = DB::UInt128; using StringKey16 = DB::UInt128;
@ -112,7 +110,7 @@ public:
using ConstLookupResult = const Cell *; using ConstLookupResult = const Cell *;
template <typename KeyHolder> template <typename KeyHolder>
void ALWAYS_INLINE emplace(KeyHolder &&, LookupResult & it, bool & inserted, size_t /* hash */) void ALWAYS_INLINE emplace(KeyHolder &&, LookupResult & it, bool & inserted, size_t = 0)
{ {
if (!hasZero()) if (!hasZero())
{ {
@ -125,11 +123,16 @@ public:
} }
template <typename Key> template <typename Key>
LookupResult ALWAYS_INLINE find(Key, size_t /* hash */) LookupResult ALWAYS_INLINE find(const Key &, size_t = 0)
{ {
return hasZero() ? zeroValue() : nullptr; return hasZero() ? zeroValue() : nullptr;
} }
/// Const lookup. Both the key and the hash argument are ignored: the result depends only on
/// whether the zero cell is present. Yields that cell if so, and nullptr otherwise.
template <typename Key>
ConstLookupResult ALWAYS_INLINE find(const Key &, size_t = 0) const
{
    if (!hasZero())
        return nullptr;

    return zeroValue();
}
void write(DB::WriteBuffer & wb) const { zeroValue()->write(wb); } void write(DB::WriteBuffer & wb) const { zeroValue()->write(wb); }
void writeText(DB::WriteBuffer & wb) const { zeroValue()->writeText(wb); } void writeText(DB::WriteBuffer & wb) const { zeroValue()->writeText(wb); }
@ -148,6 +151,26 @@ struct StringHashTableGrower : public HashTableGrower<initial_size_degree>
void increaseSize() { this->size_degree += 1; } void increaseSize() { this->size_degree += 1; }
}; };
/** Pointer-like lookup result used as LookupResult / ConstLookupResult of StringHashTable.
  *
  * It wraps only a pointer to the mapped value; the key is not retrievable, so getKey() returns
  * an empty VoidKey. operator* and operator-> return the object itself, which lets callers write
  * `it->getMapped()` the same way they would with a cell pointer.
  *
  * A default-constructed or nullptr-constructed result is null: it converts to false and compares
  * equal to nullptr. getMapped() must not be called on a null result.
  */
template <typename Mapped>
struct StringHashTableLookupResult
{
    /// Null unless constructed from a pointer. The default member initializer guarantees that the
    /// default and nullptr constructors yield a well-defined null result instead of an
    /// indeterminate pointer that operator bool and the comparisons below would read.
    Mapped * mapped_ptr = nullptr;
    StringHashTableLookupResult() {}
    StringHashTableLookupResult(Mapped * mapped_ptr_) : mapped_ptr(mapped_ptr_) {}
    StringHashTableLookupResult(std::nullptr_t) {}
    /// The key is not stored in the result; an empty placeholder is returned.
    const VoidKey getKey() const { return {}; }
    /// Const-qualified so that it is also reachable through the const operator-> below;
    /// constness of the referenced value is carried by the Mapped template argument.
    auto & getMapped() const { return *mapped_ptr; }
    auto & operator*() { return *this; }
    auto & operator*() const { return *this; }
    auto * operator->() { return this; }
    auto * operator->() const { return this; }
    /// True when the result refers to a mapped value.
    operator bool() const { return mapped_ptr; }
    friend bool operator==(const StringHashTableLookupResult & a, const std::nullptr_t &) { return !a.mapped_ptr; }
    friend bool operator==(const std::nullptr_t &, const StringHashTableLookupResult & b) { return !b.mapped_ptr; }
    friend bool operator!=(const StringHashTableLookupResult & a, const std::nullptr_t &) { return a.mapped_ptr; }
    friend bool operator!=(const std::nullptr_t &, const StringHashTableLookupResult & b) { return b.mapped_ptr; }
};
template <typename SubMaps> template <typename SubMaps>
class StringHashTable : private boost::noncopyable class StringHashTable : private boost::noncopyable
{ {
@ -177,8 +200,12 @@ protected:
public: public:
using Key = StringRef; using Key = StringRef;
using key_type = Key; using key_type = Key;
using mapped_type = typename Ts::mapped_type;
using value_type = typename Ts::value_type; using value_type = typename Ts::value_type;
using LookupResult = typename Ts::mapped_type *; using cell_type = typename Ts::cell_type;
using LookupResult = StringHashTableLookupResult<typename cell_type::mapped_type>;
using ConstLookupResult = StringHashTableLookupResult<const typename cell_type::mapped_type>;
StringHashTable() {} StringHashTable() {}
@ -199,16 +226,15 @@ public:
// 2. Use switch case extension to generate fast dispatching table // 2. Use switch case extension to generate fast dispatching table
// 3. Funcs are named callables that can be force_inlined // 3. Funcs are named callables that can be force_inlined
// NOTE: It relies on Little Endianness // NOTE: It relies on Little Endianness
template <typename KeyHolder, typename Func> template <typename Self, typename KeyHolder, typename Func>
decltype(auto) ALWAYS_INLINE dispatch(KeyHolder && key_holder, Func && func) static auto ALWAYS_INLINE dispatch(Self & self, KeyHolder && key_holder, Func && func)
{ {
const StringRef & x = keyHolderGetKey(key_holder); const StringRef & x = keyHolderGetKey(key_holder);
const size_t sz = x.size; const size_t sz = x.size;
if (sz == 0) if (sz == 0)
{ {
static constexpr StringKey0 key0{};
keyHolderDiscardKey(key_holder); keyHolderDiscardKey(key_holder);
return func(m0, key0, 0); return func(self.m0, VoidKey{}, 0);
} }
const char * p = x.data; const char * p = x.data;
@ -239,7 +265,7 @@ public:
n[0] >>= s; n[0] >>= s;
} }
keyHolderDiscardKey(key_holder); keyHolderDiscardKey(key_holder);
return func(m1, k8, hash(k8)); return func(self.m1, k8, hash(k8));
} }
case 1: // 9..16 bytes case 1: // 9..16 bytes
{ {
@ -248,7 +274,7 @@ public:
memcpy(&n[1], lp, 8); memcpy(&n[1], lp, 8);
n[1] >>= s; n[1] >>= s;
keyHolderDiscardKey(key_holder); keyHolderDiscardKey(key_holder);
return func(m2, k16, hash(k16)); return func(self.m2, k16, hash(k16));
} }
case 2: // 17..24 bytes case 2: // 17..24 bytes
{ {
@ -257,11 +283,11 @@ public:
memcpy(&n[2], lp, 8); memcpy(&n[2], lp, 8);
n[2] >>= s; n[2] >>= s;
keyHolderDiscardKey(key_holder); keyHolderDiscardKey(key_holder);
return func(m3, k24, hash(k24)); return func(self.m3, k24, hash(k24));
} }
default: // >= 25 bytes default: // >= 25 bytes
{ {
return func(ms, std::forward<KeyHolder>(key_holder), hash(x)); return func(self.ms, std::forward<KeyHolder>(key_holder), hash(x));
} }
} }
} }
@ -279,14 +305,14 @@ public:
{ {
typename Map::LookupResult result; typename Map::LookupResult result;
map.emplace(key_holder, result, inserted, hash); map.emplace(key_holder, result, inserted, hash);
mapped = lookupResultGetMapped(result); mapped = &result->getMapped();
} }
}; };
template <typename KeyHolder> template <typename KeyHolder>
void ALWAYS_INLINE emplace(KeyHolder && key_holder, LookupResult & it, bool & inserted) void ALWAYS_INLINE emplace(KeyHolder && key_holder, LookupResult & it, bool & inserted)
{ {
this->dispatch(key_holder, EmplaceCallable(it, inserted)); this->dispatch(*this, key_holder, EmplaceCallable(it, inserted));
} }
struct FindCallable struct FindCallable
@ -295,15 +321,25 @@ public:
// any key holders here, only with normal keys. The key type is still // any key holders here, only with normal keys. The key type is still
// different for every subtable, this is why it is a template parameter. // different for every subtable, this is why it is a template parameter.
template <typename Submap, typename SubmapKey> template <typename Submap, typename SubmapKey>
LookupResult ALWAYS_INLINE operator()(Submap & map, const SubmapKey & key, size_t hash) auto ALWAYS_INLINE operator()(Submap & map, const SubmapKey & key, size_t hash)
{ {
return lookupResultGetMapped(map.find(key, hash)); return &map.find(key, hash)->getMapped();
} }
}; };
/// Mutable lookup: `dispatch` selects the submap for `x` and `FindCallable`
/// performs the search there.
LookupResult ALWAYS_INLINE find(const Key & x)
{
    FindCallable find_in_submap;
    return dispatch(*this, x, find_in_submap);
}
/// Read-only lookup: same routing as the mutable overload, but `*this` is
/// const here, so `dispatch` is handed a const table.
ConstLookupResult ALWAYS_INLINE find(const Key & x) const
{
    FindCallable find_in_submap;
    return dispatch(*this, x, find_in_submap);
}
/// Tells whether `x` is present, i.e. whether the lookup produced a non-null
/// result. The second (unnamed) parameter is accepted and ignored.
bool ALWAYS_INLINE has(const Key & x, size_t = 0) const
{
    const auto found = dispatch(*this, x, FindCallable{});
    return found != nullptr;
}
void write(DB::WriteBuffer & wb) const void write(DB::WriteBuffer & wb) const

View File

@ -16,10 +16,6 @@ template
class TwoLevelHashMapTable : public TwoLevelHashTable<Key, Cell, Hash, Grower, Allocator, ImplTable<Key, Cell, Hash, Grower, Allocator>> class TwoLevelHashMapTable : public TwoLevelHashTable<Key, Cell, Hash, Grower, Allocator, ImplTable<Key, Cell, Hash, Grower, Allocator>>
{ {
public: public:
using key_type = Key;
using mapped_type = typename Cell::Mapped;
using value_type = typename Cell::value_type;
using Impl = ImplTable<Key, Cell, Hash, Grower, Allocator>; using Impl = ImplTable<Key, Cell, Hash, Grower, Allocator>;
using LookupResult = typename Impl::LookupResult; using LookupResult = typename Impl::LookupResult;
@ -32,16 +28,16 @@ public:
this->impls[i].forEachMapped(func); this->impls[i].forEachMapped(func);
} }
mapped_type & ALWAYS_INLINE operator[](Key x) typename Cell::Mapped & ALWAYS_INLINE operator[](const Key & x)
{ {
typename TwoLevelHashMapTable::LookupResult it; LookupResult it;
bool inserted; bool inserted;
this->emplace(x, it, inserted); this->emplace(x, it, inserted);
if (inserted) if (inserted)
new(lookupResultGetMapped(it)) mapped_type(); new (&it->getMapped()) typename Cell::Mapped();
return *lookupResultGetMapped(it); return it->getMapped();
} }
}; };

View File

@ -82,7 +82,9 @@ protected:
public: public:
using key_type = typename Impl::key_type; using key_type = typename Impl::key_type;
using mapped_type = typename Impl::mapped_type;
using value_type = typename Impl::value_type; using value_type = typename Impl::value_type;
using cell_type = typename Impl::cell_type;
using LookupResult = typename Impl::LookupResult; using LookupResult = typename Impl::LookupResult;
using ConstLookupResult = typename Impl::ConstLookupResult; using ConstLookupResult = typename Impl::ConstLookupResult;
@ -217,7 +219,7 @@ public:
emplace(Cell::getKey(x), res.first, res.second, hash_value); emplace(Cell::getKey(x), res.first, res.second, hash_value);
if (res.second) if (res.second)
insertSetMapped(lookupResultGetMapped(res.first), x); insertSetMapped(res.first->getMapped(), x);
return res; return res;
} }

View File

@ -8,16 +8,12 @@ class TwoLevelStringHashMap : public TwoLevelStringHashTable<StringHashMapSubMap
{ {
public: public:
using Key = StringRef; using Key = StringRef;
using key_type = Key;
using Self = TwoLevelStringHashMap; using Self = TwoLevelStringHashMap;
using Base = TwoLevelStringHashTable<StringHashMapSubMaps<TMapped, Allocator>, StringHashMap<TMapped, Allocator>>; using Base = TwoLevelStringHashTable<StringHashMapSubMaps<TMapped, Allocator>, StringHashMap<TMapped, Allocator>>;
using Base::Base;
using typename Base::Impl;
using mapped_type = TMapped;
using value_type = typename Base::value_type;
using LookupResult = typename Base::LookupResult; using LookupResult = typename Base::LookupResult;
using Base::Base;
template <typename Func> template <typename Func>
void ALWAYS_INLINE forEachMapped(Func && func) void ALWAYS_INLINE forEachMapped(Func && func)
{ {
@ -25,13 +21,13 @@ public:
return this->impls[i].forEachMapped(func); return this->impls[i].forEachMapped(func);
} }
mapped_type & ALWAYS_INLINE operator[](Key x) TMapped & ALWAYS_INLINE operator[](const Key & x)
{ {
bool inserted; bool inserted;
LookupResult it; LookupResult it;
emplace(x, it, inserted); this->emplace(x, it, inserted);
if (inserted) if (inserted)
new (lookupResultGetMapped(it)) mapped_type(); new (&it->getMapped()) TMapped();
return *lookupResultGetMapped(it); return it->getMapped();
} }
}; };

View File

@ -19,8 +19,7 @@ public:
// TODO: currently hashing contains redundant computations when doing distributed or external aggregations // TODO: currently hashing contains redundant computations when doing distributed or external aggregations
size_t hash(const Key & x) const size_t hash(const Key & x) const
{ {
return const_cast<Self &>(*this).dispatch(x, return const_cast<Self &>(*this).dispatch(*this, x, [&](const auto &, const auto &, size_t hash) { return hash; });
[&](const auto &, const auto &, size_t hash) { return hash; });
} }
/// Function-call form of `hash`, so the table object can be used as a hasher.
size_t operator()(const Key & x) const
{
    return this->hash(x);
}
@ -30,8 +29,12 @@ public:
public: public:
using key_type = typename Impl::key_type; using key_type = typename Impl::key_type;
using mapped_type = typename Impl::mapped_type;
using value_type = typename Impl::value_type; using value_type = typename Impl::value_type;
using cell_type = typename Impl::cell_type;
using LookupResult = typename Impl::LookupResult; using LookupResult = typename Impl::LookupResult;
using ConstLookupResult = typename Impl::ConstLookupResult;
Impl impls[NUM_BUCKETS]; Impl impls[NUM_BUCKETS];
@ -71,16 +74,15 @@ public:
// This function is mostly the same as StringHashTable::dispatch, but with // This function is mostly the same as StringHashTable::dispatch, but with
// added bucket computation. See the comments there. // added bucket computation. See the comments there.
template <typename Func, typename KeyHolder> template <typename Self, typename Func, typename KeyHolder>
decltype(auto) ALWAYS_INLINE dispatch(KeyHolder && key_holder, Func && func) static auto ALWAYS_INLINE dispatch(Self & self, KeyHolder && key_holder, Func && func)
{ {
const StringRef & x = keyHolderGetKey(key_holder); const StringRef & x = keyHolderGetKey(key_holder);
const size_t sz = x.size; const size_t sz = x.size;
if (sz == 0) if (sz == 0)
{ {
static constexpr StringKey0 key0{};
keyHolderDiscardKey(key_holder); keyHolderDiscardKey(key_holder);
return func(impls[0].m0, key0, 0); return func(self.impls[0].m0, VoidKey{}, 0);
} }
const char * p = x.data; const char * p = x.data;
@ -113,7 +115,7 @@ public:
auto res = hash(k8); auto res = hash(k8);
auto buck = getBucketFromHash(res); auto buck = getBucketFromHash(res);
keyHolderDiscardKey(key_holder); keyHolderDiscardKey(key_holder);
return func(impls[buck].m1, k8, res); return func(self.impls[buck].m1, k8, res);
} }
case 1: case 1:
{ {
@ -124,7 +126,7 @@ public:
auto res = hash(k16); auto res = hash(k16);
auto buck = getBucketFromHash(res); auto buck = getBucketFromHash(res);
keyHolderDiscardKey(key_holder); keyHolderDiscardKey(key_holder);
return func(impls[buck].m2, k16, res); return func(self.impls[buck].m2, k16, res);
} }
case 2: case 2:
{ {
@ -135,13 +137,13 @@ public:
auto res = hash(k24); auto res = hash(k24);
auto buck = getBucketFromHash(res); auto buck = getBucketFromHash(res);
keyHolderDiscardKey(key_holder); keyHolderDiscardKey(key_holder);
return func(impls[buck].m3, k24, res); return func(self.impls[buck].m3, k24, res);
} }
default: default:
{ {
auto res = hash(x); auto res = hash(x);
auto buck = getBucketFromHash(res); auto buck = getBucketFromHash(res);
return func(impls[buck].ms, std::forward<KeyHolder>(key_holder), res); return func(self.impls[buck].ms, std::forward<KeyHolder>(key_holder), res);
} }
} }
} }
@ -149,12 +151,17 @@ public:
template <typename KeyHolder> template <typename KeyHolder>
void ALWAYS_INLINE emplace(KeyHolder && key_holder, LookupResult & it, bool & inserted) void ALWAYS_INLINE emplace(KeyHolder && key_holder, LookupResult & it, bool & inserted)
{ {
dispatch(key_holder, typename Impl::EmplaceCallable{it, inserted}); dispatch(*this, key_holder, typename Impl::EmplaceCallable{it, inserted});
} }
/// Mutable lookup: `dispatch` selects the bucket and submap for `x`, and
/// `Impl::FindCallable` performs the search there.
LookupResult ALWAYS_INLINE find(const Key x)
{
    typename Impl::FindCallable find_in_submap{};
    return dispatch(*this, x, find_in_submap);
}
/// Read-only lookup: same routing as the mutable overload, but `*this` is
/// const here, so `dispatch` is handed a const table.
ConstLookupResult ALWAYS_INLINE find(const Key x) const
{
    typename Impl::FindCallable find_in_submap{};
    return dispatch(*this, x, find_in_submap);
}
void write(DB::WriteBuffer & wb) const void write(DB::WriteBuffer & wb) const

View File

@ -369,7 +369,7 @@ private:
if (!it) if (!it)
return nullptr; return nullptr;
return *lookupResultGetMapped(it); return it->getMapped();
} }
void rebuildCounterMap() void rebuildCounterMap()

View File

@ -155,10 +155,10 @@ int main(int argc, char ** argv)
map.emplace(rand(), it, inserted); map.emplace(rand(), it, inserted);
if (inserted) if (inserted)
{ {
new(lookupResultGetMapped(it)) Arr(n); new (&it->getMapped()) Arr(n);
for (size_t j = 0; j < n; ++j) for (size_t j = 0; j < n; ++j)
(*lookupResultGetMapped(it))[j] = field; (it->getMapped())[j] = field;
} }
} }

View File

@ -82,14 +82,14 @@ void aggregate12(Map & map, Source::const_iterator begin, Source::const_iterator
{ {
if (prev_it != end && *it == *prev_it) if (prev_it != end && *it == *prev_it)
{ {
++*lookupResultGetMapped(found); ++found->getMapped();
continue; continue;
} }
prev_it = it; prev_it = it;
bool inserted; bool inserted;
map.emplace(*it, found, inserted); map.emplace(*it, found, inserted);
++*lookupResultGetMapped(found); ++found->getMapped();
} }
} }
@ -107,14 +107,14 @@ void aggregate22(MapTwoLevel & map, Source::const_iterator begin, Source::const_
{ {
if (*it == *prev_it) if (*it == *prev_it)
{ {
++*lookupResultGetMapped(found); ++found->getMapped();
continue; continue;
} }
prev_it = it; prev_it = it;
bool inserted; bool inserted;
map.emplace(*it, found, inserted); map.emplace(*it, found, inserted);
++*lookupResultGetMapped(found); ++found->getMapped();
} }
} }
@ -126,7 +126,7 @@ void merge2(MapTwoLevel * maps, size_t num_threads, size_t bucket)
{ {
for (size_t i = 1; i < num_threads; ++i) for (size_t i = 1; i < num_threads; ++i)
for (auto it = maps[i].impls[bucket].begin(); it != maps[i].impls[bucket].end(); ++it) for (auto it = maps[i].impls[bucket].begin(); it != maps[i].impls[bucket].end(); ++it)
maps[0].impls[bucket][it->getFirst()] += it->getSecond(); maps[0].impls[bucket][it->getKey()] += it->getMapped();
} }
void aggregate3(Map & local_map, Map & global_map, Mutex & mutex, Source::const_iterator begin, Source::const_iterator end) void aggregate3(Map & local_map, Map & global_map, Mutex & mutex, Source::const_iterator begin, Source::const_iterator end)
@ -138,7 +138,7 @@ void aggregate3(Map & local_map, Map & global_map, Mutex & mutex, Source::const_
auto found = local_map.find(*it); auto found = local_map.find(*it);
if (found) if (found)
++*lookupResultGetMapped(found); ++found->getMapped();
else if (local_map.size() < threshold) else if (local_map.size() < threshold)
++local_map[*it]; /// TODO You could do one lookup, not two. ++local_map[*it]; /// TODO You could do one lookup, not two.
else else
@ -163,13 +163,13 @@ void aggregate33(Map & local_map, Map & global_map, Mutex & mutex, Source::const
Map::LookupResult found; Map::LookupResult found;
bool inserted; bool inserted;
local_map.emplace(*it, found, inserted); local_map.emplace(*it, found, inserted);
++*lookupResultGetMapped(found); ++found->getMapped();
if (inserted && local_map.size() == threshold) if (inserted && local_map.size() == threshold)
{ {
std::lock_guard<Mutex> lock(mutex); std::lock_guard<Mutex> lock(mutex);
for (auto & value_type : local_map) for (auto & value_type : local_map)
global_map[value_type.getFirst()] += value_type.getSecond(); global_map[value_type.getKey()] += value_type.getMapped();
local_map.clear(); local_map.clear();
} }
@ -198,7 +198,7 @@ void aggregate4(Map & local_map, MapTwoLevel & global_map, Mutex * mutexes, Sour
auto found = local_map.find(*it); auto found = local_map.find(*it);
if (found) if (found)
++*lookupResultGetMapped(found); ++found->getMapped();
else else
{ {
size_t hash_value = global_map.hash(*it); size_t hash_value = global_map.hash(*it);
@ -311,7 +311,7 @@ int main(int argc, char ** argv)
for (size_t i = 1; i < num_threads; ++i) for (size_t i = 1; i < num_threads; ++i)
for (auto it = maps[i].begin(); it != maps[i].end(); ++it) for (auto it = maps[i].begin(); it != maps[i].end(); ++it)
maps[0][it->getFirst()] += it->getSecond(); maps[0][it->getKey()] += it->getMapped();
watch.stop(); watch.stop();
double time_merged = watch.elapsedSeconds(); double time_merged = watch.elapsedSeconds();
@ -365,7 +365,7 @@ int main(int argc, char ** argv)
for (size_t i = 1; i < num_threads; ++i) for (size_t i = 1; i < num_threads; ++i)
for (auto it = maps[i].begin(); it != maps[i].end(); ++it) for (auto it = maps[i].begin(); it != maps[i].end(); ++it)
maps[0][it->getFirst()] += it->getSecond(); maps[0][it->getKey()] += it->getMapped();
watch.stop(); watch.stop();
@ -435,7 +435,7 @@ int main(int argc, char ** argv)
continue; continue;
finish = false; finish = false;
maps[0][iterators[i]->getFirst()] += iterators[i]->getSecond(); maps[0][iterators[i]->getKey()] += iterators[i]->getMapped();
++iterators[i]; ++iterators[i];
} }
@ -623,7 +623,7 @@ int main(int argc, char ** argv)
for (size_t i = 0; i < num_threads; ++i) for (size_t i = 0; i < num_threads; ++i)
for (auto it = local_maps[i].begin(); it != local_maps[i].end(); ++it) for (auto it = local_maps[i].begin(); it != local_maps[i].end(); ++it)
global_map[it->getFirst()] += it->getSecond(); global_map[it->getKey()] += it->getMapped();
pool.wait(); pool.wait();
@ -689,7 +689,7 @@ int main(int argc, char ** argv)
for (size_t i = 0; i < num_threads; ++i) for (size_t i = 0; i < num_threads; ++i)
for (auto it = local_maps[i].begin(); it != local_maps[i].end(); ++it) for (auto it = local_maps[i].begin(); it != local_maps[i].end(); ++it)
global_map[it->getFirst()] += it->getSecond(); global_map[it->getKey()] += it->getMapped();
pool.wait(); pool.wait();
@ -760,7 +760,7 @@ int main(int argc, char ** argv)
for (size_t i = 0; i < num_threads; ++i) for (size_t i = 0; i < num_threads; ++i)
for (auto it = local_maps[i].begin(); it != local_maps[i].end(); ++it) for (auto it = local_maps[i].begin(); it != local_maps[i].end(); ++it)
global_map[it->getFirst()] += it->getSecond(); global_map[it->getKey()] += it->getMapped();
pool.wait(); pool.wait();

View File

@ -51,9 +51,9 @@ struct AggregateIndependent
map.emplace(*it, place, inserted); map.emplace(*it, place, inserted);
if (inserted) if (inserted)
creator(*lookupResultGetMapped(place)); creator(place->getMapped());
else else
updater(*lookupResultGetMapped(place)); updater(place->getMapped());
} }
}); });
} }
@ -93,7 +93,7 @@ struct AggregateIndependentWithSequentialKeysOptimization
{ {
if (it != begin && *it == prev_key) if (it != begin && *it == prev_key)
{ {
updater(*lookupResultGetMapped(place)); updater(place->getMapped());
continue; continue;
} }
prev_key = *it; prev_key = *it;
@ -102,9 +102,9 @@ struct AggregateIndependentWithSequentialKeysOptimization
map.emplace(*it, place, inserted); map.emplace(*it, place, inserted);
if (inserted) if (inserted)
creator(*lookupResultGetMapped(place)); creator(place->getMapped());
else else
updater(*lookupResultGetMapped(place)); updater(place->getMapped());
} }
}); });
} }
@ -131,7 +131,7 @@ struct MergeSequential
auto begin = source_maps[i]->begin(); auto begin = source_maps[i]->begin();
auto end = source_maps[i]->end(); auto end = source_maps[i]->end();
for (auto it = begin; it != end; ++it) for (auto it = begin; it != end; ++it)
merger((*source_maps[0])[it->getFirst()], it->getSecond()); merger((*source_maps[0])[it->getKey()], it->getMapped());
} }
result_map = source_maps[0]; result_map = source_maps[0];
@ -161,7 +161,7 @@ struct MergeSequentialTransposed /// In practice not better than usual.
continue; continue;
finish = false; finish = false;
merger((*result_map)[iterators[i]->getFirst()], iterators[i]->getSecond()); merger((*result_map)[iterators[i]->getKey()], iterators[i]->getMapped());
++iterators[i]; ++iterators[i];
} }

View File

@ -42,7 +42,7 @@ int main(int, char **)
cont[1] = "Goodbye."; cont[1] = "Goodbye.";
for (auto x : cont) for (auto x : cont)
std::cerr << x.getFirst() << " -> " << x.getSecond() << std::endl; std::cerr << x.getKey() << " -> " << x.getMapped() << std::endl;
DB::WriteBufferFromOwnString wb; DB::WriteBufferFromOwnString wb;
cont.writeText(wb); cont.writeText(wb);

View File

@ -211,7 +211,7 @@ int main(int argc, char ** argv)
{ {
RefsHashMap::LookupResult inserted_it; RefsHashMap::LookupResult inserted_it;
bool inserted; bool inserted;
set.emplace(StringRef(*lookupResultGetMapped(it)), inserted_it, inserted); set.emplace(StringRef(*it), inserted_it, inserted);
} }
std::cerr << "Inserted refs into HashMap in " << watch.elapsedSeconds() << " sec, " std::cerr << "Inserted refs into HashMap in " << watch.elapsedSeconds() << " sec, "
@ -222,7 +222,7 @@ int main(int argc, char ** argv)
size_t i = 0; size_t i = 0;
for (auto it = set.begin(); i < elems_show && it != set.end(); ++it, ++i) for (auto it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
{ {
devnull.write(it->getFirst().data, it->getFirst().size); devnull.write(it->getKey().data, it->getKey().size);
devnull << std::endl; devnull << std::endl;
} }
@ -249,7 +249,7 @@ int main(int argc, char ** argv)
size_t i = 0; size_t i = 0;
for (auto it = set.begin(); i < elems_show && it != set.end(); ++it, ++i) for (auto it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
{ {
devnull.write(it->getFirst().data, it->getFirst().size); devnull.write(it->getKey().data, it->getKey().size);
devnull << std::endl; devnull << std::endl;
} }
} }

View File

@ -75,7 +75,7 @@ void DataTypeEnum<Type>::fillMaps()
if (!inserted_value.second) if (!inserted_value.second)
throw Exception{"Duplicate names in enum: '" + name_and_value.first + "' = " + toString(name_and_value.second) throw Exception{"Duplicate names in enum: '" + name_and_value.first + "' = " + toString(name_and_value.second)
+ " and " + toString(*lookupResultGetMapped(inserted_value.first)), + " and " + toString(inserted_value.first->getMapped()),
ErrorCodes::SYNTAX_ERROR}; ErrorCodes::SYNTAX_ERROR};
const auto inserted_name = value_to_name_map.insert( const auto inserted_name = value_to_name_map.insert(

View File

@ -81,7 +81,7 @@ public:
if (!it) if (!it)
throw Exception{"Unknown element '" + field_name.toString() + "' for type " + getName(), ErrorCodes::LOGICAL_ERROR}; throw Exception{"Unknown element '" + field_name.toString() + "' for type " + getName(), ErrorCodes::LOGICAL_ERROR};
return *lookupResultGetMapped(it); return it->getMapped();
} }
Field castToName(const Field & value_or_name) const override; Field castToName(const Field & value_or_name) const override;

View File

@ -216,7 +216,7 @@ void ComplexKeyCacheDictionary::has(const Columns & key_columns, const DataTypes
std::vector<size_t> required_rows(outdated_keys.size()); std::vector<size_t> required_rows(outdated_keys.size());
std::transform( std::transform(
std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair) { return pair.getSecond().front(); }); std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair) { return pair.getMapped().front(); });
/// request new values /// request new values
update( update(

View File

@ -311,7 +311,7 @@ private:
std::vector<size_t> required_rows(outdated_keys.size()); std::vector<size_t> required_rows(outdated_keys.size());
std::transform( std::transform(
std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair) { return pair.getSecond().front(); }); std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair) { return pair.getMapped().front(); });
/// request new values /// request new values
update( update(
@ -437,7 +437,7 @@ private:
std::vector<size_t> required_rows(outdated_keys.size()); std::vector<size_t> required_rows(outdated_keys.size());
std::transform(std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair) std::transform(std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair)
{ {
return pair.getSecond().front(); return pair.getMapped().front();
}); });
update( update(
@ -469,7 +469,7 @@ private:
{ {
const StringRef key = keys_array[row]; const StringRef key = keys_array[row];
const auto it = map.find(key); const auto it = map.find(key);
const auto string_ref = it ? *lookupResultGetMapped(it) : get_default(row); const auto string_ref = it ? it->getMapped() : get_default(row);
out->insertData(string_ref.data, string_ref.size); out->insertData(string_ref.data, string_ref.size);
} }
} }
@ -576,7 +576,7 @@ private:
/// Check which ids have not been found and require setting null_value /// Check which ids have not been found and require setting null_value
for (const auto & key_found_pair : remaining_keys) for (const auto & key_found_pair : remaining_keys)
{ {
if (key_found_pair.getSecond()) if (key_found_pair.getMapped())
{ {
++found_num; ++found_num;
continue; continue;
@ -584,7 +584,7 @@ private:
++not_found_num; ++not_found_num;
auto key = key_found_pair.getFirst(); auto key = key_found_pair.getKey();
const auto hash = StringRefHash{}(key); const auto hash = StringRefHash{}(key);
const auto find_result = findCellIdx(key, now, hash); const auto find_result = findCellIdx(key, now, hash);
const auto & cell_idx = find_result.cell_idx; const auto & cell_idx = find_result.cell_idx;

View File

@ -561,7 +561,7 @@ void ComplexKeyHashedDictionary::getItemsImpl(
const auto key = placeKeysInPool(i, key_columns, keys, temporary_keys_pool); const auto key = placeKeysInPool(i, key_columns, keys, temporary_keys_pool);
const auto it = attr.find(key); const auto it = attr.find(key);
set_value(i, it ? static_cast<OutputType>(*lookupResultGetMapped(it)) : get_default(i)); set_value(i, it ? static_cast<OutputType>(it->getMapped()) : get_default(i));
/// free memory allocated for the key /// free memory allocated for the key
temporary_keys_pool.rollback(key.size); temporary_keys_pool.rollback(key.size);
@ -729,7 +729,7 @@ std::vector<StringRef> ComplexKeyHashedDictionary::getKeys(const Attribute & att
std::vector<StringRef> keys; std::vector<StringRef> keys;
keys.reserve(attr.size()); keys.reserve(attr.size());
for (const auto & key : attr) for (const auto & key : attr)
keys.push_back(key.getFirst()); keys.push_back(key.getKey());
return keys; return keys;
} }

View File

@ -13,8 +13,8 @@ template <typename T> auto first(const T & value) -> decltype(value.first) { ret
/// Overload for pair-like values that expose a public `second` member.
/// The trailing return type removes this overload from consideration for types
/// without such a member.
template <typename T>
auto second(const T & value) -> decltype(value.second)
{
    return value.second;
}
/// HashMap /// HashMap
/// Overload for hash-map cells: the key is obtained through `getKey()`.
/// The trailing return type removes this overload from consideration for types
/// without that accessor and keeps whatever type (value or reference) the
/// accessor returns.
template <typename T>
auto first(const T & value) -> decltype(value.getKey())
{
    return value.getKey();
}
/// Overload for hash-map cells: the mapped value is obtained through
/// `getMapped()`. The trailing return type removes this overload from
/// consideration for types without that accessor and keeps whatever type
/// (value or reference) the accessor returns.
template <typename T>
auto second(const T & value) -> decltype(value.getMapped())
{
    return value.getMapped();
}
} }

View File

@ -127,7 +127,7 @@ void RangeHashedDictionary::getString(
if (it) if (it)
{ {
const auto date = dates[i]; const auto date = dates[i];
const auto & ranges_and_values = *lookupResultGetMapped(it); const auto & ranges_and_values = it->getMapped();
const auto val_it const auto val_it
= std::find_if(std::begin(ranges_and_values), std::end(ranges_and_values), [date](const Value<StringRef> & v) = std::find_if(std::begin(ranges_and_values), std::end(ranges_and_values), [date](const Value<StringRef> & v)
{ {
@ -398,7 +398,7 @@ void RangeHashedDictionary::getItemsImpl(
if (it) if (it)
{ {
const auto date = dates[i]; const auto date = dates[i];
const auto & ranges_and_values = *lookupResultGetMapped(it); const auto & ranges_and_values = it->getMapped();
const auto val_it const auto val_it
= std::find_if(std::begin(ranges_and_values), std::end(ranges_and_values), [date](const Value<AttributeType> & v) = std::find_if(std::begin(ranges_and_values), std::end(ranges_and_values), [date](const Value<AttributeType> & v)
{ {
@ -425,7 +425,7 @@ void RangeHashedDictionary::setAttributeValueImpl(Attribute & attribute, const K
if (it) if (it)
{ {
auto & values = *lookupResultGetMapped(it); auto & values = it->getMapped();
const auto insert_it const auto insert_it
= std::lower_bound(std::begin(values), std::end(values), range, [](const Value<T> & lhs, const Range & rhs_range) = std::lower_bound(std::begin(values), std::end(values), range, [](const Value<T> & lhs, const Range & rhs_range)
@ -498,7 +498,7 @@ void RangeHashedDictionary::setAttributeValue(Attribute & attribute, const Key i
if (it) if (it)
{ {
auto & values = *lookupResultGetMapped(it); auto & values = it->getMapped();
const auto insert_it = std::lower_bound( const auto insert_it = std::lower_bound(
std::begin(values), std::end(values), range, [](const Value<StringRef> & lhs, const Range & rhs_range) std::begin(values), std::end(values), range, [](const Value<StringRef> & lhs, const Range & rhs_range)
@ -610,9 +610,9 @@ void RangeHashedDictionary::getIdsAndDates(
for (const auto & key : attr) for (const auto & key : attr)
{ {
for (const auto & value : key.getSecond()) for (const auto & value : key.getMapped())
{ {
ids.push_back(key.getFirst()); ids.push_back(key.getKey());
start_dates.push_back(value.range.left); start_dates.push_back(value.range.left);
end_dates.push_back(value.range.right); end_dates.push_back(value.range.right);

View File

@ -140,8 +140,8 @@ private:
std::lock_guard lock(mutex); std::lock_guard lock(mutex);
map.emplace(addr, it, inserted); map.emplace(addr, it, inserted);
if (inserted) if (inserted)
*lookupResultGetMapped(it) = impl(addr); it->getMapped() = impl(addr);
return *lookupResultGetMapped(it); return it->getMapped();
} }
}; };

View File

@ -467,15 +467,15 @@ ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, Mutable
for (const auto & pair : map) for (const auto & pair : map)
{ {
if (pair.getSecond() == args) if (pair.getMapped() == args)
{ {
++result_offset; ++result_offset;
if constexpr (is_numeric_column) if constexpr (is_numeric_column)
result_data.insertValue(pair.getFirst()); result_data.insertValue(pair.getKey());
else if constexpr (std::is_same<ColumnType, ColumnString>::value || std::is_same<ColumnType, ColumnFixedString>::value) else if constexpr (std::is_same<ColumnType, ColumnString>::value || std::is_same<ColumnType, ColumnFixedString>::value)
result_data.insertData(pair.getFirst().data, pair.getFirst().size); result_data.insertData(pair.getKey().data, pair.getKey().size);
else else
result_data.deserializeAndInsertFromArena(pair.getFirst().data); result_data.deserializeAndInsertFromArena(pair.getKey().data);
if (all_nullable) if (all_nullable)
null_map.push_back(0); null_map.push_back(0);

View File

@ -508,7 +508,7 @@ private:
{ {
auto it = table.find(src[i]); auto it = table.find(src[i]);
if (it) if (it)
memcpy(&dst[i], lookupResultGetMapped(it), sizeof(dst[i])); /// little endian. memcpy(&dst[i], &it->getMapped(), sizeof(dst[i])); /// little endian.
else else
dst[i] = dst_default; dst[i] = dst_default;
} }
@ -524,7 +524,7 @@ private:
{ {
auto it = table.find(src[i]); auto it = table.find(src[i]);
if (it) if (it)
memcpy(&dst[i], lookupResultGetMapped(it), sizeof(dst[i])); /// little endian. memcpy(&dst[i], &it->getMapped(), sizeof(dst[i])); /// little endian.
else else
dst[i] = dst_default[i]; dst[i] = dst_default[i];
} }
@ -540,7 +540,7 @@ private:
{ {
auto it = table.find(src[i]); auto it = table.find(src[i]);
if (it) if (it)
memcpy(&dst[i], lookupResultGetMapped(it), sizeof(dst[i])); memcpy(&dst[i], &it->getMapped(), sizeof(dst[i]));
else else
dst[i] = src[i]; dst[i] = src[i];
} }
@ -557,7 +557,7 @@ private:
for (size_t i = 0; i < size; ++i) for (size_t i = 0; i < size; ++i)
{ {
auto it = table.find(src[i]); auto it = table.find(src[i]);
StringRef ref = it ? *lookupResultGetMapped(it) : dst_default; StringRef ref = it ? it->getMapped() : dst_default;
dst_data.resize(current_dst_offset + ref.size); dst_data.resize(current_dst_offset + ref.size);
memcpy(&dst_data[current_dst_offset], ref.data, ref.size); memcpy(&dst_data[current_dst_offset], ref.data, ref.size);
current_dst_offset += ref.size; current_dst_offset += ref.size;
@ -581,7 +581,7 @@ private:
StringRef ref; StringRef ref;
if (it) if (it)
ref = *lookupResultGetMapped(it); ref = it->getMapped();
else else
{ {
ref.data = reinterpret_cast<const char *>(&dst_default_data[current_dst_default_offset]); ref.data = reinterpret_cast<const char *>(&dst_default_data[current_dst_default_offset]);
@ -611,7 +611,7 @@ private:
current_src_offset = src_offsets[i]; current_src_offset = src_offsets[i];
auto it = table.find(ref); auto it = table.find(ref);
if (it) if (it)
memcpy(&dst[i], lookupResultGetMapped(it), sizeof(dst[i])); memcpy(&dst[i], &it->getMapped(), sizeof(dst[i]));
else else
dst[i] = dst_default; dst[i] = dst_default;
} }
@ -632,7 +632,7 @@ private:
current_src_offset = src_offsets[i]; current_src_offset = src_offsets[i];
auto it = table.find(ref); auto it = table.find(ref);
if (it) if (it)
memcpy(&dst[i], lookupResultGetMapped(it), sizeof(dst[i])); memcpy(&dst[i], &it->getMapped(), sizeof(dst[i]));
else else
dst[i] = dst_default[i]; dst[i] = dst_default[i];
} }
@ -655,7 +655,7 @@ private:
auto it = table.find(src_ref); auto it = table.find(src_ref);
StringRef dst_ref = it ? *lookupResultGetMapped(it) : (with_default ? dst_default : src_ref); StringRef dst_ref = it ? it->getMapped() : (with_default ? dst_default : src_ref);
dst_data.resize(current_dst_offset + dst_ref.size); dst_data.resize(current_dst_offset + dst_ref.size);
memcpy(&dst_data[current_dst_offset], dst_ref.data, dst_ref.size); memcpy(&dst_data[current_dst_offset], dst_ref.data, dst_ref.size);
current_dst_offset += dst_ref.size; current_dst_offset += dst_ref.size;
@ -697,7 +697,7 @@ private:
StringRef dst_ref; StringRef dst_ref;
if (it) if (it)
dst_ref = *lookupResultGetMapped(it); dst_ref = it->getMapped();
else else
{ {
dst_ref.data = reinterpret_cast<const char *>(&dst_default_data[current_dst_default_offset]); dst_ref.data = reinterpret_cast<const char *>(&dst_default_data[current_dst_default_offset]);

View File

@ -180,8 +180,6 @@ struct AggregationMethodOneNumber
using Data = TData; using Data = TData;
using Key = typename Data::key_type; using Key = typename Data::key_type;
using Mapped = typename Data::mapped_type; using Mapped = typename Data::mapped_type;
using iterator = typename Data::iterator;
using const_iterator = typename Data::const_iterator;
Data data; Data data;
@ -356,8 +354,6 @@ struct AggregationMethodKeysFixed
using Data = TData; using Data = TData;
using Key = typename Data::key_type; using Key = typename Data::key_type;
using Mapped = typename Data::mapped_type; using Mapped = typename Data::mapped_type;
using iterator = typename Data::iterator;
using const_iterator = typename Data::const_iterator;
static constexpr bool has_nullable_keys = has_nullable_keys_; static constexpr bool has_nullable_keys = has_nullable_keys_;
static constexpr bool has_low_cardinality = has_low_cardinality_; static constexpr bool has_low_cardinality = has_low_cardinality_;

View File

@ -1268,7 +1268,7 @@ private:
for (; it != end; ++it) for (; it != end; ++it)
{ {
const Mapped & mapped = it->getSecond(); const Mapped & mapped = it->getMapped();
if (mapped.getUsed()) if (mapped.getUsed())
continue; continue;

View File

@ -162,8 +162,8 @@ int main(int argc, char ** argv)
map.emplace(data[i], it, inserted); map.emplace(data[i], it, inserted);
if (inserted) if (inserted)
{ {
new(lookupResultGetMapped(it)) Value; new (&it->getMapped()) Value;
std::swap(*lookupResultGetMapped(it), value); std::swap(it->getMapped(), value);
INIT INIT
} }
} }
@ -193,8 +193,8 @@ int main(int argc, char ** argv)
map.emplace(data[i], it, inserted); map.emplace(data[i], it, inserted);
if (inserted) if (inserted)
{ {
new(lookupResultGetMapped(it)) Value; new (&it->getMapped()) Value;
std::swap(*lookupResultGetMapped(it), value); std::swap(it->getMapped(), value);
INIT INIT
} }
} }
@ -225,8 +225,8 @@ int main(int argc, char ** argv)
map.emplace(data[i], it, inserted); map.emplace(data[i], it, inserted);
if (inserted) if (inserted)
{ {
new(lookupResultGetMapped(it)) Value; new (&it->getMapped()) Value;
std::swap(*lookupResultGetMapped(it), value); std::swap(it->getMapped(), value);
INIT INIT
} }
} }

View File

@ -85,7 +85,7 @@ int main(int, char **)
std::cerr << "Collisions: " << map.getCollisions() << std::endl; std::cerr << "Collisions: " << map.getCollisions() << std::endl;
for (auto x : map) for (auto x : map)
std::cerr << x.getFirst().toString() << " -> " << x.getSecond() << std::endl; std::cerr << x.getKey().toString() << " -> " << x.getMapped() << std::endl;
return 0; return 0;
} }

View File

@ -55,15 +55,15 @@ void NO_INLINE bench(const std::vector<UInt16> & data, const char * name)
map.emplace(data[i], it, inserted); map.emplace(data[i], it, inserted);
if (inserted) if (inserted)
*lookupResultGetMapped(it) = 1; it->getMapped() = 1;
else else
++*lookupResultGetMapped(it); ++it->getMapped();
} }
for (size_t i = 0, size = data.size(); i < size; ++i) for (size_t i = 0, size = data.size(); i < size; ++i)
{ {
auto it = map.find(data[i]); auto it = map.find(data[i]);
++*lookupResultGetMapped(it); ++it->getMapped();
} }
watch.stop(); watch.stop();
std::cerr << std::fixed << std::setprecision(2) << "HashMap (" << name << "). Size: " << map.size() std::cerr << std::fixed << std::setprecision(2) << "HashMap (" << name << "). Size: " << map.size()
@ -81,10 +81,10 @@ void insert(Map & map, StringRef & k)
typename Map::LookupResult it; typename Map::LookupResult it;
map.emplace(k, it, inserted, nullptr); map.emplace(k, it, inserted, nullptr);
if (inserted) if (inserted)
*lookupResultGetMapped(it) = 1; it->getMapped() = 1;
else else
++*lookupResultGetMapped(it); ++it->getMapped();
std::cout << *lookupResultGetMapped(map.find(k))<< std::endl; std::cout << map.find(k)->getMapped() << std::endl;
} }
int main(int argc, char ** argv) int main(int argc, char ** argv)

View File

@ -337,8 +337,8 @@ int main(int argc, char ** argv)
{ {
map.emplace(data[i], it, inserted); map.emplace(data[i], it, inserted);
if (inserted) if (inserted)
*lookupResultGetMapped(it) = 0; it->getMapped() = 0;
++*lookupResultGetMapped(it); ++it->getMapped();
} }
watch.stop(); watch.stop();
@ -366,8 +366,8 @@ int main(int argc, char ** argv)
{ {
map.emplace(data[i], it, inserted); map.emplace(data[i], it, inserted);
if (inserted) if (inserted)
*lookupResultGetMapped(it) = 0; it->getMapped() = 0;
++*lookupResultGetMapped(it); ++it->getMapped();
} }
watch.stop(); watch.stop();
@ -396,8 +396,8 @@ int main(int argc, char ** argv)
{ {
map.emplace(data[i], it, inserted); map.emplace(data[i], it, inserted);
if (inserted) if (inserted)
*lookupResultGetMapped(it) = 0; it->getMapped() = 0;
++*lookupResultGetMapped(it); ++it->getMapped();
} }
watch.stop(); watch.stop();
@ -426,8 +426,8 @@ int main(int argc, char ** argv)
{ {
map.emplace(data[i], it, inserted); map.emplace(data[i], it, inserted);
if (inserted) if (inserted)
*lookupResultGetMapped(it) = 0; it->getMapped() = 0;
++*lookupResultGetMapped(it); ++it->getMapped();
} }
watch.stop(); watch.stop();

View File

@ -595,8 +595,8 @@ void NO_INLINE bench(const std::vector<StringRef> & data, const char * name)
{ {
map.emplace(static_cast<const Key &>(data[i]), it, inserted); map.emplace(static_cast<const Key &>(data[i]), it, inserted);
if (inserted) if (inserted)
*lookupResultGetMapped(it) = 0; it->getMapped() = 0;
++*lookupResultGetMapped(it); ++it->getMapped();
} }
watch.stop(); watch.stop();

View File

@ -442,8 +442,8 @@ void NO_INLINE bench(const std::vector<StringRef> & data, const char * name)
{ {
map.emplace(static_cast<const Key &>(data[i]), it, inserted); map.emplace(static_cast<const Key &>(data[i]), it, inserted);
if (inserted) if (inserted)
*lookupResultGetMapped(it) = 0; it->getMapped() = 0;
++*lookupResultGetMapped(it); ++it->getMapped();
} }
watch.stop(); watch.stop();

View File

@ -144,8 +144,8 @@ int main(int argc, char ** argv)
{ {
map.emplace(data[i], it, inserted); map.emplace(data[i], it, inserted);
if (inserted) if (inserted)
*lookupResultGetMapped(it) = 0; it->getMapped() = 0;
++*lookupResultGetMapped(it); ++it->getMapped();
} }
watch.stop(); watch.stop();
@ -173,8 +173,8 @@ int main(int argc, char ** argv)
{ {
map.emplace(SmallStringRef(data[i].data, data[i].size), it, inserted); map.emplace(SmallStringRef(data[i].data, data[i].size), it, inserted);
if (inserted) if (inserted)
*lookupResultGetMapped(it) = 0; it->getMapped() = 0;
++*lookupResultGetMapped(it); ++it->getMapped();
} }
watch.stop(); watch.stop();

View File

@ -151,8 +151,8 @@ void NO_INLINE bench(const std::vector<StringRef> & data, DB::Arena &, const cha
{ {
map.emplace(DB::ArenaKeyHolder{data[i], pool}, it, inserted); map.emplace(DB::ArenaKeyHolder{data[i], pool}, it, inserted);
if (inserted) if (inserted)
*lookupResultGetMapped(it) = 0; it->getMapped() = 0;
++*lookupResultGetMapped(it); ++it->getMapped();
} }
watch.stop(); watch.stop();

View File

@ -67,8 +67,8 @@ int main(int argc, char ** argv)
{ {
map.emplace(data[i], it, inserted); map.emplace(data[i], it, inserted);
if (inserted) if (inserted)
*lookupResultGetMapped(it) = 0; it->getMapped() = 0;
++*lookupResultGetMapped(it); ++it->getMapped();
} }
watch.stop(); watch.stop();
@ -82,7 +82,7 @@ int main(int argc, char ** argv)
size_t elems = 0; size_t elems = 0;
for (const auto & kv : map) for (const auto & kv : map)
{ {
sum_counts += kv.getSecond(); sum_counts += kv.getMapped();
++elems; ++elems;
} }
@ -103,8 +103,8 @@ int main(int argc, char ** argv)
{ {
map.emplace(i, it, inserted); map.emplace(i, it, inserted);
if (inserted) if (inserted)
*lookupResultGetMapped(it) = 0; it->getMapped() = 0;
++*lookupResultGetMapped(it); ++it->getMapped();
} }
watch.stop(); watch.stop();
@ -118,11 +118,11 @@ int main(int argc, char ** argv)
size_t elems = 0; size_t elems = 0;
for (const auto & kv : map) for (const auto & kv : map)
{ {
sum_counts += kv.getSecond(); sum_counts += kv.getMapped();
++elems; ++elems;
if (kv.getFirst() > n) if (kv.getKey() > n)
std::cerr << kv.getFirst() << std::endl; std::cerr << kv.getKey() << std::endl;
} }
std::cerr << "sum_counts: " << sum_counts << ", elems: " << elems << std::endl; std::cerr << "sum_counts: " << sum_counts << ", elems: " << elems << std::endl;

View File

@ -65,9 +65,9 @@ inline size_t JSONEachRowRowInputFormat::columnIndex(const StringRef & name, siz
if (prev_positions.size() > key_index if (prev_positions.size() > key_index
&& prev_positions[key_index] && prev_positions[key_index]
&& name == *lookupResultGetKey(prev_positions[key_index])) && name == prev_positions[key_index]->getKey())
{ {
return *lookupResultGetMapped(prev_positions[key_index]); return prev_positions[key_index]->getMapped();
} }
else else
{ {
@ -78,7 +78,7 @@ inline size_t JSONEachRowRowInputFormat::columnIndex(const StringRef & name, siz
if (key_index < prev_positions.size()) if (key_index < prev_positions.size())
prev_positions[key_index] = it; prev_positions[key_index] = it;
return *lookupResultGetMapped(it); return it->getMapped();
} }
else else
return UNKNOWN_FIELD; return UNKNOWN_FIELD;

View File

@ -131,7 +131,7 @@ bool TSKVRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & ex
} }
else else
{ {
index = *lookupResultGetMapped(it); index = it->getMapped();
if (seen_columns[index]) if (seen_columns[index])
throw Exception("Duplicate field found while parsing TSKV format: " + name_ref.toString(), ErrorCodes::INCORRECT_DATA); throw Exception("Duplicate field found while parsing TSKV format: " + name_ref.toString(), ErrorCodes::INCORRECT_DATA);

View File

@ -57,7 +57,7 @@ void buildScatterSelector(
throw Exception("Too many partitions for single INSERT block (more than " + toString(max_parts) + "). The limit is controlled by 'max_partitions_per_insert_block' setting. Large number of partitions is a common misconception. It will lead to severe negative performance impact, including slow server startup, slow INSERT queries and slow SELECT queries. Recommended total number of partitions for a table is under 1000..10000. Please note, that partitioning is not intended to speed up SELECT queries (ORDER BY key is sufficient to make range queries fast). Partitions are intended for data manipulation (DROP PARTITION, etc).", ErrorCodes::TOO_MANY_PARTS); throw Exception("Too many partitions for single INSERT block (more than " + toString(max_parts) + "). The limit is controlled by 'max_partitions_per_insert_block' setting. Large number of partitions is a common misconception. It will lead to severe negative performance impact, including slow server startup, slow INSERT queries and slow SELECT queries. Recommended total number of partitions for a table is under 1000..10000. Please note, that partitioning is not intended to speed up SELECT queries (ORDER BY key is sufficient to make range queries fast). Partitions are intended for data manipulation (DROP PARTITION, etc).", ErrorCodes::TOO_MANY_PARTS);
partition_num_to_first_row.push_back(i); partition_num_to_first_row.push_back(i);
*lookupResultGetMapped(it) = partitions_count; it->getMapped() = partitions_count;
++partitions_count; ++partitions_count;
@ -70,7 +70,7 @@ void buildScatterSelector(
} }
if (partitions_count > 1) if (partitions_count > 1)
selector[i] = *lookupResultGetMapped(it); selector[i] = it->getMapped();
} }
} }

View File

@ -333,9 +333,9 @@ private:
{ {
for (size_t j = 0; j < columns.size(); ++j) for (size_t j = 0; j < columns.size(); ++j)
if (j == key_pos) if (j == key_pos)
columns[j]->insertData(rawData(it->getFirst()), rawSize(it->getFirst())); columns[j]->insertData(rawData(it->getKey()), rawSize(it->getKey()));
else else
columns[j]->insertFrom(*it->getSecond().block->getByPosition(column_indices[j]).column.get(), it->getSecond().row_num); columns[j]->insertFrom(*it->getMapped().block->getByPosition(column_indices[j]).column.get(), it->getMapped().row_num);
++rows_added; ++rows_added;
} }
else if constexpr (STRICTNESS == ASTTableJoin::Strictness::Asof) else if constexpr (STRICTNESS == ASTTableJoin::Strictness::Asof)
@ -343,11 +343,11 @@ private:
throw Exception("ASOF join storage is not implemented yet", ErrorCodes::NOT_IMPLEMENTED); throw Exception("ASOF join storage is not implemented yet", ErrorCodes::NOT_IMPLEMENTED);
} }
else else
for (auto ref_it = it->getSecond().begin(); ref_it.ok(); ++ref_it) for (auto ref_it = it->getMapped().begin(); ref_it.ok(); ++ref_it)
{ {
for (size_t j = 0; j < columns.size(); ++j) for (size_t j = 0; j < columns.size(); ++j)
if (j == key_pos) if (j == key_pos)
columns[j]->insertData(rawData(it->getFirst()), rawSize(it->getFirst())); columns[j]->insertData(rawData(it->getKey()), rawSize(it->getKey()));
else else
columns[j]->insertFrom(*ref_it->block->getByPosition(column_indices[j]).column.get(), ref_it->row_num); columns[j]->insertFrom(*ref_it->block->getByPosition(column_indices[j]).column.get(), ref_it->row_num);
++rows_added; ++rows_added;

View File

@ -1,15 +1,60 @@
# DateTime {#data_type-datetime} # DateTime {#data_type-datetime}
Date with time. Stored in four bytes as a Unix timestamp (unsigned). Allows storing values in the same range as for the Date type. The minimal value is output as 0000-00-00 00:00:00. Data structure storing Unix timestamp. Also, it can store a time zone.
The time is stored with accuracy up to one second (without leap seconds).
## Time Zones Syntax:
The date with time is converted from text (divided into component parts) to binary and back, using the system's time zone at the time the client or server starts. In text format, information about daylight savings is lost. ```sql
DateTime([timezone])
```
By default, the client switches to the timezone of the server when it connects. You can change this behavior by enabling the client command-line option `--use_client_time_zone`. Range of values in the Unix timestamp: [1970-01-01 00:00:00, 2105-12-31 23:59:59].
So when working with a textual date (for example, when saving text dumps), keep in mind that there may be ambiguity during changes for daylight savings time, and there may be problems matching data if the time zone changed. Resolution: 1 second.
## Usage remarks
ClickHouse stores date and time values in the Unix timestamp format that is independent of the time zones and daylight saving rules. The time zone value affects displaying `DateTime` values in text formats and parsing the input strings for storage. You can find the list of supported time zones in [IANA Time Zone Database](https://www.iana.org/time-zones).
You can explicitly set a time zone for a `DateTime`-type column when creating a table. If the time zone isn't set, ClickHouse uses the value of the [timezone](../operations/server_settings/settings.md#server_settings-timezone) server configuration parameter or the operating system settings at the moment of the ClickHouse server start.
The [clickhouse-client](../interfaces/cli.md) applies the server time zone by default if a time zone isn't explicitly defined when initializing the data type. To use the client time zone, run it with the `--use_client_time_zone` parameter.
ClickHouse outputs values in the `YYYY-MM-DD hh:mm:ss` text format by default. You can change the format with the [formatDateTime](../query_language/functions/date_time_functions.md#formatdatetime) function.
When inserting data into ClickHouse, you can use different formats of date and time strings, depending on the [date_time_input_format](../operations/settings/settings.md#settings-date_time_input_format) setting value.
## Examples
Creating a table with a `DateTime`-type column:
```sql
CREATE TABLE dt(
timestamp DateTime('Europe/Moscow')
)
```
Getting a time zone for a `DateTime`-type value:
```sql
SELECT
toDateTime(now(), 'Europe/Moscow') AS column,
toTypeName(column) AS x
```
```text
┌──────────────column─┬─x─────────────────────────┐
│ 2019-10-16 04:12:04 │ DateTime('Europe/Moscow') │
└─────────────────────┴───────────────────────────┘
```
## See Also
- [Type Conversion Functions](../query_language/functions/type_conversion_functions.md)
- [Functions for Working with Dates and Times](../query_language/functions/date_time_functions.md)
- [Functions for Working with Arrays](../query_language/functions/array_functions.md)
- [The `date_time_input_format` setting](../operations/settings/settings.md#settings-date_time_input_format)
- [The `timezone` server configuration parameter](../operations/server_settings/settings.md#server_settings-timezone)
- [Operator for Working with Dates and Times](../query_language/operators.md#operators-datetime)
- [The `Date` data type](date.md)
[Original article](https://clickhouse.yandex/docs/en/data_types/datetime/) <!--hide--> [Original article](https://clickhouse.yandex/docs/en/data_types/datetime/) <!--hide-->

View File

@ -0,0 +1,74 @@
# Interval {#data-type-interval}
The family of data types representing time and date intervals. The resulting types of the [INTERVAL](../../query_language/operators.md#operator-interval) operator.
!!! warning "Warning"
You can't use the `Interval` data types for storing values in tables.
Structure:
- Time interval as unsigned integer value.
- Type of an interval.
Supported interval types:
- `SECOND`
- `MINUTE`
- `HOUR`
- `DAY`
- `WEEK`
- `MONTH`
- `QUARTER`
- `YEAR`
For each interval type, there is a separate data type. For example, the `DAY` interval is expressed as the `IntervalDay` data type:
```sql
SELECT toTypeName(INTERVAL 4 DAY)
```
```text
┌─toTypeName(toIntervalDay(4))─┐
│ IntervalDay │
└──────────────────────────────┘
```
## Usage Remarks {#data-type-interval-usage-remarks}
You can use `Interval`-type values in arithmetical operations with [Date](../../data_types/date.md) and [DateTime](../../data_types/datetime.md)-type values. For example, you can add 4 days to the current time:
```sql
SELECT now() as current_date_time, current_date_time + INTERVAL 4 DAY
```
```text
┌───current_date_time─┬─plus(now(), toIntervalDay(4))─┐
│ 2019-10-23 10:58:45 │ 2019-10-27 10:58:45 │
└─────────────────────┴───────────────────────────────┘
```
Intervals of different types can't be combined. You can't use intervals like `4 DAY 1 HOUR`. Express intervals in units that are smaller than or equal to the smallest unit of the interval. For example, the `1 day and an hour` interval can be expressed as `25 HOUR` or `90000 SECOND`.
You can't perform arithmetical operations between `Interval`-type values, but you can add intervals of different types consecutively to some value. For example:
```sql
SELECT now() AS current_date_time, current_date_time + INTERVAL 4 DAY + INTERVAL 3 HOUR
```
```text
┌───current_date_time─┬─plus(plus(now(), toIntervalDay(4)), toIntervalHour(3))─┐
│ 2019-10-23 11:16:28 │ 2019-10-27 14:16:28 │
└─────────────────────┴────────────────────────────────────────────────────────┘
```
The following query causes an exception:
```sql
select now() AS current_date_time, current_date_time + (INTERVAL 4 DAY + INTERVAL 3 HOUR)
```
```text
Received exception from server (version 19.14.1):
Code: 43. DB::Exception: Received from localhost:9000. DB::Exception: Wrong argument types for function plus: if one argument is Interval, then another must be Date or DateTime..
```
## See Also
- [INTERVAL](../../query_language/operators.md#operator-interval) operator
- [toInterval](../../query_language/functions/type_conversion_functions.md#function-tointerval) type conversion functions

View File

@ -625,7 +625,7 @@ For the value of the `incl` attribute, see the section "[Configuration files](..
- [skip_unavailable_shards](../settings/settings.md#settings-skip_unavailable_shards) - [skip_unavailable_shards](../settings/settings.md#settings-skip_unavailable_shards)
## timezone ## timezone {#server_settings-timezone}
The server's time zone. The server's time zone.

View File

@ -334,7 +334,7 @@ For a time interval starting at 'StartTime' and continuing for 'Duration' second
For example, `timeSlots(toDateTime('2012-01-01 12:20:00'), 600) = [toDateTime('2012-01-01 12:00:00'), toDateTime('2012-01-01 12:30:00')]`. For example, `timeSlots(toDateTime('2012-01-01 12:20:00'), 600) = [toDateTime('2012-01-01 12:00:00'), toDateTime('2012-01-01 12:30:00')]`.
This is necessary for searching for pageviews in the corresponding session. This is necessary for searching for pageviews in the corresponding session.
## formatDateTime(Time, Format\[, Timezone\]) ## formatDateTime(Time, Format\[, Timezone\]) {#formatdatetime}
Function formats a Time according given Format string. N.B.: Format is a constant expression, e.g. you can not have multiple formats for single result column. Function formats a Time according given Format string. N.B.: Format is a constant expression, e.g. you can not have multiple formats for single result column.

View File

@ -321,7 +321,7 @@ SELECT toTypeName(CAST(x, 'Nullable(UInt16)')) FROM t_null
└─────────────────────────────────────────┘ └─────────────────────────────────────────┘
``` ```
## toInterval(Year|Quarter|Month|Week|Day|Hour|Minute|Second) ## toInterval(Year|Quarter|Month|Week|Day|Hour|Minute|Second) {#function-tointerval}
Converts a Number type argument to a Interval type (duration). Converts a Number type argument to a Interval type (duration).
The interval type is actually very useful, you can use this type of data to perform arithmetic operations directly with Date or DateTime. At the same time, ClickHouse provides a more convenient syntax for declaring Interval type data. For example: The interval type is actually very useful, you can use this type of data to perform arithmetic operations directly with Date or DateTime. At the same time, ClickHouse provides a more convenient syntax for declaring Interval type data. For example:

View File

@ -65,7 +65,9 @@ Groups of operators are listed in order of priority (the higher it is in the lis
`a GLOBAL NOT IN ...` The `globalNotIn(a, b) function.` `a GLOBAL NOT IN ...` The `globalNotIn(a, b) function.`
## Operator for Working With Dates and Times {#operators-datetime} ## Operators for Working with Dates and Times {#operators-datetime}
### EXTRACT {#operator-extract}
```sql ```sql
EXTRACT(part FROM date); EXTRACT(part FROM date);
@ -120,7 +122,6 @@ SELECT
FROM test.Orders; FROM test.Orders;
``` ```
```text ```text
┌─OrderYear─┬─OrderMonth─┬─OrderDay─┬─OrderHour─┬─OrderMinute─┬─OrderSecond─┐ ┌─OrderYear─┬─OrderMonth─┬─OrderDay─┬─OrderHour─┬─OrderMinute─┬─OrderSecond─┐
│ 2008 │ 10 │ 11 │ 13 │ 23 │ 44 │ │ 2008 │ 10 │ 11 │ 13 │ 23 │ 44 │
└───────────┴────────────┴──────────┴───────────┴─────────────┴─────────────┘ └───────────┴────────────┴──────────┴───────────┴─────────────┴─────────────┘
@ -128,6 +129,39 @@ FROM test.Orders;
You can see more examples in [tests](https://github.com/ClickHouse/ClickHouse/blob/master/dbms/tests/queries/0_stateless/00619_extract.sql). You can see more examples in [tests](https://github.com/ClickHouse/ClickHouse/blob/master/dbms/tests/queries/0_stateless/00619_extract.sql).
### INTERVAL {#operator-interval}
Creates an [Interval](../data_types/special_data_types/interval.md)-type value that should be used in arithmetical operations with [Date](../data_types/date.md) and [DateTime](../data_types/datetime.md)-type values.
Types of intervals:
- `SECOND`
- `MINUTE`
- `HOUR`
- `DAY`
- `WEEK`
- `MONTH`
- `QUARTER`
- `YEAR`
!!! warning "Warning"
Intervals of different types can't be combined. You can't use expressions like `INTERVAL 4 DAY 1 HOUR`. Express intervals in units that are smaller than or equal to the smallest unit of the interval, for example `INTERVAL 25 HOUR`. You can also use consecutive operations, as in the example below.
Example:
```sql
SELECT now() AS current_date_time, current_date_time + INTERVAL 4 DAY + INTERVAL 3 HOUR
```
```text
┌───current_date_time─┬─plus(plus(now(), toIntervalDay(4)), toIntervalHour(3))─┐
│ 2019-10-23 11:16:28 │ 2019-10-27 14:16:28 │
└─────────────────────┴────────────────────────────────────────────────────────┘
```
**See Also**
- [Interval](../data_types/special_data_types/interval.md) data type
- [toInterval](functions/type_conversion_functions.md#function-tointerval) type conversion functions
## Logical Negation Operator ## Logical Negation Operator
`NOT a` The `not(a) function.` `NOT a` The `not(a) function.`

View File

@ -0,0 +1 @@
../../../en/data_types/special_data_types/interval.md

View File

@ -172,6 +172,7 @@ nav:
- 'Expression': 'data_types/special_data_types/expression.md' - 'Expression': 'data_types/special_data_types/expression.md'
- 'Set': 'data_types/special_data_types/set.md' - 'Set': 'data_types/special_data_types/set.md'
- 'Nothing': 'data_types/special_data_types/nothing.md' - 'Nothing': 'data_types/special_data_types/nothing.md'
- 'Interval': 'data_types/special_data_types/interval.md'
- 'Domains': - 'Domains':
- 'Overview': 'data_types/domains/overview.md' - 'Overview': 'data_types/domains/overview.md'
- 'IPv4': 'data_types/domains/ipv4.md' - 'IPv4': 'data_types/domains/ipv4.md'

View File

@ -57,6 +57,7 @@ nav:
- 'Expression': 'data_types/special_data_types/expression.md' - 'Expression': 'data_types/special_data_types/expression.md'
- 'Set': 'data_types/special_data_types/set.md' - 'Set': 'data_types/special_data_types/set.md'
- 'Nothing': 'data_types/special_data_types/nothing.md' - 'Nothing': 'data_types/special_data_types/nothing.md'
- 'Interval': 'data_types/special_data_types/interval.md'
- 'Domains': - 'Domains':
- 'Overview': 'data_types/domains/overview.md' - 'Overview': 'data_types/domains/overview.md'
- 'IPv4': 'data_types/domains/ipv4.md' - 'IPv4': 'data_types/domains/ipv4.md'

View File

@ -56,6 +56,7 @@ nav:
- 'Expression': 'data_types/special_data_types/expression.md' - 'Expression': 'data_types/special_data_types/expression.md'
- 'Set': 'data_types/special_data_types/set.md' - 'Set': 'data_types/special_data_types/set.md'
- 'Nothing': 'data_types/special_data_types/nothing.md' - 'Nothing': 'data_types/special_data_types/nothing.md'
- 'Interval': 'data_types/special_data_types/interval.md'
- 'Domain类型': - 'Domain类型':
- '介绍': 'data_types/domains/overview.md' - '介绍': 'data_types/domains/overview.md'
- 'IPv4': 'data_types/domains/ipv4.md' - 'IPv4': 'data_types/domains/ipv4.md'

View File

@ -0,0 +1 @@
../../../en/data_types/special_data_types/interval.md

View File

@ -145,7 +145,7 @@ SELECT toTypeName(CAST(x, 'Nullable(UInt16)')) FROM t_null
└─────────────────────────────────────────┘ └─────────────────────────────────────────┘
``` ```
## toIntervalYear, toIntervalQuarter, toIntervalMonth, toIntervalWeek, toIntervalDay, toIntervalHour, toIntervalMinute, toIntervalSecond ## toIntervalYear, toIntervalQuarter, toIntervalMonth, toIntervalWeek, toIntervalDay, toIntervalHour, toIntervalMinute, toIntervalSecond {#function-tointerval}
将数字类型参数转换为Interval类型时间区间 将数字类型参数转换为Interval类型时间区间
Interval类型实际上是非常有用的您可以使用此类型的数据直接与Date或DateTime执行算术运算。同时ClickHouse为Interval类型数据的声明提供了更方便的语法。例如 Interval类型实际上是非常有用的您可以使用此类型的数据直接与Date或DateTime执行算术运算。同时ClickHouse为Interval类型数据的声明提供了更方便的语法。例如

View File

@ -82,6 +82,92 @@
条件运算符会先计算表达式b和表达式c的值再根据表达式a的真假返回相应的值。如果表达式b和表达式c是 [arrayJoin()](functions/array_join.md#functions_arrayjoin) 函数则不管表达式a是真是假每行都会被复制展开。 条件运算符会先计算表达式b和表达式c的值再根据表达式a的真假返回相应的值。如果表达式b和表达式c是 [arrayJoin()](functions/array_join.md#functions_arrayjoin) 函数则不管表达式a是真是假每行都会被复制展开。
## Operators for Working with Dates and Times {#operators-datetime}
### EXTRACT {#operator-extract}
```sql
EXTRACT(part FROM date);
```
Extracts a part from a given date. For example, you can retrieve a month from a given date, or a second from a time.
The `part` parameter specifies which part of the date to retrieve. The following values are available:
- `DAY` — The day of the month. Possible values: 1–31.
- `MONTH` — The number of a month. Possible values: 1–12.
- `YEAR` — The year.
- `SECOND` — The second. Possible values: 0–59.
- `MINUTE` — The minute. Possible values: 0–59.
- `HOUR` — The hour. Possible values: 0–23.
The `part` parameter is case-insensitive.
The `date` parameter specifies the date or the time to process. Either [Date](../data_types/date.md) or [DateTime](../data_types/datetime.md) type is supported.
Examples:
```sql
SELECT EXTRACT(DAY FROM toDate('2017-06-15'));
SELECT EXTRACT(MONTH FROM toDate('2017-06-15'));
SELECT EXTRACT(YEAR FROM toDate('2017-06-15'));
```
In the following example we create a table and insert into it a value with the `DateTime` type.
```sql
CREATE TABLE test.Orders
(
OrderId UInt64,
OrderName String,
OrderDate DateTime
)
ENGINE = Log;
```
```sql
INSERT INTO test.Orders VALUES (1, 'Jarlsberg Cheese', toDateTime('2008-10-11 13:23:44'));
```
```sql
SELECT
toYear(OrderDate) AS OrderYear,
toMonth(OrderDate) AS OrderMonth,
toDayOfMonth(OrderDate) AS OrderDay,
toHour(OrderDate) AS OrderHour,
toMinute(OrderDate) AS OrderMinute,
toSecond(OrderDate) AS OrderSecond
FROM test.Orders;
```
```text
┌─OrderYear─┬─OrderMonth─┬─OrderDay─┬─OrderHour─┬─OrderMinute─┬─OrderSecond─┐
│ 2008 │ 10 │ 11 │ 13 │ 23 │ 44 │
└───────────┴────────────┴──────────┴───────────┴─────────────┴─────────────┘
```
You can see more examples in [tests](https://github.com/ClickHouse/ClickHouse/blob/master/dbms/tests/queries/0_stateless/00619_extract.sql).
### INTERVAL {#operator-interval}
Creates an [Interval](../data_types/special_data_types/interval.md)-type value that should be used in arithmetical operations with [Date](../data_types/date.md) and [DateTime](../data_types/datetime.md)-type values.
Example:
```sql
SELECT now() AS current_date_time, current_date_time + INTERVAL 4 DAY + INTERVAL 3 HOUR
```
```text
┌───current_date_time─┬─plus(plus(now(), toIntervalDay(4)), toIntervalHour(3))─┐
│ 2019-10-23 11:16:28 │ 2019-10-27 14:16:28 │
└─────────────────────┴────────────────────────────────────────────────────────┘
```
**See Also**
- [Interval](../data_types/special_data_types/interval.md) data type
- [toInterval](functions/type_conversion_functions.md#function-tointerval) type conversion functions
## CASE条件表达式 {#operator_case} ## CASE条件表达式 {#operator_case}
``` sql ``` sql

View File

@ -64,6 +64,18 @@ def get_merge_base(first, second, project_root):
logging.error('Cannot find merge base for %s and %s', first, second) logging.error('Cannot find merge base for %s and %s', first, second)
raise raise
def rev_parse(rev, project_root):
    """Resolve a git revision to the SHA printed by `git rev-parse`.

    Args:
        rev: Revision expression understood by `git rev-parse`.
        project_root: Directory in which the git command is executed.

    Returns:
        The first whitespace-separated token of the command output, after it
        has been passed to check_sha().

    Raises:
        Any exception raised while running git, extracting the token, or in
        check_sha() is logged and then re-raised unchanged.
    """
    try:
        # Use an argument list instead of a shell command line so that the
        # caller-supplied `rev` is never interpreted by a shell.
        text = subprocess.check_output(['git', 'rev-parse', rev], cwd=project_root)
        text = text.decode('utf-8', 'ignore')
        # split() without arguments never yields empty strings; an empty
        # output raises IndexError, which is handled like any other failure.
        sha = text.split()[0]
        check_sha(sha)
        return sha
    except Exception:
        logging.error('Cannot find revision %s', rev)
        raise
# Get list of commits from branch to base_sha. Update commits_info. # Get list of commits from branch to base_sha. Update commits_info.
def get_commits_from_branch(repo, branch, base_sha, commits_info, max_pages, token, max_retries, retry_timeout): def get_commits_from_branch(repo, branch, base_sha, commits_info, max_pages, token, max_retries, retry_timeout):
@ -230,7 +242,7 @@ def parse_one_pull_request(item):
if lines: if lines:
for i in range(len(lines) - 1): for i in range(len(lines) - 1):
if re.match(r'(?i)category.*:$', lines[i]): if re.match(r'(?i).*category.*:$', lines[i]):
cat_pos = i cat_pos = i
if re.match(r'(?i)^\**\s*(Short description|Change\s*log entry)', lines[i]): if re.match(r'(?i)^\**\s*(Short description|Change\s*log entry)', lines[i]):
short_descr_pos = i short_descr_pos = i
@ -460,5 +472,7 @@ if __name__ == '__main__':
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s') logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
repo_folder = os.path.expanduser(repo_folder) repo_folder = os.path.expanduser(repo_folder)
new_release_tag = rev_parse(new_release_tag, repo_folder)
prev_release_tag = rev_parse(prev_release_tag, repo_folder)
make_changelog(new_release_tag, prev_release_tag, pull_requests, repo, repo_folder, state_file, token, max_retry, retry_timeout) make_changelog(new_release_tag, prev_release_tag, pull_requests, repo, repo_folder, state_file, token, max_retry, retry_timeout)

View File

@ -105,7 +105,7 @@ public:
if (table.end() == it) if (table.end() == it)
return pos - data; return pos - data;
*pos = it->getSecond().sample(random()); *pos = it->getMapped().sample(random());
/// Zero byte marks end of string. /// Zero byte marks end of string.
if (0 == *pos) if (0 == *pos)
@ -125,12 +125,12 @@ public:
for (auto & elem : table) for (auto & elem : table)
{ {
UInt32 new_total = 0; UInt32 new_total = 0;
for (auto & frequency : elem.getSecond().data) for (auto & frequency : elem.getMapped().data)
{ {
frequency.count = transform(frequency.count); frequency.count = transform(frequency.count);
new_total += frequency.count; new_total += frequency.count;
} }
elem.getSecond().total = new_total; elem.getMapped().total = new_total;
} }
} }
@ -142,10 +142,10 @@ public:
for (const auto & elem : table) for (const auto & elem : table)
{ {
writeBinary(elem.getFirst(), out); writeBinary(elem.getKey(), out);
writeBinary(UInt8(elem.getSecond().data.size()), out); writeBinary(UInt8(elem.getMapped().data.size()), out);
for (const auto & frequency : elem.getSecond().data) for (const auto & frequency : elem.getMapped().data)
{ {
writeBinary(frequency.byte, out); writeBinary(frequency.byte, out);
writeVarUInt(frequency.count, out); writeVarUInt(frequency.count, out);