mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-18 12:22:12 +00:00
Merge pull request #66952 from ClickHouse/backport/24.6/66579
Backport #66579 to 24.6: Fix weak hash for sparse
This commit is contained in:
commit
dddd8e2695
@ -362,13 +362,10 @@ void ColumnAggregateFunction::updateHashWithValue(size_t n, SipHash & hash) cons
|
|||||||
hash.update(wbuf.str().c_str(), wbuf.str().size());
|
hash.update(wbuf.str().c_str(), wbuf.str().size());
|
||||||
}
|
}
|
||||||
|
|
||||||
void ColumnAggregateFunction::updateWeakHash32(WeakHash32 & hash) const
|
WeakHash32 ColumnAggregateFunction::getWeakHash32() const
|
||||||
{
|
{
|
||||||
auto s = data.size();
|
auto s = data.size();
|
||||||
if (hash.getData().size() != data.size())
|
WeakHash32 hash(s);
|
||||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
|
|
||||||
"column size is {}, hash size is {}", std::to_string(s), hash.getData().size());
|
|
||||||
|
|
||||||
auto & hash_data = hash.getData();
|
auto & hash_data = hash.getData();
|
||||||
|
|
||||||
std::vector<UInt8> v;
|
std::vector<UInt8> v;
|
||||||
@ -379,6 +376,8 @@ void ColumnAggregateFunction::updateWeakHash32(WeakHash32 & hash) const
|
|||||||
wbuf.finalize();
|
wbuf.finalize();
|
||||||
hash_data[i] = ::updateWeakHash32(v.data(), v.size(), hash_data[i]);
|
hash_data[i] = ::updateWeakHash32(v.data(), v.size(), hash_data[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return hash;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ColumnAggregateFunction::updateHashFast(SipHash & hash) const
|
void ColumnAggregateFunction::updateHashFast(SipHash & hash) const
|
||||||
|
@ -170,7 +170,7 @@ public:
|
|||||||
|
|
||||||
void updateHashWithValue(size_t n, SipHash & hash) const override;
|
void updateHashWithValue(size_t n, SipHash & hash) const override;
|
||||||
|
|
||||||
void updateWeakHash32(WeakHash32 & hash) const override;
|
WeakHash32 getWeakHash32() const override;
|
||||||
|
|
||||||
void updateHashFast(SipHash & hash) const override;
|
void updateHashFast(SipHash & hash) const override;
|
||||||
|
|
||||||
|
@ -271,15 +271,12 @@ void ColumnArray::updateHashWithValue(size_t n, SipHash & hash) const
|
|||||||
getData().updateHashWithValue(offset + i, hash);
|
getData().updateHashWithValue(offset + i, hash);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ColumnArray::updateWeakHash32(WeakHash32 & hash) const
|
WeakHash32 ColumnArray::getWeakHash32() const
|
||||||
{
|
{
|
||||||
auto s = offsets->size();
|
auto s = offsets->size();
|
||||||
if (hash.getData().size() != s)
|
WeakHash32 hash(s);
|
||||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
|
|
||||||
"column size is {}, hash size is {}", s, hash.getData().size());
|
|
||||||
|
|
||||||
WeakHash32 internal_hash(data->size());
|
WeakHash32 internal_hash = data->getWeakHash32();
|
||||||
data->updateWeakHash32(internal_hash);
|
|
||||||
|
|
||||||
Offset prev_offset = 0;
|
Offset prev_offset = 0;
|
||||||
const auto & offsets_data = getOffsets();
|
const auto & offsets_data = getOffsets();
|
||||||
@ -300,6 +297,8 @@ void ColumnArray::updateWeakHash32(WeakHash32 & hash) const
|
|||||||
|
|
||||||
prev_offset = offsets_data[i];
|
prev_offset = offsets_data[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return hash;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ColumnArray::updateHashFast(SipHash & hash) const
|
void ColumnArray::updateHashFast(SipHash & hash) const
|
||||||
|
@ -82,7 +82,7 @@ public:
|
|||||||
const char * deserializeAndInsertFromArena(const char * pos) override;
|
const char * deserializeAndInsertFromArena(const char * pos) override;
|
||||||
const char * skipSerializedInArena(const char * pos) const override;
|
const char * skipSerializedInArena(const char * pos) const override;
|
||||||
void updateHashWithValue(size_t n, SipHash & hash) const override;
|
void updateHashWithValue(size_t n, SipHash & hash) const override;
|
||||||
void updateWeakHash32(WeakHash32 & hash) const override;
|
WeakHash32 getWeakHash32() const override;
|
||||||
void updateHashFast(SipHash & hash) const override;
|
void updateHashFast(SipHash & hash) const override;
|
||||||
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
|
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
|
||||||
void insert(const Field & x) override;
|
void insert(const Field & x) override;
|
||||||
|
@ -3,6 +3,7 @@
|
|||||||
#include <optional>
|
#include <optional>
|
||||||
#include <Core/Field.h>
|
#include <Core/Field.h>
|
||||||
#include <Columns/IColumn.h>
|
#include <Columns/IColumn.h>
|
||||||
|
#include <Common/WeakHash.h>
|
||||||
#include <IO/BufferWithOwnMemory.h>
|
#include <IO/BufferWithOwnMemory.h>
|
||||||
|
|
||||||
|
|
||||||
@ -94,7 +95,7 @@ public:
|
|||||||
const char * deserializeAndInsertFromArena(const char *) override { throwMustBeDecompressed(); }
|
const char * deserializeAndInsertFromArena(const char *) override { throwMustBeDecompressed(); }
|
||||||
const char * skipSerializedInArena(const char *) const override { throwMustBeDecompressed(); }
|
const char * skipSerializedInArena(const char *) const override { throwMustBeDecompressed(); }
|
||||||
void updateHashWithValue(size_t, SipHash &) const override { throwMustBeDecompressed(); }
|
void updateHashWithValue(size_t, SipHash &) const override { throwMustBeDecompressed(); }
|
||||||
void updateWeakHash32(WeakHash32 &) const override { throwMustBeDecompressed(); }
|
WeakHash32 getWeakHash32() const override { throwMustBeDecompressed(); }
|
||||||
void updateHashFast(SipHash &) const override { throwMustBeDecompressed(); }
|
void updateHashFast(SipHash &) const override { throwMustBeDecompressed(); }
|
||||||
ColumnPtr filter(const Filter &, ssize_t) const override { throwMustBeDecompressed(); }
|
ColumnPtr filter(const Filter &, ssize_t) const override { throwMustBeDecompressed(); }
|
||||||
void expand(const Filter &, bool) override { throwMustBeDecompressed(); }
|
void expand(const Filter &, bool) override { throwMustBeDecompressed(); }
|
||||||
|
@ -137,18 +137,10 @@ void ColumnConst::updatePermutation(PermutationSortDirection /*direction*/, Perm
|
|||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
void ColumnConst::updateWeakHash32(WeakHash32 & hash) const
|
WeakHash32 ColumnConst::getWeakHash32() const
|
||||||
{
|
{
|
||||||
if (hash.getData().size() != s)
|
WeakHash32 element_hash = data->getWeakHash32();
|
||||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
|
return WeakHash32(s, element_hash.getData()[0]);
|
||||||
"column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size()));
|
|
||||||
|
|
||||||
WeakHash32 element_hash(1);
|
|
||||||
data->updateWeakHash32(element_hash);
|
|
||||||
size_t data_hash = element_hash.getData()[0];
|
|
||||||
|
|
||||||
for (auto & value : hash.getData())
|
|
||||||
value = static_cast<UInt32>(intHashCRC32(data_hash, value));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void ColumnConst::compareColumn(
|
void ColumnConst::compareColumn(
|
||||||
|
@ -190,7 +190,7 @@ public:
|
|||||||
data->updateHashWithValue(0, hash);
|
data->updateHashWithValue(0, hash);
|
||||||
}
|
}
|
||||||
|
|
||||||
void updateWeakHash32(WeakHash32 & hash) const override;
|
WeakHash32 getWeakHash32() const override;
|
||||||
|
|
||||||
void updateHashFast(SipHash & hash) const override
|
void updateHashFast(SipHash & hash) const override
|
||||||
{
|
{
|
||||||
|
@ -28,7 +28,6 @@ namespace ErrorCodes
|
|||||||
extern const int PARAMETER_OUT_OF_BOUND;
|
extern const int PARAMETER_OUT_OF_BOUND;
|
||||||
extern const int SIZES_OF_COLUMNS_DOESNT_MATCH;
|
extern const int SIZES_OF_COLUMNS_DOESNT_MATCH;
|
||||||
extern const int NOT_IMPLEMENTED;
|
extern const int NOT_IMPLEMENTED;
|
||||||
extern const int LOGICAL_ERROR;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <is_decimal T>
|
template <is_decimal T>
|
||||||
@ -72,13 +71,10 @@ void ColumnDecimal<T>::updateHashWithValue(size_t n, SipHash & hash) const
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <is_decimal T>
|
template <is_decimal T>
|
||||||
void ColumnDecimal<T>::updateWeakHash32(WeakHash32 & hash) const
|
WeakHash32 ColumnDecimal<T>::getWeakHash32() const
|
||||||
{
|
{
|
||||||
auto s = data.size();
|
auto s = data.size();
|
||||||
|
WeakHash32 hash(s);
|
||||||
if (hash.getData().size() != s)
|
|
||||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
|
|
||||||
"column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size()));
|
|
||||||
|
|
||||||
const T * begin = data.data();
|
const T * begin = data.data();
|
||||||
const T * end = begin + s;
|
const T * end = begin + s;
|
||||||
@ -90,6 +86,8 @@ void ColumnDecimal<T>::updateWeakHash32(WeakHash32 & hash) const
|
|||||||
++begin;
|
++begin;
|
||||||
++hash_data;
|
++hash_data;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return hash;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <is_decimal T>
|
template <is_decimal T>
|
||||||
|
@ -90,7 +90,7 @@ public:
|
|||||||
const char * deserializeAndInsertFromArena(const char * pos) override;
|
const char * deserializeAndInsertFromArena(const char * pos) override;
|
||||||
const char * skipSerializedInArena(const char * pos) const override;
|
const char * skipSerializedInArena(const char * pos) const override;
|
||||||
void updateHashWithValue(size_t n, SipHash & hash) const override;
|
void updateHashWithValue(size_t n, SipHash & hash) const override;
|
||||||
void updateWeakHash32(WeakHash32 & hash) const override;
|
WeakHash32 getWeakHash32() const override;
|
||||||
void updateHashFast(SipHash & hash) const override;
|
void updateHashFast(SipHash & hash) const override;
|
||||||
int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override;
|
int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override;
|
||||||
void getPermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
|
void getPermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
|
||||||
|
@ -4,6 +4,7 @@
|
|||||||
#include <Columns/ColumnVector.h>
|
#include <Columns/ColumnVector.h>
|
||||||
#include <Columns/ColumnVariant.h>
|
#include <Columns/ColumnVariant.h>
|
||||||
#include <DataTypes/IDataType.h>
|
#include <DataTypes/IDataType.h>
|
||||||
|
#include <Common/WeakHash.h>
|
||||||
|
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
@ -167,9 +168,9 @@ public:
|
|||||||
|
|
||||||
void updateHashWithValue(size_t n, SipHash & hash) const override;
|
void updateHashWithValue(size_t n, SipHash & hash) const override;
|
||||||
|
|
||||||
void updateWeakHash32(WeakHash32 & hash) const override
|
WeakHash32 getWeakHash32() const override
|
||||||
{
|
{
|
||||||
variant_column->updateWeakHash32(hash);
|
return variant_column->getWeakHash32();
|
||||||
}
|
}
|
||||||
|
|
||||||
void updateHashFast(SipHash & hash) const override
|
void updateHashFast(SipHash & hash) const override
|
||||||
|
@ -129,14 +129,10 @@ void ColumnFixedString::updateHashWithValue(size_t index, SipHash & hash) const
|
|||||||
hash.update(reinterpret_cast<const char *>(&chars[n * index]), n);
|
hash.update(reinterpret_cast<const char *>(&chars[n * index]), n);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ColumnFixedString::updateWeakHash32(WeakHash32 & hash) const
|
WeakHash32 ColumnFixedString::getWeakHash32() const
|
||||||
{
|
{
|
||||||
auto s = size();
|
auto s = size();
|
||||||
|
WeakHash32 hash(s);
|
||||||
if (hash.getData().size() != s)
|
|
||||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
|
|
||||||
"column size is {}, "
|
|
||||||
"hash size is {}", std::to_string(s), std::to_string(hash.getData().size()));
|
|
||||||
|
|
||||||
const UInt8 * pos = chars.data();
|
const UInt8 * pos = chars.data();
|
||||||
UInt32 * hash_data = hash.getData().data();
|
UInt32 * hash_data = hash.getData().data();
|
||||||
@ -148,6 +144,8 @@ void ColumnFixedString::updateWeakHash32(WeakHash32 & hash) const
|
|||||||
pos += n;
|
pos += n;
|
||||||
++hash_data;
|
++hash_data;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return hash;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ColumnFixedString::updateHashFast(SipHash & hash) const
|
void ColumnFixedString::updateHashFast(SipHash & hash) const
|
||||||
|
@ -125,7 +125,7 @@ public:
|
|||||||
|
|
||||||
void updateHashWithValue(size_t index, SipHash & hash) const override;
|
void updateHashWithValue(size_t index, SipHash & hash) const override;
|
||||||
|
|
||||||
void updateWeakHash32(WeakHash32 & hash) const override;
|
WeakHash32 getWeakHash32() const override;
|
||||||
|
|
||||||
void updateHashFast(SipHash & hash) const override;
|
void updateHashFast(SipHash & hash) const override;
|
||||||
|
|
||||||
|
@ -4,6 +4,7 @@
|
|||||||
#include <Core/NamesAndTypes.h>
|
#include <Core/NamesAndTypes.h>
|
||||||
#include <Core/ColumnsWithTypeAndName.h>
|
#include <Core/ColumnsWithTypeAndName.h>
|
||||||
#include <Columns/IColumn.h>
|
#include <Columns/IColumn.h>
|
||||||
|
#include <Common/WeakHash.h>
|
||||||
|
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
@ -122,9 +123,9 @@ public:
|
|||||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "updateHashWithValue is not implemented for {}", getName());
|
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "updateHashWithValue is not implemented for {}", getName());
|
||||||
}
|
}
|
||||||
|
|
||||||
void updateWeakHash32(WeakHash32 &) const override
|
WeakHash32 getWeakHash32() const override
|
||||||
{
|
{
|
||||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "updateWeakHash32 is not implemented for {}", getName());
|
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "getWeakHash32 is not implemented for {}", getName());
|
||||||
}
|
}
|
||||||
|
|
||||||
void updateHashFast(SipHash &) const override
|
void updateHashFast(SipHash &) const override
|
||||||
|
@ -7,8 +7,7 @@
|
|||||||
#include <Common/HashTable/HashMap.h>
|
#include <Common/HashTable/HashMap.h>
|
||||||
#include <Common/WeakHash.h>
|
#include <Common/WeakHash.h>
|
||||||
#include <Common/assert_cast.h>
|
#include <Common/assert_cast.h>
|
||||||
#include "Storages/IndicesDescription.h"
|
#include <base/types.h>
|
||||||
#include "base/types.h"
|
|
||||||
#include <base/sort.h>
|
#include <base/sort.h>
|
||||||
#include <base/scope_guard.h>
|
#include <base/scope_guard.h>
|
||||||
|
|
||||||
@ -312,19 +311,10 @@ const char * ColumnLowCardinality::skipSerializedInArena(const char * pos) const
|
|||||||
return getDictionary().skipSerializedInArena(pos);
|
return getDictionary().skipSerializedInArena(pos);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ColumnLowCardinality::updateWeakHash32(WeakHash32 & hash) const
|
WeakHash32 ColumnLowCardinality::getWeakHash32() const
|
||||||
{
|
{
|
||||||
auto s = size();
|
WeakHash32 dict_hash = getDictionary().getNestedColumn()->getWeakHash32();
|
||||||
|
return idx.getWeakHash(dict_hash);
|
||||||
if (hash.getData().size() != s)
|
|
||||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
|
|
||||||
"column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size()));
|
|
||||||
|
|
||||||
const auto & dict = getDictionary().getNestedColumn();
|
|
||||||
WeakHash32 dict_hash(dict->size());
|
|
||||||
dict->updateWeakHash32(dict_hash);
|
|
||||||
|
|
||||||
idx.updateWeakHash(hash, dict_hash);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void ColumnLowCardinality::updateHashFast(SipHash & hash) const
|
void ColumnLowCardinality::updateHashFast(SipHash & hash) const
|
||||||
@ -820,10 +810,11 @@ bool ColumnLowCardinality::Index::containsDefault() const
|
|||||||
return contains;
|
return contains;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ColumnLowCardinality::Index::updateWeakHash(WeakHash32 & hash, WeakHash32 & dict_hash) const
|
WeakHash32 ColumnLowCardinality::Index::getWeakHash(const WeakHash32 & dict_hash) const
|
||||||
{
|
{
|
||||||
|
WeakHash32 hash(positions->size());
|
||||||
auto & hash_data = hash.getData();
|
auto & hash_data = hash.getData();
|
||||||
auto & dict_hash_data = dict_hash.getData();
|
const auto & dict_hash_data = dict_hash.getData();
|
||||||
|
|
||||||
auto update_weak_hash = [&](auto x)
|
auto update_weak_hash = [&](auto x)
|
||||||
{
|
{
|
||||||
@ -832,10 +823,11 @@ void ColumnLowCardinality::Index::updateWeakHash(WeakHash32 & hash, WeakHash32 &
|
|||||||
auto size = data.size();
|
auto size = data.size();
|
||||||
|
|
||||||
for (size_t i = 0; i < size; ++i)
|
for (size_t i = 0; i < size; ++i)
|
||||||
hash_data[i] = static_cast<UInt32>(intHashCRC32(dict_hash_data[data[i]], hash_data[i]));
|
hash_data[i] = dict_hash_data[data[i]];
|
||||||
};
|
};
|
||||||
|
|
||||||
callForType(std::move(update_weak_hash), size_of_type);
|
callForType(std::move(update_weak_hash), size_of_type);
|
||||||
|
return hash;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ColumnLowCardinality::Index::collectSerializedValueSizes(
|
void ColumnLowCardinality::Index::collectSerializedValueSizes(
|
||||||
|
@ -103,7 +103,7 @@ public:
|
|||||||
getDictionary().updateHashWithValue(getIndexes().getUInt(n), hash);
|
getDictionary().updateHashWithValue(getIndexes().getUInt(n), hash);
|
||||||
}
|
}
|
||||||
|
|
||||||
void updateWeakHash32(WeakHash32 & hash) const override;
|
WeakHash32 getWeakHash32() const override;
|
||||||
|
|
||||||
void updateHashFast(SipHash &) const override;
|
void updateHashFast(SipHash &) const override;
|
||||||
|
|
||||||
@ -313,7 +313,7 @@ public:
|
|||||||
|
|
||||||
bool containsDefault() const;
|
bool containsDefault() const;
|
||||||
|
|
||||||
void updateWeakHash(WeakHash32 & hash, WeakHash32 & dict_hash) const;
|
WeakHash32 getWeakHash(const WeakHash32 & dict_hash) const;
|
||||||
|
|
||||||
void collectSerializedValueSizes(PaddedPODArray<UInt64> & sizes, const PaddedPODArray<UInt64> & dict_sizes) const;
|
void collectSerializedValueSizes(PaddedPODArray<UInt64> & sizes, const PaddedPODArray<UInt64> & dict_sizes) const;
|
||||||
|
|
||||||
|
@ -143,9 +143,9 @@ void ColumnMap::updateHashWithValue(size_t n, SipHash & hash) const
|
|||||||
nested->updateHashWithValue(n, hash);
|
nested->updateHashWithValue(n, hash);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ColumnMap::updateWeakHash32(WeakHash32 & hash) const
|
WeakHash32 ColumnMap::getWeakHash32() const
|
||||||
{
|
{
|
||||||
nested->updateWeakHash32(hash);
|
return nested->getWeakHash32();
|
||||||
}
|
}
|
||||||
|
|
||||||
void ColumnMap::updateHashFast(SipHash & hash) const
|
void ColumnMap::updateHashFast(SipHash & hash) const
|
||||||
|
@ -64,7 +64,7 @@ public:
|
|||||||
const char * deserializeAndInsertFromArena(const char * pos) override;
|
const char * deserializeAndInsertFromArena(const char * pos) override;
|
||||||
const char * skipSerializedInArena(const char * pos) const override;
|
const char * skipSerializedInArena(const char * pos) const override;
|
||||||
void updateHashWithValue(size_t n, SipHash & hash) const override;
|
void updateHashWithValue(size_t n, SipHash & hash) const override;
|
||||||
void updateWeakHash32(WeakHash32 & hash) const override;
|
WeakHash32 getWeakHash32() const override;
|
||||||
void updateHashFast(SipHash & hash) const override;
|
void updateHashFast(SipHash & hash) const override;
|
||||||
void insertFrom(const IColumn & src_, size_t n) override;
|
void insertFrom(const IColumn & src_, size_t n) override;
|
||||||
void insertManyFrom(const IColumn & src, size_t position, size_t length) override;
|
void insertManyFrom(const IColumn & src, size_t position, size_t length) override;
|
||||||
|
@ -56,25 +56,21 @@ void ColumnNullable::updateHashWithValue(size_t n, SipHash & hash) const
|
|||||||
getNestedColumn().updateHashWithValue(n, hash);
|
getNestedColumn().updateHashWithValue(n, hash);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ColumnNullable::updateWeakHash32(WeakHash32 & hash) const
|
WeakHash32 ColumnNullable::getWeakHash32() const
|
||||||
{
|
{
|
||||||
auto s = size();
|
auto s = size();
|
||||||
|
|
||||||
if (hash.getData().size() != s)
|
WeakHash32 hash = nested_column->getWeakHash32();
|
||||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
|
|
||||||
"column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size()));
|
|
||||||
|
|
||||||
WeakHash32 old_hash = hash;
|
|
||||||
nested_column->updateWeakHash32(hash);
|
|
||||||
|
|
||||||
const auto & null_map_data = getNullMapData();
|
const auto & null_map_data = getNullMapData();
|
||||||
auto & hash_data = hash.getData();
|
auto & hash_data = hash.getData();
|
||||||
auto & old_hash_data = old_hash.getData();
|
|
||||||
|
|
||||||
/// Use old data for nulls.
|
/// Use default for nulls.
|
||||||
for (size_t row = 0; row < s; ++row)
|
for (size_t row = 0; row < s; ++row)
|
||||||
if (null_map_data[row])
|
if (null_map_data[row])
|
||||||
hash_data[row] = old_hash_data[row];
|
hash_data[row] = WeakHash32::kDefaultInitialValue;
|
||||||
|
|
||||||
|
return hash;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ColumnNullable::updateHashFast(SipHash & hash) const
|
void ColumnNullable::updateHashFast(SipHash & hash) const
|
||||||
|
@ -119,7 +119,7 @@ public:
|
|||||||
void protect() override;
|
void protect() override;
|
||||||
ColumnPtr replicate(const Offsets & replicate_offsets) const override;
|
ColumnPtr replicate(const Offsets & replicate_offsets) const override;
|
||||||
void updateHashWithValue(size_t n, SipHash & hash) const override;
|
void updateHashWithValue(size_t n, SipHash & hash) const override;
|
||||||
void updateWeakHash32(WeakHash32 & hash) const override;
|
WeakHash32 getWeakHash32() const override;
|
||||||
void updateHashFast(SipHash & hash) const override;
|
void updateHashFast(SipHash & hash) const override;
|
||||||
void getExtremes(Field & min, Field & max) const override;
|
void getExtremes(Field & min, Field & max) const override;
|
||||||
// Special function for nullable minmax index
|
// Special function for nullable minmax index
|
||||||
|
@ -5,6 +5,7 @@
|
|||||||
#include <Core/Names.h>
|
#include <Core/Names.h>
|
||||||
#include <DataTypes/Serializations/SubcolumnsTree.h>
|
#include <DataTypes/Serializations/SubcolumnsTree.h>
|
||||||
#include <Common/PODArray.h>
|
#include <Common/PODArray.h>
|
||||||
|
#include <Common/WeakHash.h>
|
||||||
|
|
||||||
#include <DataTypes/IDataType.h>
|
#include <DataTypes/IDataType.h>
|
||||||
|
|
||||||
@ -241,7 +242,7 @@ public:
|
|||||||
const char * deserializeAndInsertFromArena(const char *) override { throwMustBeConcrete(); }
|
const char * deserializeAndInsertFromArena(const char *) override { throwMustBeConcrete(); }
|
||||||
const char * skipSerializedInArena(const char *) const override { throwMustBeConcrete(); }
|
const char * skipSerializedInArena(const char *) const override { throwMustBeConcrete(); }
|
||||||
void updateHashWithValue(size_t, SipHash &) const override { throwMustBeConcrete(); }
|
void updateHashWithValue(size_t, SipHash &) const override { throwMustBeConcrete(); }
|
||||||
void updateWeakHash32(WeakHash32 &) const override { throwMustBeConcrete(); }
|
WeakHash32 getWeakHash32() const override { throwMustBeConcrete(); }
|
||||||
void updateHashFast(SipHash &) const override { throwMustBeConcrete(); }
|
void updateHashFast(SipHash &) const override { throwMustBeConcrete(); }
|
||||||
void expand(const Filter &, bool) override { throwMustBeConcrete(); }
|
void expand(const Filter &, bool) override { throwMustBeConcrete(); }
|
||||||
bool hasEqualValues() const override { throwMustBeConcrete(); }
|
bool hasEqualValues() const override { throwMustBeConcrete(); }
|
||||||
|
@ -666,20 +666,22 @@ void ColumnSparse::updateHashWithValue(size_t n, SipHash & hash) const
|
|||||||
values->updateHashWithValue(getValueIndex(n), hash);
|
values->updateHashWithValue(getValueIndex(n), hash);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ColumnSparse::updateWeakHash32(WeakHash32 & hash) const
|
WeakHash32 ColumnSparse::getWeakHash32() const
|
||||||
{
|
{
|
||||||
if (hash.getData().size() != _size)
|
WeakHash32 values_hash = values->getWeakHash32();
|
||||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
|
WeakHash32 hash(size());
|
||||||
"column size is {}, hash size is {}", _size, hash.getData().size());
|
|
||||||
|
auto & hash_data = hash.getData();
|
||||||
|
auto & values_hash_data = values_hash.getData();
|
||||||
|
|
||||||
auto offset_it = begin();
|
auto offset_it = begin();
|
||||||
auto & hash_data = hash.getData();
|
|
||||||
for (size_t i = 0; i < _size; ++i, ++offset_it)
|
for (size_t i = 0; i < _size; ++i, ++offset_it)
|
||||||
{
|
{
|
||||||
size_t value_index = offset_it.getValueIndex();
|
size_t value_index = offset_it.getValueIndex();
|
||||||
auto data_ref = values->getDataAt(value_index);
|
hash_data[i] = values_hash_data[value_index];
|
||||||
hash_data[i] = ::updateWeakHash32(reinterpret_cast<const UInt8 *>(data_ref.data), data_ref.size, hash_data[i]);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return hash;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ColumnSparse::updateHashFast(SipHash & hash) const
|
void ColumnSparse::updateHashFast(SipHash & hash) const
|
||||||
|
@ -127,7 +127,7 @@ public:
|
|||||||
void protect() override;
|
void protect() override;
|
||||||
ColumnPtr replicate(const Offsets & replicate_offsets) const override;
|
ColumnPtr replicate(const Offsets & replicate_offsets) const override;
|
||||||
void updateHashWithValue(size_t n, SipHash & hash) const override;
|
void updateHashWithValue(size_t n, SipHash & hash) const override;
|
||||||
void updateWeakHash32(WeakHash32 & hash) const override;
|
WeakHash32 getWeakHash32() const override;
|
||||||
void updateHashFast(SipHash & hash) const override;
|
void updateHashFast(SipHash & hash) const override;
|
||||||
void getExtremes(Field & min, Field & max) const override;
|
void getExtremes(Field & min, Field & max) const override;
|
||||||
|
|
||||||
|
@ -104,13 +104,10 @@ MutableColumnPtr ColumnString::cloneResized(size_t to_size) const
|
|||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ColumnString::updateWeakHash32(WeakHash32 & hash) const
|
WeakHash32 ColumnString::getWeakHash32() const
|
||||||
{
|
{
|
||||||
auto s = offsets.size();
|
auto s = offsets.size();
|
||||||
|
WeakHash32 hash(s);
|
||||||
if (hash.getData().size() != s)
|
|
||||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
|
|
||||||
"column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size()));
|
|
||||||
|
|
||||||
const UInt8 * pos = chars.data();
|
const UInt8 * pos = chars.data();
|
||||||
UInt32 * hash_data = hash.getData().data();
|
UInt32 * hash_data = hash.getData().data();
|
||||||
@ -126,6 +123,8 @@ void ColumnString::updateWeakHash32(WeakHash32 & hash) const
|
|||||||
prev_offset = offset;
|
prev_offset = offset;
|
||||||
++hash_data;
|
++hash_data;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return hash;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -204,7 +204,7 @@ public:
|
|||||||
hash.update(reinterpret_cast<const char *>(&chars[offset]), string_size);
|
hash.update(reinterpret_cast<const char *>(&chars[offset]), string_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
void updateWeakHash32(WeakHash32 & hash) const override;
|
WeakHash32 getWeakHash32() const override;
|
||||||
|
|
||||||
void updateHashFast(SipHash & hash) const override
|
void updateHashFast(SipHash & hash) const override
|
||||||
{
|
{
|
||||||
|
@ -300,16 +300,15 @@ void ColumnTuple::updateHashWithValue(size_t n, SipHash & hash) const
|
|||||||
column->updateHashWithValue(n, hash);
|
column->updateHashWithValue(n, hash);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ColumnTuple::updateWeakHash32(WeakHash32 & hash) const
|
WeakHash32 ColumnTuple::getWeakHash32() const
|
||||||
{
|
{
|
||||||
auto s = size();
|
auto s = size();
|
||||||
|
WeakHash32 hash(s);
|
||||||
if (hash.getData().size() != s)
|
|
||||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
|
|
||||||
"column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size()));
|
|
||||||
|
|
||||||
for (const auto & column : columns)
|
for (const auto & column : columns)
|
||||||
column->updateWeakHash32(hash);
|
hash.update(column->getWeakHash32());
|
||||||
|
|
||||||
|
return hash;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ColumnTuple::updateHashFast(SipHash & hash) const
|
void ColumnTuple::updateHashFast(SipHash & hash) const
|
||||||
|
@ -74,7 +74,7 @@ public:
|
|||||||
const char * deserializeAndInsertFromArena(const char * pos) override;
|
const char * deserializeAndInsertFromArena(const char * pos) override;
|
||||||
const char * skipSerializedInArena(const char * pos) const override;
|
const char * skipSerializedInArena(const char * pos) const override;
|
||||||
void updateHashWithValue(size_t n, SipHash & hash) const override;
|
void updateHashWithValue(size_t n, SipHash & hash) const override;
|
||||||
void updateWeakHash32(WeakHash32 & hash) const override;
|
WeakHash32 getWeakHash32() const override;
|
||||||
void updateHashFast(SipHash & hash) const override;
|
void updateHashFast(SipHash & hash) const override;
|
||||||
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
|
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
|
||||||
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
|
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
|
||||||
|
@ -777,36 +777,26 @@ void ColumnVariant::updateHashWithValue(size_t n, SipHash & hash) const
|
|||||||
variants[localDiscriminatorByGlobal(global_discr)]->updateHashWithValue(offsetAt(n), hash);
|
variants[localDiscriminatorByGlobal(global_discr)]->updateHashWithValue(offsetAt(n), hash);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ColumnVariant::updateWeakHash32(WeakHash32 & hash) const
|
WeakHash32 ColumnVariant::getWeakHash32() const
|
||||||
{
|
{
|
||||||
auto s = size();
|
auto s = size();
|
||||||
|
|
||||||
if (hash.getData().size() != s)
|
|
||||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
|
|
||||||
"column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size()));
|
|
||||||
|
|
||||||
/// If we have only NULLs, keep hash unchanged.
|
/// If we have only NULLs, keep hash unchanged.
|
||||||
if (hasOnlyNulls())
|
if (hasOnlyNulls())
|
||||||
return;
|
return WeakHash32(s);
|
||||||
|
|
||||||
/// Optimization for case when there is only 1 non-empty variant and no NULLs.
|
/// Optimization for case when there is only 1 non-empty variant and no NULLs.
|
||||||
/// In this case we can just calculate weak hash for this variant.
|
/// In this case we can just calculate weak hash for this variant.
|
||||||
if (auto non_empty_local_discr = getLocalDiscriminatorOfOneNoneEmptyVariantNoNulls())
|
if (auto non_empty_local_discr = getLocalDiscriminatorOfOneNoneEmptyVariantNoNulls())
|
||||||
{
|
return variants[*non_empty_local_discr]->getWeakHash32();
|
||||||
variants[*non_empty_local_discr]->updateWeakHash32(hash);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Calculate weak hash for all variants.
|
/// Calculate weak hash for all variants.
|
||||||
std::vector<WeakHash32> nested_hashes;
|
std::vector<WeakHash32> nested_hashes;
|
||||||
for (const auto & variant : variants)
|
for (const auto & variant : variants)
|
||||||
{
|
nested_hashes.emplace_back(variant->getWeakHash32());
|
||||||
WeakHash32 nested_hash(variant->size());
|
|
||||||
variant->updateWeakHash32(nested_hash);
|
|
||||||
nested_hashes.emplace_back(std::move(nested_hash));
|
|
||||||
}
|
|
||||||
|
|
||||||
/// For each row hash is a hash of corresponding row from corresponding variant.
|
/// For each row hash is a hash of corresponding row from corresponding variant.
|
||||||
|
WeakHash32 hash(s);
|
||||||
auto & hash_data = hash.getData();
|
auto & hash_data = hash.getData();
|
||||||
const auto & local_discriminators_data = getLocalDiscriminators();
|
const auto & local_discriminators_data = getLocalDiscriminators();
|
||||||
const auto & offsets_data = getOffsets();
|
const auto & offsets_data = getOffsets();
|
||||||
@ -815,11 +805,10 @@ void ColumnVariant::updateWeakHash32(WeakHash32 & hash) const
|
|||||||
Discriminator discr = local_discriminators_data[i];
|
Discriminator discr = local_discriminators_data[i];
|
||||||
/// Update hash only for non-NULL values
|
/// Update hash only for non-NULL values
|
||||||
if (discr != NULL_DISCRIMINATOR)
|
if (discr != NULL_DISCRIMINATOR)
|
||||||
{
|
hash_data[i] = nested_hashes[discr].getData()[offsets_data[i]];
|
||||||
auto nested_hash = nested_hashes[local_discriminators_data[i]].getData()[offsets_data[i]];
|
|
||||||
hash_data[i] = static_cast<UInt32>(hashCRC32(nested_hash, hash_data[i]));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return hash;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ColumnVariant::updateHashFast(SipHash & hash) const
|
void ColumnVariant::updateHashFast(SipHash & hash) const
|
||||||
|
@ -203,7 +203,7 @@ public:
|
|||||||
const char * deserializeVariantAndInsertFromArena(Discriminator global_discr, const char * pos);
|
const char * deserializeVariantAndInsertFromArena(Discriminator global_discr, const char * pos);
|
||||||
const char * skipSerializedInArena(const char * pos) const override;
|
const char * skipSerializedInArena(const char * pos) const override;
|
||||||
void updateHashWithValue(size_t n, SipHash & hash) const override;
|
void updateHashWithValue(size_t n, SipHash & hash) const override;
|
||||||
void updateWeakHash32(WeakHash32 & hash) const override;
|
WeakHash32 getWeakHash32() const override;
|
||||||
void updateHashFast(SipHash & hash) const override;
|
void updateHashFast(SipHash & hash) const override;
|
||||||
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
|
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
|
||||||
void expand(const Filter & mask, bool inverted) override;
|
void expand(const Filter & mask, bool inverted) override;
|
||||||
|
@ -73,13 +73,10 @@ void ColumnVector<T>::updateHashWithValue(size_t n, SipHash & hash) const
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void ColumnVector<T>::updateWeakHash32(WeakHash32 & hash) const
|
WeakHash32 ColumnVector<T>::getWeakHash32() const
|
||||||
{
|
{
|
||||||
auto s = data.size();
|
auto s = data.size();
|
||||||
|
WeakHash32 hash(s);
|
||||||
if (hash.getData().size() != s)
|
|
||||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
|
|
||||||
"column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size()));
|
|
||||||
|
|
||||||
const T * begin = data.data();
|
const T * begin = data.data();
|
||||||
const T * end = begin + s;
|
const T * end = begin + s;
|
||||||
@ -91,6 +88,8 @@ void ColumnVector<T>::updateWeakHash32(WeakHash32 & hash) const
|
|||||||
++begin;
|
++begin;
|
||||||
++hash_data;
|
++hash_data;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return hash;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
|
@ -106,7 +106,7 @@ public:
|
|||||||
|
|
||||||
void updateHashWithValue(size_t n, SipHash & hash) const override;
|
void updateHashWithValue(size_t n, SipHash & hash) const override;
|
||||||
|
|
||||||
void updateWeakHash32(WeakHash32 & hash) const override;
|
WeakHash32 getWeakHash32() const override;
|
||||||
|
|
||||||
void updateHashFast(SipHash & hash) const override;
|
void updateHashFast(SipHash & hash) const override;
|
||||||
|
|
||||||
|
@ -277,10 +277,10 @@ public:
|
|||||||
/// passed bytes to hash must identify sequence of values unambiguously.
|
/// passed bytes to hash must identify sequence of values unambiguously.
|
||||||
virtual void updateHashWithValue(size_t n, SipHash & hash) const = 0;
|
virtual void updateHashWithValue(size_t n, SipHash & hash) const = 0;
|
||||||
|
|
||||||
/// Update hash function value. Hash is calculated for each element.
|
/// Get hash function value. Hash is calculated for each element.
|
||||||
/// It's a fast weak hash function. Mainly need to scatter data between threads.
|
/// It's a fast weak hash function. Mainly need to scatter data between threads.
|
||||||
/// WeakHash32 must have the same size as column.
|
/// WeakHash32 must have the same size as column.
|
||||||
virtual void updateWeakHash32(WeakHash32 & hash) const = 0;
|
virtual WeakHash32 getWeakHash32() const = 0;
|
||||||
|
|
||||||
/// Update state of hash with all column.
|
/// Update state of hash with all column.
|
||||||
virtual void updateHashFast(SipHash & hash) const = 0;
|
virtual void updateHashFast(SipHash & hash) const = 0;
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <Columns/IColumn.h>
|
#include <Columns/IColumn.h>
|
||||||
|
#include <Common/WeakHash.h>
|
||||||
|
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
@ -59,8 +60,9 @@ public:
|
|||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
void updateWeakHash32(WeakHash32 & /*hash*/) const override
|
WeakHash32 getWeakHash32() const override
|
||||||
{
|
{
|
||||||
|
return WeakHash32(s);
|
||||||
}
|
}
|
||||||
|
|
||||||
void updateHashFast(SipHash & /*hash*/) const override
|
void updateHashFast(SipHash & /*hash*/) const override
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
#include <optional>
|
#include <optional>
|
||||||
#include <Columns/IColumn.h>
|
#include <Columns/IColumn.h>
|
||||||
|
#include <Common/WeakHash.h>
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
@ -162,9 +163,9 @@ public:
|
|||||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method scatter is not supported for ColumnUnique.");
|
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method scatter is not supported for ColumnUnique.");
|
||||||
}
|
}
|
||||||
|
|
||||||
void updateWeakHash32(WeakHash32 &) const override
|
WeakHash32 getWeakHash32() const override
|
||||||
{
|
{
|
||||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method updateWeakHash32 is not supported for ColumnUnique.");
|
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getWeakHash32 is not supported for ColumnUnique.");
|
||||||
}
|
}
|
||||||
|
|
||||||
void updateHashFast(SipHash &) const override
|
void updateHashFast(SipHash &) const override
|
||||||
|
@ -60,8 +60,7 @@ TEST(WeakHash32, ColumnVectorU8)
|
|||||||
data.push_back(i);
|
data.push_back(i);
|
||||||
}
|
}
|
||||||
|
|
||||||
WeakHash32 hash(col->size());
|
WeakHash32 hash = col->getWeakHash32();
|
||||||
col->updateWeakHash32(hash);
|
|
||||||
|
|
||||||
checkColumn(hash.getData(), col->getData());
|
checkColumn(hash.getData(), col->getData());
|
||||||
}
|
}
|
||||||
@ -77,8 +76,7 @@ TEST(WeakHash32, ColumnVectorI8)
|
|||||||
data.push_back(i);
|
data.push_back(i);
|
||||||
}
|
}
|
||||||
|
|
||||||
WeakHash32 hash(col->size());
|
WeakHash32 hash = col->getWeakHash32();
|
||||||
col->updateWeakHash32(hash);
|
|
||||||
|
|
||||||
checkColumn(hash.getData(), col->getData());
|
checkColumn(hash.getData(), col->getData());
|
||||||
}
|
}
|
||||||
@ -94,8 +92,7 @@ TEST(WeakHash32, ColumnVectorU16)
|
|||||||
data.push_back(i);
|
data.push_back(i);
|
||||||
}
|
}
|
||||||
|
|
||||||
WeakHash32 hash(col->size());
|
WeakHash32 hash = col->getWeakHash32();
|
||||||
col->updateWeakHash32(hash);
|
|
||||||
|
|
||||||
checkColumn(hash.getData(), col->getData());
|
checkColumn(hash.getData(), col->getData());
|
||||||
}
|
}
|
||||||
@ -111,8 +108,7 @@ TEST(WeakHash32, ColumnVectorI16)
|
|||||||
data.push_back(i);
|
data.push_back(i);
|
||||||
}
|
}
|
||||||
|
|
||||||
WeakHash32 hash(col->size());
|
WeakHash32 hash = col->getWeakHash32();
|
||||||
col->updateWeakHash32(hash);
|
|
||||||
|
|
||||||
checkColumn(hash.getData(), col->getData());
|
checkColumn(hash.getData(), col->getData());
|
||||||
}
|
}
|
||||||
@ -128,8 +124,7 @@ TEST(WeakHash32, ColumnVectorU32)
|
|||||||
data.push_back(i << 16u);
|
data.push_back(i << 16u);
|
||||||
}
|
}
|
||||||
|
|
||||||
WeakHash32 hash(col->size());
|
WeakHash32 hash = col->getWeakHash32();
|
||||||
col->updateWeakHash32(hash);
|
|
||||||
|
|
||||||
checkColumn(hash.getData(), col->getData());
|
checkColumn(hash.getData(), col->getData());
|
||||||
}
|
}
|
||||||
@ -145,8 +140,7 @@ TEST(WeakHash32, ColumnVectorI32)
|
|||||||
data.push_back(i << 16);
|
data.push_back(i << 16);
|
||||||
}
|
}
|
||||||
|
|
||||||
WeakHash32 hash(col->size());
|
WeakHash32 hash = col->getWeakHash32();
|
||||||
col->updateWeakHash32(hash);
|
|
||||||
|
|
||||||
checkColumn(hash.getData(), col->getData());
|
checkColumn(hash.getData(), col->getData());
|
||||||
}
|
}
|
||||||
@ -162,8 +156,7 @@ TEST(WeakHash32, ColumnVectorU64)
|
|||||||
data.push_back(i << 32u);
|
data.push_back(i << 32u);
|
||||||
}
|
}
|
||||||
|
|
||||||
WeakHash32 hash(col->size());
|
WeakHash32 hash = col->getWeakHash32();
|
||||||
col->updateWeakHash32(hash);
|
|
||||||
|
|
||||||
checkColumn(hash.getData(), col->getData());
|
checkColumn(hash.getData(), col->getData());
|
||||||
}
|
}
|
||||||
@ -179,8 +172,7 @@ TEST(WeakHash32, ColumnVectorI64)
|
|||||||
data.push_back(i << 32);
|
data.push_back(i << 32);
|
||||||
}
|
}
|
||||||
|
|
||||||
WeakHash32 hash(col->size());
|
WeakHash32 hash = col->getWeakHash32();
|
||||||
col->updateWeakHash32(hash);
|
|
||||||
|
|
||||||
checkColumn(hash.getData(), col->getData());
|
checkColumn(hash.getData(), col->getData());
|
||||||
}
|
}
|
||||||
@ -204,8 +196,7 @@ TEST(WeakHash32, ColumnVectorU128)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
WeakHash32 hash(col->size());
|
WeakHash32 hash = col->getWeakHash32();
|
||||||
col->updateWeakHash32(hash);
|
|
||||||
|
|
||||||
checkColumn(hash.getData(), eq_data);
|
checkColumn(hash.getData(), eq_data);
|
||||||
}
|
}
|
||||||
@ -221,8 +212,7 @@ TEST(WeakHash32, ColumnVectorI128)
|
|||||||
data.push_back(i << 32);
|
data.push_back(i << 32);
|
||||||
}
|
}
|
||||||
|
|
||||||
WeakHash32 hash(col->size());
|
WeakHash32 hash = col->getWeakHash32();
|
||||||
col->updateWeakHash32(hash);
|
|
||||||
|
|
||||||
checkColumn(hash.getData(), col->getData());
|
checkColumn(hash.getData(), col->getData());
|
||||||
}
|
}
|
||||||
@ -238,8 +228,7 @@ TEST(WeakHash32, ColumnDecimal32)
|
|||||||
data.push_back(i << 16);
|
data.push_back(i << 16);
|
||||||
}
|
}
|
||||||
|
|
||||||
WeakHash32 hash(col->size());
|
WeakHash32 hash = col->getWeakHash32();
|
||||||
col->updateWeakHash32(hash);
|
|
||||||
|
|
||||||
checkColumn(hash.getData(), col->getData());
|
checkColumn(hash.getData(), col->getData());
|
||||||
}
|
}
|
||||||
@ -255,8 +244,7 @@ TEST(WeakHash32, ColumnDecimal64)
|
|||||||
data.push_back(i << 32);
|
data.push_back(i << 32);
|
||||||
}
|
}
|
||||||
|
|
||||||
WeakHash32 hash(col->size());
|
WeakHash32 hash = col->getWeakHash32();
|
||||||
col->updateWeakHash32(hash);
|
|
||||||
|
|
||||||
checkColumn(hash.getData(), col->getData());
|
checkColumn(hash.getData(), col->getData());
|
||||||
}
|
}
|
||||||
@ -272,8 +260,7 @@ TEST(WeakHash32, ColumnDecimal128)
|
|||||||
data.push_back(i << 32);
|
data.push_back(i << 32);
|
||||||
}
|
}
|
||||||
|
|
||||||
WeakHash32 hash(col->size());
|
WeakHash32 hash = col->getWeakHash32();
|
||||||
col->updateWeakHash32(hash);
|
|
||||||
|
|
||||||
checkColumn(hash.getData(), col->getData());
|
checkColumn(hash.getData(), col->getData());
|
||||||
}
|
}
|
||||||
@ -294,8 +281,7 @@ TEST(WeakHash32, ColumnString1)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
WeakHash32 hash(col->size());
|
WeakHash32 hash = col->getWeakHash32();
|
||||||
col->updateWeakHash32(hash);
|
|
||||||
|
|
||||||
checkColumn(hash.getData(), data);
|
checkColumn(hash.getData(), data);
|
||||||
}
|
}
|
||||||
@ -331,8 +317,7 @@ TEST(WeakHash32, ColumnString2)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
WeakHash32 hash(col->size());
|
WeakHash32 hash = col->getWeakHash32();
|
||||||
col->updateWeakHash32(hash);
|
|
||||||
|
|
||||||
checkColumn(hash.getData(), data);
|
checkColumn(hash.getData(), data);
|
||||||
}
|
}
|
||||||
@ -369,8 +354,7 @@ TEST(WeakHash32, ColumnString3)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
WeakHash32 hash(col->size());
|
WeakHash32 hash = col->getWeakHash32();
|
||||||
col->updateWeakHash32(hash);
|
|
||||||
|
|
||||||
checkColumn(hash.getData(), data);
|
checkColumn(hash.getData(), data);
|
||||||
}
|
}
|
||||||
@ -397,8 +381,7 @@ TEST(WeakHash32, ColumnFixedString)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
WeakHash32 hash(col->size());
|
WeakHash32 hash = col->getWeakHash32();
|
||||||
col->updateWeakHash32(hash);
|
|
||||||
|
|
||||||
checkColumn(hash.getData(), data);
|
checkColumn(hash.getData(), data);
|
||||||
}
|
}
|
||||||
@ -444,8 +427,7 @@ TEST(WeakHash32, ColumnArray)
|
|||||||
|
|
||||||
auto col_arr = ColumnArray::create(std::move(val), std::move(off));
|
auto col_arr = ColumnArray::create(std::move(val), std::move(off));
|
||||||
|
|
||||||
WeakHash32 hash(col_arr->size());
|
WeakHash32 hash = col_arr->getWeakHash32();
|
||||||
col_arr->updateWeakHash32(hash);
|
|
||||||
|
|
||||||
checkColumn(hash.getData(), eq_data);
|
checkColumn(hash.getData(), eq_data);
|
||||||
}
|
}
|
||||||
@ -479,8 +461,7 @@ TEST(WeakHash32, ColumnArray2)
|
|||||||
|
|
||||||
auto col_arr = ColumnArray::create(std::move(val), std::move(off));
|
auto col_arr = ColumnArray::create(std::move(val), std::move(off));
|
||||||
|
|
||||||
WeakHash32 hash(col_arr->size());
|
WeakHash32 hash = col_arr->getWeakHash32();
|
||||||
col_arr->updateWeakHash32(hash);
|
|
||||||
|
|
||||||
checkColumn(hash.getData(), eq_data);
|
checkColumn(hash.getData(), eq_data);
|
||||||
}
|
}
|
||||||
@ -536,8 +517,7 @@ TEST(WeakHash32, ColumnArrayArray)
|
|||||||
auto col_arr = ColumnArray::create(std::move(val), std::move(off));
|
auto col_arr = ColumnArray::create(std::move(val), std::move(off));
|
||||||
auto col_arr_arr = ColumnArray::create(std::move(col_arr), std::move(off2));
|
auto col_arr_arr = ColumnArray::create(std::move(col_arr), std::move(off2));
|
||||||
|
|
||||||
WeakHash32 hash(col_arr_arr->size());
|
WeakHash32 hash = col_arr_arr->getWeakHash32();
|
||||||
col_arr_arr->updateWeakHash32(hash);
|
|
||||||
|
|
||||||
checkColumn(hash.getData(), eq_data);
|
checkColumn(hash.getData(), eq_data);
|
||||||
}
|
}
|
||||||
@ -555,8 +535,7 @@ TEST(WeakHash32, ColumnConst)
|
|||||||
|
|
||||||
auto col_const = ColumnConst::create(std::move(inner_col), 256);
|
auto col_const = ColumnConst::create(std::move(inner_col), 256);
|
||||||
|
|
||||||
WeakHash32 hash(col_const->size());
|
WeakHash32 hash = col_const->getWeakHash32();
|
||||||
col_const->updateWeakHash32(hash);
|
|
||||||
|
|
||||||
checkColumn(hash.getData(), data);
|
checkColumn(hash.getData(), data);
|
||||||
}
|
}
|
||||||
@ -576,8 +555,7 @@ TEST(WeakHash32, ColumnLowcardinality)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
WeakHash32 hash(col->size());
|
WeakHash32 hash = col->getWeakHash32();
|
||||||
col->updateWeakHash32(hash);
|
|
||||||
|
|
||||||
checkColumn(hash.getData(), data);
|
checkColumn(hash.getData(), data);
|
||||||
}
|
}
|
||||||
@ -602,8 +580,7 @@ TEST(WeakHash32, ColumnNullable)
|
|||||||
|
|
||||||
auto col_null = ColumnNullable::create(std::move(col), std::move(mask));
|
auto col_null = ColumnNullable::create(std::move(col), std::move(mask));
|
||||||
|
|
||||||
WeakHash32 hash(col_null->size());
|
WeakHash32 hash = col_null->getWeakHash32();
|
||||||
col_null->updateWeakHash32(hash);
|
|
||||||
|
|
||||||
checkColumn(hash.getData(), eq);
|
checkColumn(hash.getData(), eq);
|
||||||
}
|
}
|
||||||
@ -633,8 +610,7 @@ TEST(WeakHash32, ColumnTupleUInt64UInt64)
|
|||||||
columns.emplace_back(std::move(col2));
|
columns.emplace_back(std::move(col2));
|
||||||
auto col_tuple = ColumnTuple::create(std::move(columns));
|
auto col_tuple = ColumnTuple::create(std::move(columns));
|
||||||
|
|
||||||
WeakHash32 hash(col_tuple->size());
|
WeakHash32 hash = col_tuple->getWeakHash32();
|
||||||
col_tuple->updateWeakHash32(hash);
|
|
||||||
|
|
||||||
checkColumn(hash.getData(), eq);
|
checkColumn(hash.getData(), eq);
|
||||||
}
|
}
|
||||||
@ -671,8 +647,7 @@ TEST(WeakHash32, ColumnTupleUInt64String)
|
|||||||
columns.emplace_back(std::move(col2));
|
columns.emplace_back(std::move(col2));
|
||||||
auto col_tuple = ColumnTuple::create(std::move(columns));
|
auto col_tuple = ColumnTuple::create(std::move(columns));
|
||||||
|
|
||||||
WeakHash32 hash(col_tuple->size());
|
WeakHash32 hash = col_tuple->getWeakHash32();
|
||||||
col_tuple->updateWeakHash32(hash);
|
|
||||||
|
|
||||||
checkColumn(hash.getData(), eq);
|
checkColumn(hash.getData(), eq);
|
||||||
}
|
}
|
||||||
@ -709,8 +684,7 @@ TEST(WeakHash32, ColumnTupleUInt64FixedString)
|
|||||||
columns.emplace_back(std::move(col2));
|
columns.emplace_back(std::move(col2));
|
||||||
auto col_tuple = ColumnTuple::create(std::move(columns));
|
auto col_tuple = ColumnTuple::create(std::move(columns));
|
||||||
|
|
||||||
WeakHash32 hash(col_tuple->size());
|
WeakHash32 hash = col_tuple->getWeakHash32();
|
||||||
col_tuple->updateWeakHash32(hash);
|
|
||||||
|
|
||||||
checkColumn(hash.getData(), eq);
|
checkColumn(hash.getData(), eq);
|
||||||
}
|
}
|
||||||
@ -756,8 +730,7 @@ TEST(WeakHash32, ColumnTupleUInt64Array)
|
|||||||
columns.emplace_back(ColumnArray::create(std::move(val), std::move(off)));
|
columns.emplace_back(ColumnArray::create(std::move(val), std::move(off)));
|
||||||
auto col_tuple = ColumnTuple::create(std::move(columns));
|
auto col_tuple = ColumnTuple::create(std::move(columns));
|
||||||
|
|
||||||
WeakHash32 hash(col_tuple->size());
|
WeakHash32 hash = col_tuple->getWeakHash32();
|
||||||
col_tuple->updateWeakHash32(hash);
|
|
||||||
|
|
||||||
checkColumn(hash.getData(), eq_data);
|
checkColumn(hash.getData(), eq_data);
|
||||||
}
|
}
|
||||||
|
@ -1,2 +1,24 @@
|
|||||||
#include <Common/WeakHash.h>
|
#include <Common/WeakHash.h>
|
||||||
|
#include <Common/Exception.h>
|
||||||
|
#include <Common/HashTable/Hash.h>
|
||||||
|
|
||||||
|
namespace DB
|
||||||
|
{
|
||||||
|
|
||||||
|
namespace ErrorCodes
|
||||||
|
{
|
||||||
|
extern const int LOGICAL_ERROR;
|
||||||
|
}
|
||||||
|
|
||||||
|
void WeakHash32::update(const WeakHash32 & other)
|
||||||
|
{
|
||||||
|
size_t size = data.size();
|
||||||
|
if (size != other.data.size())
|
||||||
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match:"
|
||||||
|
"left size is {}, right size is {}", size, other.data.size());
|
||||||
|
|
||||||
|
for (size_t i = 0; i < size; ++i)
|
||||||
|
data[i] = static_cast<UInt32>(intHashCRC32(other.data[i], data[i]));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
@ -11,9 +11,8 @@ namespace DB
|
|||||||
/// The main purpose why this class needed is to support data initialization. Initially, every bit is 1.
|
/// The main purpose why this class needed is to support data initialization. Initially, every bit is 1.
|
||||||
class WeakHash32
|
class WeakHash32
|
||||||
{
|
{
|
||||||
static constexpr UInt32 kDefaultInitialValue = ~UInt32(0);
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
static constexpr UInt32 kDefaultInitialValue = ~UInt32(0);
|
||||||
|
|
||||||
using Container = PaddedPODArray<UInt32>;
|
using Container = PaddedPODArray<UInt32>;
|
||||||
|
|
||||||
@ -22,6 +21,8 @@ public:
|
|||||||
|
|
||||||
void reset(size_t size, UInt32 initial_value = kDefaultInitialValue) { data.assign(size, initial_value); }
|
void reset(size_t size, UInt32 initial_value = kDefaultInitialValue) { data.assign(size, initial_value); }
|
||||||
|
|
||||||
|
void update(const WeakHash32 & other);
|
||||||
|
|
||||||
const Container & getData() const { return data; }
|
const Container & getData() const { return data; }
|
||||||
Container & getData() { return data; }
|
Container & getData() { return data; }
|
||||||
|
|
||||||
|
@ -271,7 +271,7 @@ IColumn::Selector ConcurrentHashJoin::selectDispatchBlock(const Strings & key_co
|
|||||||
{
|
{
|
||||||
const auto & key_col = from_block.getByName(key_name).column->convertToFullColumnIfConst();
|
const auto & key_col = from_block.getByName(key_name).column->convertToFullColumnIfConst();
|
||||||
const auto & key_col_no_lc = recursiveRemoveLowCardinality(recursiveRemoveSparse(key_col));
|
const auto & key_col_no_lc = recursiveRemoveLowCardinality(recursiveRemoveSparse(key_col));
|
||||||
key_col_no_lc->updateWeakHash32(hash);
|
hash.update(key_col_no_lc->getWeakHash32());
|
||||||
}
|
}
|
||||||
return hashToSelector(hash, num_shards);
|
return hashToSelector(hash, num_shards);
|
||||||
}
|
}
|
||||||
|
@ -554,7 +554,7 @@ static Blocks scatterBlockByHashImpl(const Strings & key_columns_names, const Bl
|
|||||||
for (const auto & key_name : key_columns_names)
|
for (const auto & key_name : key_columns_names)
|
||||||
{
|
{
|
||||||
ColumnPtr key_col = materializeColumn(block, key_name);
|
ColumnPtr key_col = materializeColumn(block, key_name);
|
||||||
key_col->updateWeakHash32(hash);
|
hash.update(key_col->getWeakHash32());
|
||||||
}
|
}
|
||||||
auto selector = hashToSelector(hash, sharder);
|
auto selector = hashToSelector(hash, sharder);
|
||||||
|
|
||||||
|
@ -109,7 +109,7 @@ void ScatterByPartitionTransform::generateOutputChunks()
|
|||||||
hash.reset(num_rows);
|
hash.reset(num_rows);
|
||||||
|
|
||||||
for (const auto & column_number : key_columns)
|
for (const auto & column_number : key_columns)
|
||||||
columns[column_number]->updateWeakHash32(hash);
|
hash.update(columns[column_number]->getWeakHash32());
|
||||||
|
|
||||||
const auto & hash_data = hash.getData();
|
const auto & hash_data = hash.getData();
|
||||||
IColumn::Selector selector(num_rows);
|
IColumn::Selector selector(num_rows);
|
||||||
|
@ -0,0 +1,10 @@
|
|||||||
|
false 1 1
|
||||||
|
true 1 1
|
||||||
|
---
|
||||||
|
false 1 1
|
||||||
|
false 1 2
|
||||||
|
false 1 3
|
||||||
|
true 1 1
|
||||||
|
true 1 2
|
||||||
|
---
|
||||||
|
-755809149 0
|
@ -0,0 +1,33 @@
|
|||||||
|
create table t(c Int32, d Bool) Engine=MergeTree order by c;
|
||||||
|
system stop merges t;
|
||||||
|
|
||||||
|
insert into t values (1, 0);
|
||||||
|
insert into t values (1, 0);
|
||||||
|
insert into t values (1, 1);
|
||||||
|
insert into t values (1, 0)(1, 1);
|
||||||
|
|
||||||
|
SELECT d, c, row_number() over (partition by d order by c) as c8 FROM t qualify c8=1 order by d settings max_threads=2, allow_experimental_analyzer = 1;
|
||||||
|
SELECT '---';
|
||||||
|
SELECT d, c, row_number() over (partition by d order by c) as c8 FROM t order by d, c8 settings max_threads=2;
|
||||||
|
SELECT '---';
|
||||||
|
|
||||||
|
drop table t;
|
||||||
|
|
||||||
|
create table t (
|
||||||
|
c Int32 primary key ,
|
||||||
|
s Bool ,
|
||||||
|
w Float64
|
||||||
|
);
|
||||||
|
|
||||||
|
system stop merges t;
|
||||||
|
|
||||||
|
insert into t values(439499072,true,0),(1393290072,true,0);
|
||||||
|
insert into t values(-1317193174,false,0),(1929066636,false,0);
|
||||||
|
insert into t values(-2,false,0),(1962246186,true,0),(2054878592,false,0);
|
||||||
|
insert into t values(-1893563136,true,41.55);
|
||||||
|
insert into t values(-1338380855,true,-0.7),(-991301833,true,0),(-755809149,false,43.18),(-41,true,0),(3,false,0),(255,false,0),(255,false,0),(189195893,false,0),(195550885,false,9223372036854776000);
|
||||||
|
|
||||||
|
SELECT * FROM (
|
||||||
|
SELECT c, min(w) OVER (PARTITION BY s ORDER BY c ASC, s ASC, w ASC)
|
||||||
|
FROM t limit toUInt64(-1))
|
||||||
|
WHERE c = -755809149;
|
Loading…
Reference in New Issue
Block a user