Refactor IColumn::forEachSubcolumn to make it slightly harder to implement incorrectly

This commit is contained in:
Michael Kolupaev 2023-06-16 07:16:50 +00:00
parent c2900177a2
commit 10d597676c
16 changed files with 55 additions and 96 deletions

View File

@ -4,7 +4,7 @@ if (SANITIZE OR NOT (
))
if (ENABLE_JEMALLOC)
message (${RECONFIGURE_MESSAGE_LEVEL}
"jemalloc is disabled implicitly: it doesn't work with sanitizers and can only be used with x86_64, aarch64, or ppc64le Linux or FreeBSD builds and RelWithDebInfo macOS builds.")
"jemalloc is disabled implicitly: it doesn't work with sanitizers and can only be used with x86_64, aarch64, or ppc64le Linux or FreeBSD builds and RelWithDebInfo macOS builds. Use -DENABLE_JEMALLOC=0")
endif ()
set (ENABLE_JEMALLOC OFF)
else ()

View File

@ -151,13 +151,13 @@ public:
ColumnPtr compress() const override;
/// Shallow visit of this column's direct subcolumns: hands `offsets`, then `data`, to `callback`.
/// NOTE(review): two signature lines appear back to back because this is a diff rendered without +/- markers;
/// presumably the `const` overload is the removed line and the mutable overload its replacement - confirm against the commit.
void forEachSubcolumn(ColumnCallback callback) const override
void forEachSubcolumn(MutableColumnCallback callback) override
{
callback(offsets);
callback(data);
}
void forEachSubcolumnRecursively(RecursiveColumnCallback callback) const override
void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override
{
callback(*offsets);
offsets->forEachSubcolumnRecursively(callback);

View File

@ -166,7 +166,7 @@ public:
size_t byteSizeAt(size_t n) const override { return getDictionary().byteSizeAt(getIndexes().getUInt(n)); }
size_t allocatedBytes() const override { return idx.getPositions()->allocatedBytes() + getDictionary().allocatedBytes(); }
void forEachSubcolumn(ColumnCallback callback) const override
void forEachSubcolumn(MutableColumnCallback callback) override
{
callback(idx.getPositionsPtr());
@ -175,7 +175,7 @@ public:
callback(dictionary.getColumnUniquePtr());
}
void forEachSubcolumnRecursively(RecursiveColumnCallback callback) const override
void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override
{
callback(*idx.getPositionsPtr());
idx.getPositionsPtr()->forEachSubcolumnRecursively(callback);

View File

@ -273,12 +273,12 @@ void ColumnMap::getExtremes(Field & min, Field & max) const
max = std::move(map_max_value);
}
/// Shallow visit: the only direct subcolumn handed to `callback` is `nested`.
/// NOTE(review): two signature lines appear back to back because this is a diff rendered without +/- markers;
/// presumably the `const` overload is the removed line and the mutable overload its replacement - confirm against the commit.
void ColumnMap::forEachSubcolumn(ColumnCallback callback) const
void ColumnMap::forEachSubcolumn(MutableColumnCallback callback)
{
callback(nested);
}
void ColumnMap::forEachSubcolumnRecursively(RecursiveColumnCallback callback) const
void ColumnMap::forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback)
{
callback(*nested);
nested->forEachSubcolumnRecursively(callback);

View File

@ -88,8 +88,8 @@ public:
size_t byteSizeAt(size_t n) const override;
size_t allocatedBytes() const override;
void protect() override;
void forEachSubcolumn(ColumnCallback callback) const override;
void forEachSubcolumnRecursively(RecursiveColumnCallback callback) const override;
void forEachSubcolumn(MutableColumnCallback callback) override;
void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override;
bool structureEquals(const IColumn & rhs) const override;
double getRatioOfDefaultRows(double sample_ratio) const override;
UInt64 getNumberOfDefaultRows() const override;

View File

@ -130,13 +130,13 @@ public:
ColumnPtr compress() const override;
/// Shallow visit of this column's direct subcolumns: hands `nested_column`, then `null_map`, to `callback`.
/// NOTE(review): two signature lines appear back to back because this is a diff rendered without +/- markers;
/// presumably the `const` overload is the removed line and the mutable overload its replacement - confirm against the commit.
void forEachSubcolumn(ColumnCallback callback) const override
void forEachSubcolumn(MutableColumnCallback callback) override
{
callback(nested_column);
callback(null_map);
}
void forEachSubcolumnRecursively(RecursiveColumnCallback callback) const override
void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override
{
callback(*nested_column);
nested_column->forEachSubcolumnRecursively(callback);

View File

@ -664,18 +664,18 @@ size_t ColumnObject::allocatedBytes() const
return res;
}
/// Shallow visit: for every entry of `subcolumns`, hands each element of `entry->data.data` to `callback`.
/// NOTE(review): the signature and both loop headers appear twice because this is a diff rendered without +/- markers;
/// presumably the `const` variants are the removed lines and the non-const ones their replacements - confirm against the commit.
void ColumnObject::forEachSubcolumn(ColumnCallback callback) const
void ColumnObject::forEachSubcolumn(MutableColumnCallback callback)
{
for (const auto & entry : subcolumns)
for (const auto & part : entry->data.data)
for (auto & entry : subcolumns)
for (auto & part : entry->data.data)
callback(part);
}
void ColumnObject::forEachSubcolumnRecursively(RecursiveColumnCallback callback) const
void ColumnObject::forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback)
{
for (const auto & entry : subcolumns)
for (auto & entry : subcolumns)
{
for (const auto & part : entry->data.data)
for (auto & part : entry->data.data)
{
callback(*part);
part->forEachSubcolumnRecursively(callback);

View File

@ -206,8 +206,8 @@ public:
size_t size() const override;
size_t byteSize() const override;
size_t allocatedBytes() const override;
void forEachSubcolumn(ColumnCallback callback) const override;
void forEachSubcolumnRecursively(RecursiveColumnCallback callback) const override;
void forEachSubcolumn(MutableColumnCallback callback) override;
void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override;
void insert(const Field & field) override;
void insertDefault() override;
void insertFrom(const IColumn & src, size_t n) override;

View File

@ -751,13 +751,13 @@ bool ColumnSparse::structureEquals(const IColumn & rhs) const
return false;
}
/// Shallow visit of this column's direct subcolumns: hands `values`, then `offsets`, to `callback`.
/// NOTE(review): two signature lines appear back to back because this is a diff rendered without +/- markers;
/// presumably the `const` overload is the removed line and the mutable overload its replacement - confirm against the commit.
void ColumnSparse::forEachSubcolumn(ColumnCallback callback) const
void ColumnSparse::forEachSubcolumn(MutableColumnCallback callback)
{
callback(values);
callback(offsets);
}
void ColumnSparse::forEachSubcolumnRecursively(RecursiveColumnCallback callback) const
void ColumnSparse::forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback)
{
callback(*values);
values->forEachSubcolumnRecursively(callback);

View File

@ -140,8 +140,8 @@ public:
ColumnPtr compress() const override;
void forEachSubcolumn(ColumnCallback callback) const override;
void forEachSubcolumnRecursively(RecursiveColumnCallback callback) const override;
void forEachSubcolumn(MutableColumnCallback callback) override;
void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override;
bool structureEquals(const IColumn & rhs) const override;

View File

@ -31,14 +31,12 @@ ColumnString::ColumnString(const ColumnString & src)
offsets(src.offsets.begin(), src.offsets.end()),
chars(src.chars.begin(), src.chars.end())
{
if (!offsets.empty())
{
Offset last_offset = offsets.back();
/// This will also prevent possible overflow in offset.
if (chars.size() != last_offset)
throw Exception(ErrorCodes::LOGICAL_ERROR, "String offsets has data inconsistent with chars array");
}
Offset last_offset = offsets.empty() ? 0 : offsets.back();
/// This will also prevent possible overflow in offset.
if (last_offset != chars.size())
throw Exception(ErrorCodes::LOGICAL_ERROR,
"String offsets has data inconsistent with chars array. Last offset: {}, array length: {}",
last_offset, chars.size());
}
@ -157,6 +155,7 @@ ColumnPtr ColumnString::filter(const Filter & filt, ssize_t result_size_hint) co
Offsets & res_offsets = res->offsets;
filterArraysImpl<UInt8>(chars, offsets, res_chars, res_offsets, filt, result_size_hint);
return res;
}
@ -571,10 +570,11 @@ void ColumnString::protect()
/// Consistency check between `offsets` and `chars`: throws a LOGICAL_ERROR exception when the last offset
/// (taken as 0 if `offsets` is empty) differs from `chars.size()`; both values are put into the message.
/// NOTE(review): this hunk is rendered without +/- markers; the `if (!offsets.empty() && ...)` line and the
/// `offsets.back(), chars.size());` argument line look like the removed pre-change variants - confirm against the commit.
void ColumnString::validate() const
{
if (!offsets.empty() && offsets.back() != chars.size())
Offset last_offset = offsets.empty() ? 0 : offsets.back();
if (last_offset != chars.size())
throw Exception(ErrorCodes::LOGICAL_ERROR,
"ColumnString validation failed: size mismatch (internal logical error) {} != {}",
offsets.back(), chars.size());
last_offset, chars.size());
}
}

View File

@ -495,15 +495,15 @@ void ColumnTuple::getExtremes(Field & min, Field & max) const
max = max_tuple;
}
/// Shallow visit: hands every element of `columns`, in iteration order, to `callback`.
/// NOTE(review): the signature and the loop header appear twice because this is a diff rendered without +/- markers;
/// presumably the `const` variants are the removed lines and the non-const ones their replacements - confirm against the commit.
void ColumnTuple::forEachSubcolumn(ColumnCallback callback) const
void ColumnTuple::forEachSubcolumn(MutableColumnCallback callback)
{
for (const auto & column : columns)
for (auto & column : columns)
callback(column);
}
void ColumnTuple::forEachSubcolumnRecursively(RecursiveColumnCallback callback) const
void ColumnTuple::forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback)
{
for (const auto & column : columns)
for (auto & column : columns)
{
callback(*column);
column->forEachSubcolumnRecursively(callback);

View File

@ -96,8 +96,8 @@ public:
size_t byteSizeAt(size_t n) const override;
size_t allocatedBytes() const override;
void protect() override;
void forEachSubcolumn(ColumnCallback callback) const override;
void forEachSubcolumnRecursively(RecursiveColumnCallback callback) const override;
void forEachSubcolumn(MutableColumnCallback callback) override;
void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override;
bool structureEquals(const IColumn & rhs) const override;
bool isCollationSupported() const override;
ColumnPtr compress() const override;

View File

@ -62,19 +62,19 @@ ColumnPtr IColumn::createWithOffsets(const Offsets & offsets, const Field & defa
return res;
}
/// Default bridge between the const and the mutable overload of forEachSubcolumn.
/// Two directions are interleaved here (diff rendered without +/- markers):
///  - the lines using `std::as_const(*this)` and `const_cast<WrappedPtr &>` implement the mutable overload by
///    calling the const one and stripping constness from each subcolumn before passing it to `callback`;
///  - the lines using `const_cast<IColumn*>(this)` and `std::as_const(subcolumn)` implement the const overload by
///    calling the mutable one and passing each subcolumn to `callback` as const.
/// NOTE(review): presumably the first direction is the removed code and the second its replacement - confirm against the commit.
void IColumn::forEachSubcolumn(MutableColumnCallback callback)
void IColumn::forEachSubcolumn(ColumnCallback callback) const
{
std::as_const(*this).forEachSubcolumn([&callback](const WrappedPtr & subcolumn)
const_cast<IColumn*>(this)->forEachSubcolumn([&callback](WrappedPtr & subcolumn)
{
callback(const_cast<WrappedPtr &>(subcolumn));
callback(std::as_const(subcolumn));
});
}
/// Default bridge between the const and the mutable overload of forEachSubcolumnRecursively.
/// Two directions are interleaved here (diff rendered without +/- markers):
///  - the lines using `std::as_const(*this)` and `const_cast<IColumn &>` implement the mutable overload by
///    calling the const one and stripping constness from each visited column before passing it to `callback`;
///  - the lines using `const_cast<IColumn*>(this)` and `std::as_const(subcolumn)` implement the const overload by
///    calling the mutable one and passing each visited column to `callback` as const.
/// NOTE(review): presumably the first direction is the removed code and the second its replacement - confirm against the commit.
void IColumn::forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback)
void IColumn::forEachSubcolumnRecursively(RecursiveColumnCallback callback) const
{
std::as_const(*this).forEachSubcolumnRecursively([&callback](const IColumn & subcolumn)
const_cast<IColumn*>(this)->forEachSubcolumnRecursively([&callback](IColumn & subcolumn)
{
callback(const_cast<IColumn &>(subcolumn));
callback(std::as_const(subcolumn));
});
}

View File

@ -418,21 +418,23 @@ public:
/// If the column contains subcolumns (such as Array, Nullable, etc), do callback on them.
/// Shallow: doesn't make recursive calls and doesn't invoke the callback for the column itself.
using ColumnCallback = std::function<void(const WrappedPtr &)>;
virtual void forEachSubcolumn(ColumnCallback) const {}
using MutableColumnCallback = std::function<void(WrappedPtr &)>;
virtual void forEachSubcolumn(MutableColumnCallback callback);
virtual void forEachSubcolumn(MutableColumnCallback) {}
/// Default implementation calls the mutable overload using const_cast.
using ColumnCallback = std::function<void(const WrappedPtr &)>;
virtual void forEachSubcolumn(ColumnCallback) const;
/// Similar to forEachSubcolumn, but it also makes recursive calls.
/// In recursive calls it's prohibited to replace pointers
/// to subcolumns, so we use another callback function.
using RecursiveColumnCallback = std::function<void(const IColumn &)>;
virtual void forEachSubcolumnRecursively(RecursiveColumnCallback) const {}
using RecursiveMutableColumnCallback = std::function<void(IColumn &)>;
virtual void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback);
virtual void forEachSubcolumnRecursively(RecursiveMutableColumnCallback) {}
/// Default implementation calls the mutable overload using const_cast.
using RecursiveColumnCallback = std::function<void(const IColumn &)>;
virtual void forEachSubcolumnRecursively(RecursiveColumnCallback) const;
/// Columns have equal structure.
/// If true - you can use "compareAt", "insertFrom", etc. methods.

View File

@ -300,49 +300,6 @@ namespace
MutableColumnPtr additional_keys_map;
};
/// Self-checking variant of the index remapping.
///
/// Every value of `index` smaller than `dict_size` is treated as a dictionary key, every other value as an
/// additional key. Distinct values of each kind are numbered in order of first insertion; `index` is rewritten
/// in place so that dictionary keys become their number and additional keys become their number shifted by
/// the count of distinct dictionary keys.
///
/// Returns two columns: the distinct dictionary keys, and the distinct additional keys with `dict_size`
/// subtracted, each stored at the position given by its number.
///
/// Before returning, the rewritten `index` is decoded through the two columns and compared with a copy of the
/// input taken on entry; a mismatch throws a LOGICAL_ERROR exception.
template <typename T>
IndexMapsWithAdditionalKeys mapIndexWithAdditionalKeysRef(PaddedPODArray<T> & index, size_t dict_size)
{
    /// Snapshot of the untouched input, needed for the round-trip check at the end.
    PaddedPODArray<T> original(index.cbegin(), index.cend());

    HashMap<T, T> dict_positions;
    HashMap<T, T> extra_positions;

    /// Number the distinct values, separately for dictionary keys and for additional keys.
    for (auto value : index)
    {
        auto & positions = value < dict_size ? dict_positions : extra_positions;
        positions.insert({value, positions.size()});
    }

    auto dictionary_map = ColumnVector<T>::create(dict_positions.size());
    auto additional_keys_map = ColumnVector<T>::create(extra_positions.size());
    auto & dict_data = dictionary_map->getData();
    auto & extra_data = additional_keys_map->getData();

    /// Invert the hash maps: slot "number" holds the key that received that number.
    for (auto cell : dict_positions)
        dict_data[cell.second] = cell.first;

    for (auto cell : extra_positions)
        extra_data[cell.second] = cell.first - dict_size;

    /// Replace every value of the index with its number; additional keys go after all dictionary keys.
    for (auto & value : index)
    {
        if (value < dict_size)
            value = dict_positions[value];
        else
            value = extra_positions[value] + dict_positions.size();
    }

    /// Round trip: decoding the rewritten index must reproduce the input exactly.
    const size_t dict_count = dict_data.size();
    for (size_t row = 0; row < index.size(); ++row)
    {
        const auto position = index[row];

        T expected;
        if (position < dict_count)
            expected = dict_data[position];
        else
            expected = extra_data[position - dict_count] + dict_size;

        if (expected != original[row])
            throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected {}, but got {}", toString(expected), toString(original[row]));
    }

    return {std::move(dictionary_map), std::move(additional_keys_map)};
}
template <typename T>
IndexMapsWithAdditionalKeys mapIndexWithAdditionalKeys(PaddedPODArray<T> & index, size_t dict_size)
{