ClickHouse/src/Columns/ColumnNullable.cpp

628 lines
19 KiB
C++
Raw Normal View History

#include <Common/Arena.h>
#include <Common/SipHash.h>
#include <Common/NaNUtils.h>
2017-07-13 20:58:19 +00:00
#include <Common/typeid_cast.h>
#include <Common/assert_cast.h>
#include <Common/WeakHash.h>
#include <Columns/ColumnNullable.h>
2019-06-27 18:50:20 +00:00
#include <Columns/ColumnConst.h>
#include <DataStreams/ColumnGathererStream.h>
2017-03-11 01:25:27 +00:00
namespace DB
{
/// Error codes thrown from this file; only declared here.
namespace ErrorCodes
{
    extern const int LOGICAL_ERROR;
    extern const int ILLEGAL_COLUMN;
    extern const int SIZES_OF_NESTED_COLUMNS_ARE_INCONSISTENT;
}
2017-03-11 01:25:27 +00:00
2018-03-20 14:17:09 +00:00
/// Takes ownership of the nested column and the null map.
/// Throws ILLEGAL_COLUMN if the nested column cannot be inside Nullable or if the null map is constant.
ColumnNullable::ColumnNullable(MutableColumnPtr && nested_column_, MutableColumnPtr && null_map_)
    : nested_column(std::move(nested_column_))
    , null_map(std::move(null_map_))
{
    /// A constant column may be handed in, but it must not stay nested: replace it with its full form.
    nested_column = getNestedColumn().convertToFullColumnIfConst();

    auto & nested = getNestedColumn();
    if (!nested.canBeInsideNullable())
        throw Exception{nested.getName() + " cannot be inside Nullable column", ErrorCodes::ILLEGAL_COLUMN};

    if (isColumnConst(*null_map))
        throw Exception{"ColumnNullable cannot have constant null map", ErrorCodes::ILLEGAL_COLUMN};
}
2016-07-11 10:09:16 +00:00
void ColumnNullable::updateHashWithValue(size_t n, SipHash & hash) const
{
const auto & arr = getNullMapData();
2018-03-03 15:36:20 +00:00
hash.update(arr[n]);
if (arr[n] == 0)
getNestedColumn().updateHashWithValue(n, hash);
2016-07-11 10:09:16 +00:00
}
/// Lets the nested column update `hash`, then restores the previous hash value for every NULL row.
/// Throws LOGICAL_ERROR if `hash` does not have one entry per row of this column.
void ColumnNullable::updateWeakHash32(WeakHash32 & hash) const
{
    const auto rows = size();

    if (hash.getData().size() != rows)
        throw Exception("Size of WeakHash32 does not match size of column: column size is " + std::to_string(rows) +
                        ", hash size is " + std::to_string(hash.getData().size()), ErrorCodes::LOGICAL_ERROR);

    /// Copy taken before the nested column modifies the hash.
    WeakHash32 snapshot = hash;
    nested_column->updateWeakHash32(hash);

    const auto & null_flags = getNullMapData();
    auto & updated = hash.getData();
    auto & previous = snapshot.getData();

    /// NULL rows keep the value they had before the update.
    for (size_t i = 0; i != rows; ++i)
    {
        if (!null_flags[i])
            continue;
        updated[i] = previous[i];
    }
}
/// Forwards `hash` to updateHashFast of the null map first and of the nested column second.
void ColumnNullable::updateHashFast(SipHash & hash) const
{
null_map->updateHashFast(hash);
nested_column->updateHashFast(hash);
}
/// Returns a new Nullable column of `new_size` rows.
/// The nested column is produced by its own cloneResized; the leading null flags are copied
/// from this column and every row beyond the current size is flagged as NULL.
MutableColumnPtr ColumnNullable::cloneResized(size_t new_size) const
{
    MutableColumnPtr resized_nested = getNestedColumn().cloneResized(new_size);
    auto resized_null_map = ColumnUInt8::create();

    if (new_size != 0)
    {
        auto & flags = resized_null_map->getData();
        flags.resize(new_size);

        const size_t kept = std::min(size(), new_size);
        memcpy(flags.data(), getNullMapData().data(), kept * sizeof(getNullMapData()[0]));

        /// When growing, the added tail consists of NULLs.
        if (kept < new_size)
            memset(&flags[kept], 1, new_size - kept);
    }

    return ColumnNullable::create(std::move(resized_nested), std::move(resized_null_map));
}
/// Returns Null() for a NULL row, otherwise the nested column's value at `n`.
Field ColumnNullable::operator[](size_t n) const
{
    if (isNullAt(n))
        return Null();

    return getNestedColumn()[n];
}
/// Writes the value of row `n` into `res`: Null() for a NULL row, the nested value otherwise.
void ColumnNullable::get(size_t n, Field & res) const
{
    if (!isNullAt(n))
    {
        getNestedColumn().get(n, res);
        return;
    }

    res = Null();
}
2019-05-23 13:35:26 +00:00
/// Appends one row. A null `pos` appends a NULL (default nested value, flag 1);
/// otherwise the bytes are passed to the nested column and the flag is 0.
void ColumnNullable::insertData(const char * pos, size_t length)
{
    const bool is_null = (pos == nullptr);

    if (is_null)
        getNestedColumn().insertDefault();
    else
        getNestedColumn().insertData(pos, length);

    getNullMapData().push_back(is_null ? 1 : 0);
}
/// Serializes row `n` as the null flag followed, for non-NULL rows only, by the nested value.
/// Returns a reference covering the whole serialized span.
StringRef ColumnNullable::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
{
    const auto & null_flags = getNullMapData();
    static constexpr auto flag_size = sizeof(null_flags[0]);

    auto * flag_pos = arena.allocContinue(flag_size, begin);
    memcpy(flag_pos, &null_flags[n], flag_size);

    if (!null_flags[n])
    {
        const auto nested_ref = getNestedColumn().serializeValueIntoArena(n, arena, begin);

        /// The nested call may reallocate memory, so the start is derived from nested_ref.data, not flag_pos.
        return StringRef(nested_ref.data - flag_size, nested_ref.size + flag_size);
    }

    return StringRef(flag_pos, flag_size);
}
const char * ColumnNullable::deserializeAndInsertFromArena(const char * pos)
{
2020-01-03 15:28:38 +00:00
UInt8 val = unalignedLoad<UInt8>(pos);
pos += sizeof(val);
2016-08-16 12:53:22 +00:00
getNullMapData().push_back(val);
2016-08-16 12:53:22 +00:00
if (val == 0)
pos = getNestedColumn().deserializeAndInsertFromArena(pos);
else
getNestedColumn().insertDefault();
return pos;
}
void ColumnNullable::insertRangeFrom(const IColumn & src, size_t start, size_t length)
{
const ColumnNullable & nullable_col = assert_cast<const ColumnNullable &>(src);
getNullMapColumn().insertRangeFrom(*nullable_col.null_map, start, length);
getNestedColumn().insertRangeFrom(*nullable_col.nested_column, start, length);
}
/// Appends one row from a Field: a null Field becomes a NULL row (default nested value, flag 1),
/// any other Field is inserted into the nested column with flag 0.
void ColumnNullable::insert(const Field & x)
{
    const bool is_null = x.isNull();

    if (is_null)
        getNestedColumn().insertDefault();
    else
        getNestedColumn().insert(x);

    getNullMapData().push_back(is_null ? 1 : 0);
}
void ColumnNullable::insertFrom(const IColumn & src, size_t n)
{
const ColumnNullable & src_concrete = assert_cast<const ColumnNullable &>(src);
getNestedColumn().insertFrom(src_concrete.getNestedColumn(), n);
getNullMapData().push_back(src_concrete.getNullMapData()[n]);
}
/// Appends row `n` of `src` directly into the nested column and marks the new row as not NULL.
void ColumnNullable::insertFromNotNullable(const IColumn & src, size_t n)
{
    auto & nested = getNestedColumn();
    nested.insertFrom(src, n);

    getNullMapData().push_back(0);
}
/// Appends `length` rows of `src` starting at `start` directly into the nested column
/// and extends the null map with `length` zero flags.
void ColumnNullable::insertRangeFromNotNullable(const IColumn & src, size_t start, size_t length)
{
    getNestedColumn().insertRangeFrom(src, start, length);

    auto & flags = getNullMapData();
    flags.resize_fill(flags.size() + length, 0);
}
/// Appends the row at `position` of `src` `length` times, each as a not-NULL row.
void ColumnNullable::insertManyFromNotNullable(const IColumn & src, size_t position, size_t length)
{
    for (size_t remaining = length; remaining != 0; --remaining)
        insertFromNotNullable(src, position);
}
/// Removes the last `n` rows from the nested column and from the null map.
void ColumnNullable::popBack(size_t n)
{
    auto & nested = getNestedColumn();
    auto & flags_column = getNullMapColumn();

    nested.popBack(n);
    flags_column.popBack(n);
}
2018-03-20 14:17:09 +00:00
/// Applies the same filter to the nested column and to the null map and wraps both results in a new Nullable column.
ColumnPtr ColumnNullable::filter(const Filter & filt, ssize_t result_size_hint) const
{
    ColumnPtr nested_part = getNestedColumn().filter(filt, result_size_hint);
    ColumnPtr null_map_part = getNullMapColumn().filter(filt, result_size_hint);

    return ColumnNullable::create(nested_part, null_map_part);
}
/// Applies the same permutation (and limit) to the nested column and to the null map
/// and wraps both results in a new Nullable column.
ColumnPtr ColumnNullable::permute(const Permutation & perm, size_t limit) const
{
    ColumnPtr nested_part = getNestedColumn().permute(perm, limit);
    ColumnPtr null_map_part = getNullMapColumn().permute(perm, limit);

    return ColumnNullable::create(nested_part, null_map_part);
}
/// Applies the same `indexes` column (and limit) to the nested column and to the null map
/// and wraps both results in a new Nullable column.
ColumnPtr ColumnNullable::index(const IColumn & indexes, size_t limit) const
{
    ColumnPtr nested_part = getNestedColumn().index(indexes, limit);
    ColumnPtr null_map_part = getNullMapColumn().index(indexes, limit);

    return ColumnNullable::create(nested_part, null_map_part);
}
2016-08-16 11:26:17 +00:00
int ColumnNullable::compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const
{
/// NULL values share the properties of NaN values.
/// Here the last parameter of compareAt is called null_direction_hint
/// instead of the usual nan_direction_hint and is used to implement
/// the ordering specified by either NULLS FIRST or NULLS LAST in the
/// ORDER BY construction.
const ColumnNullable & nullable_rhs = assert_cast<const ColumnNullable &>(rhs_);
bool lval_is_null = isNullAt(n);
bool rval_is_null = nullable_rhs.isNullAt(m);
if (unlikely(lval_is_null || rval_is_null))
{
if (lval_is_null && rval_is_null)
return 0;
else
return lval_is_null ? null_direction_hint : -null_direction_hint;
}
const IColumn & nested_rhs = nullable_rhs.getNestedColumn();
return getNestedColumn().compareAt(n, m, nested_rhs, null_direction_hint);
}
void ColumnNullable::compareColumn(const IColumn & rhs, size_t rhs_row_num,
2020-06-17 11:43:55 +00:00
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const
2020-06-01 12:10:32 +00:00
{
2020-06-17 11:43:55 +00:00
return doCompareColumn<ColumnNullable>(assert_cast<const ColumnNullable &>(rhs), rhs_row_num, row_indexes,
compare_results, direction, nan_direction_hint);
2020-06-01 12:10:32 +00:00
}
/// Builds a sorting permutation in `res`.
/// The nested column produces a full permutation first; NULL rows are then moved to the end
/// or to the beginning of it, depending on `null_direction_hint` and `reverse`.
void ColumnNullable::getPermutation(bool reverse, size_t limit, int null_direction_hint, Permutation & res) const
{
    /// The limit cannot be forwarded: the number of NULLs is unknown at this point.
    getNestedColumn().getPermutation(reverse, 0, null_direction_hint, res);

    const bool nulls_go_last = (null_direction_hint > 0) != reverse;

    if (nulls_go_last)
    {
        /// Move NULLs towards the end.
        const size_t total = res.size();
        limit = limit ? std::min(total, limit) : total;

        /// Skip the leading run of non-NULL entries (bounded by limit).
        size_t scan = 0;
        while (scan < limit && !isNullAt(res[scan]))
            ++scan;

        size_t dest = scan;
        ++scan;

        /// From here on:
        ///  dest < scan, and dest refers to a NULL entry;
        ///  everything in [dest, scan - 1] is NULL;
        ///  each non-NULL found by scan is swapped to dest, so the NULL run drifts towards the end.
        /// Non-NULL entries keep their relative order; NULL entries may not.
        for (; scan < total && dest < limit; ++scan)
        {
            if (isNullAt(res[scan]))
                continue;

            std::swap(res[scan], res[dest]);
            ++dest;
        }
    }
    else
    {
        /// Move NULLs towards the beginning; the mirror image of the branch above, without a limit.
        ssize_t scan = res.size() - 1;
        while (scan >= 0 && !isNullAt(res[scan]))
            --scan;

        ssize_t dest = scan;
        --scan;

        for (; scan >= 0 && dest >= 0; --scan)
        {
            if (isNullAt(res[scan]))
                continue;

            std::swap(res[scan], res[dest]);
            --dest;
        }
    }
}
2020-09-04 14:36:08 +00:00
void ColumnNullable::updatePermutation(bool reverse, size_t limit, int null_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const
2020-05-14 21:00:56 +00:00
{
2020-09-04 18:05:06 +00:00
if (equal_ranges.empty())
return;
2020-09-04 14:36:08 +00:00
if (limit >= equal_ranges.back().second || limit >= size())
2020-05-12 00:58:58 +00:00
limit = 0;
2020-09-04 16:53:50 +00:00
/// We will sort nested columns into `new_ranges` and call updatePermutation in next columns with `null_ranges`.
EqualRanges new_ranges, null_ranges;
2020-05-12 00:58:58 +00:00
2020-09-04 14:36:08 +00:00
const auto is_nulls_last = ((null_direction_hint > 0) != reverse);
if (is_nulls_last)
2020-05-14 21:00:56 +00:00
{
2020-05-12 00:58:58 +00:00
/// Shift all NULL values to the end.
2020-09-04 14:36:08 +00:00
for (const auto & [first, last] : equal_ranges)
2020-05-12 00:58:58 +00:00
{
2020-09-04 14:36:08 +00:00
/// Consider a half interval [first, last)
size_t read_idx = first;
size_t write_idx = first;
size_t end_idx = last;
2020-05-12 00:58:58 +00:00
2020-09-04 14:36:08 +00:00
if (!limit)
2020-09-07 13:15:04 +00:00
limit = end_idx - read_idx;
2020-09-04 14:36:08 +00:00
else
2020-09-07 13:15:04 +00:00
limit = std::min(end_idx - read_idx, limit);
2020-05-12 00:58:58 +00:00
2020-09-07 13:15:04 +00:00
/// We simply check the limit not to do extra work.
/// Since interval begins from `first`, not from zero, we add `first` to the right side of the inequality.
while (read_idx < first + limit && !isNullAt(res[read_idx]))
2020-05-12 00:58:58 +00:00
{
2020-09-04 14:36:08 +00:00
++read_idx;
2020-05-12 00:58:58 +00:00
++write_idx;
}
2020-09-04 14:36:08 +00:00
2020-05-12 00:58:58 +00:00
++read_idx;
2020-05-18 11:38:22 +00:00
2020-09-04 14:36:08 +00:00
/// Invariants:
/// write_idx < read_idx
/// write_idx points to NULL
/// read_idx will be incremented to position of next not-NULL
/// there are range of NULLs between write_idx and read_idx - 1,
/// We are moving elements from end to begin of this range,
/// so range will "bubble" towards the end.
/// Relative order of NULL elements could be changed,
/// but relative order of non-NULLs is preserved.
2020-09-07 13:15:04 +00:00
while (read_idx < end_idx && write_idx < first + limit)
2020-09-04 14:36:08 +00:00
{
if (!isNullAt(res[read_idx]))
{
std::swap(res[read_idx], res[write_idx]);
++write_idx;
}
++read_idx;
}
/// We have a range [first, write_idx) of non-NULL values
if (first != write_idx)
2020-05-12 00:58:58 +00:00
new_ranges.emplace_back(first, write_idx);
2020-05-18 11:38:22 +00:00
2020-09-04 14:36:08 +00:00
/// We have a range [write_idx, list) of NULL values
if (write_idx != last)
2020-09-04 16:53:50 +00:00
null_ranges.emplace_back(write_idx, last);
2020-05-12 00:58:58 +00:00
}
}
2020-09-04 14:36:08 +00:00
else
{
for (const auto & [first, last] : equal_ranges)
{
/// Shift all NULL values to the beginning.
ssize_t read_idx = last - 1;
ssize_t write_idx = last - 1;
ssize_t begin_idx = first;
while (read_idx >= begin_idx && !isNullAt(res[read_idx]))
{
--read_idx;
--write_idx;
}
--read_idx;
while (read_idx >= begin_idx && write_idx >= begin_idx)
{
if (!isNullAt(res[read_idx]))
{
std::swap(res[read_idx], res[write_idx]);
--write_idx;
}
--read_idx;
}
/// We have a range [write_idx+1, last) of non-NULL values
if (write_idx != static_cast<ssize_t>(last))
new_ranges.emplace_back(write_idx + 1, last);
/// We have a range [first, write_idx+1) of NULL values
if (static_cast<ssize_t>(first) != write_idx)
2020-09-04 16:53:50 +00:00
null_ranges.emplace_back(first, write_idx + 1);
2020-09-04 14:36:08 +00:00
}
}
2020-05-18 11:38:22 +00:00
2020-09-04 14:36:08 +00:00
getNestedColumn().updatePermutation(reverse, 0, null_direction_hint, res, new_ranges);
2020-05-18 11:38:22 +00:00
2020-09-04 14:36:08 +00:00
equal_ranges = std::move(new_ranges);
2020-09-04 18:05:06 +00:00
std::move(null_ranges.begin(), null_ranges.end(), std::back_inserter(equal_ranges));
2020-05-12 00:58:58 +00:00
}
/// Passes this column to `gatherer.gather`.
void ColumnNullable::gather(ColumnGathererStream & gatherer)
{
gatherer.gather(*this);
}
/// Reserves room for `n` rows in the nested column and in the null map data.
void ColumnNullable::reserve(size_t n)
{
    auto & nested = getNestedColumn();
    auto & flags = getNullMapData();

    nested.reserve(n);
    flags.reserve(n);
}
/// Sum of byteSize() of the nested column and of the null-map column.
size_t ColumnNullable::byteSize() const
{
    const size_t nested_bytes = getNestedColumn().byteSize();
    const size_t null_map_bytes = getNullMapColumn().byteSize();

    return nested_bytes + null_map_bytes;
}
/// Sum of allocatedBytes() of the nested column and of the null-map column.
size_t ColumnNullable::allocatedBytes() const
{
    const size_t nested_bytes = getNestedColumn().allocatedBytes();
    const size_t null_map_bytes = getNullMapColumn().allocatedBytes();

    return nested_bytes + null_map_bytes;
}
/// Calls protect() on the nested column and then on the null-map column.
void ColumnNullable::protect()
{
getNestedColumn().protect();
getNullMapColumn().protect();
}
2016-08-16 11:26:17 +00:00
namespace
{
/// The following function implements a slightly more general version
/// of getExtremes() than the implementation from ColumnVector.
/// It takes into account the possible presence of nullable values.
template <typename T>
2017-03-29 11:33:07 +00:00
void getExtremesFromNullableContent(const ColumnVector<T> & col, const NullMap & null_map, Field & min, Field & max)
2016-08-16 11:26:17 +00:00
{
const auto & data = col.getData();
size_t size = data.size();
if (size == 0)
{
min = Null();
max = Null();
return;
}
bool has_not_null = false;
bool has_not_nan = false;
T cur_min = 0;
T cur_max = 0;
for (size_t i = 0; i < size; ++i)
{
const T x = data[i];
if (null_map[i])
continue;
if (!has_not_null)
{
cur_min = x;
cur_max = x;
has_not_null = true;
has_not_nan = !isNaN(x);
continue;
}
if (isNaN(x))
continue;
if (!has_not_nan)
{
cur_min = x;
cur_max = x;
has_not_nan = true;
continue;
}
if (x < cur_min)
cur_min = x;
else if (x > cur_max)
cur_max = x;
}
if (has_not_null)
{
min = cur_min;
max = cur_max;
}
2016-08-16 11:26:17 +00:00
}
}
/// Computes `min` and `max` over the non-NULL rows when the nested column is one of the
/// Int8..Int64, UInt8..UInt64, Float32 or Float64 columns. For any other nested column both stay Null().
void ColumnNullable::getExtremes(Field & min, Field & max) const
{
    min = Null();
    max = Null();

    const auto & null_flags = getNullMapData();
    const auto * nested = nested_column.get();

    /// Runs the computation when the cast succeeded; reports whether it did.
    auto try_column = [&](const auto * typed) -> bool
    {
        if (!typed)
            return false;

        getExtremesFromNullableContent(*typed, null_flags, min, max);
        return true;
    };

    /// Candidates are probed in order; the first matching type ends the chain.
    [[maybe_unused]] const bool handled =
        try_column(typeid_cast<const ColumnInt8 *>(nested))
        || try_column(typeid_cast<const ColumnInt16 *>(nested))
        || try_column(typeid_cast<const ColumnInt32 *>(nested))
        || try_column(typeid_cast<const ColumnInt64 *>(nested))
        || try_column(typeid_cast<const ColumnUInt8 *>(nested))
        || try_column(typeid_cast<const ColumnUInt16 *>(nested))
        || try_column(typeid_cast<const ColumnUInt32 *>(nested))
        || try_column(typeid_cast<const ColumnUInt64 *>(nested))
        || try_column(typeid_cast<const ColumnFloat32 *>(nested))
        || try_column(typeid_cast<const ColumnFloat64 *>(nested));
}
2018-03-20 14:17:09 +00:00
/// Replicates the nested column and the null map with the same offsets
/// and wraps both results in a new Nullable column.
ColumnPtr ColumnNullable::replicate(const Offsets & offsets) const
{
    ColumnPtr nested_part = getNestedColumn().replicate(offsets);
    ColumnPtr null_map_part = getNullMapColumn().replicate(offsets);

    return ColumnNullable::create(nested_part, null_map_part);
}
template <bool negative>
2017-03-29 11:33:07 +00:00
void ColumnNullable::applyNullMapImpl(const ColumnUInt8 & map)
{
NullMap & arr1 = getNullMapData();
const NullMap & arr2 = map.getData();
if (arr1.size() != arr2.size())
throw Exception{"Inconsistent sizes of ColumnNullable objects", ErrorCodes::LOGICAL_ERROR};
for (size_t i = 0, size = arr1.size(); i < size; ++i)
arr1[i] |= negative ^ arr2[i];
}
2017-03-29 11:33:07 +00:00
/// Merges `map` into this column's null map via applyNullMapImpl<false>.
void ColumnNullable::applyNullMap(const ColumnUInt8 & map)
{
applyNullMapImpl<false>(map);
}
2017-03-29 11:33:07 +00:00
/// Merges the negated `map` into this column's null map via applyNullMapImpl<true>.
void ColumnNullable::applyNegatedNullMap(const ColumnUInt8 & map)
{
applyNullMapImpl<true>(map);
}
2017-03-29 11:33:07 +00:00
/// Merges the null map of another Nullable column into this column's null map.
void ColumnNullable::applyNullMap(const ColumnNullable & other)
{
applyNullMap(other.getNullMapColumn());
}
2017-04-17 20:19:09 +00:00
void ColumnNullable::checkConsistency() const
{
if (null_map->size() != getNestedColumn().size())
2017-04-17 20:19:09 +00:00
throw Exception("Logical error: Sizes of nested column and null map of Nullable column are not equal",
ErrorCodes::SIZES_OF_NESTED_COLUMNS_ARE_INCONSISTENT);
}
2017-12-10 22:44:04 +00:00
/// Returns a Nullable version of `column`.
/// An already Nullable column is returned as is. A constant column gets its data column made
/// Nullable and is wrapped into a constant of the same size again. Any other column is wrapped
/// with a null map consisting of zeros.
ColumnPtr makeNullable(const ColumnPtr & column)
{
    if (isColumnNullable(*column))
        return column;

    if (!isColumnConst(*column))
        return ColumnNullable::create(column, ColumnUInt8::create(column->size(), 0));

    const auto & constant = assert_cast<const ColumnConst &>(*column);
    return ColumnConst::create(makeNullable(constant.getDataColumnPtr()), column->size());
}
}