Nullable primary key with correct KeyCondition

This commit is contained in:
Amos Bird 2021-01-02 17:47:38 +08:00
parent e4d80f53fa
commit f2ed5ef42b
No known key found for this signature in database
GPG Key ID: 80D430DCBECFEDB4
36 changed files with 661 additions and 443 deletions

View File

@ -459,6 +459,8 @@ public:
explicit FieldVisitorMax(const Field & rhs_) : rhs(rhs_) {}
bool operator() (Null &) const { throw Exception("Cannot compare Nulls", ErrorCodes::LOGICAL_ERROR); }
bool operator() (NegativeInfinity &) const { throw Exception("Cannot compare -Inf", ErrorCodes::LOGICAL_ERROR); }
bool operator() (PositiveInfinity &) const { throw Exception("Cannot compare +Inf", ErrorCodes::LOGICAL_ERROR); }
bool operator() (AggregateFunctionStateData &) const { throw Exception("Cannot compare AggregateFunctionStates", ErrorCodes::LOGICAL_ERROR); }
bool operator() (Array & x) const { return compareImpl<Array>(x); }
@ -494,6 +496,8 @@ public:
explicit FieldVisitorMin(const Field & rhs_) : rhs(rhs_) {}
bool operator() (Null &) const { throw Exception("Cannot compare Nulls", ErrorCodes::LOGICAL_ERROR); }
bool operator() (NegativeInfinity &) const { throw Exception("Cannot compare -Inf", ErrorCodes::LOGICAL_ERROR); }
bool operator() (PositiveInfinity &) const { throw Exception("Cannot compare +Inf", ErrorCodes::LOGICAL_ERROR); }
/// Aggregate function states have no ordering, so taking a minimum over them is rejected.
bool operator() (AggregateFunctionStateData &) const { throw Exception("Cannot compare AggregateFunctionStates", ErrorCodes::LOGICAL_ERROR); }
bool operator() (Array & x) const { return compareImpl<Array>(x); }

View File

@ -546,97 +546,54 @@ namespace
{
/// The following function implements a slightly more general version
/// of getExtremes() than the implementation from ColumnVector.
/// of getExtremes() than the implementation from Not-Null IColumns.
/// It takes into account the possible presence of nullable values.
template <typename T>
void getExtremesFromNullableContent(const ColumnVector<T> & col, const NullMap & null_map, Field & min, Field & max)
void getExtremesWithNulls(const IColumn & nested_column, const NullMap & null_array, Field & min, Field & max, bool null_last = false)
{
const auto & data = col.getData();
size_t size = data.size();
if (size == 0)
size_t number_of_nulls = 0;
size_t n = null_array.size();
NullMap not_null_array(n);
for (auto i = 0ul; i < n; ++i)
{
min = Null();
max = Null();
return;
}
bool has_not_null = false;
bool has_not_nan = false;
T cur_min = 0;
T cur_max = 0;
for (size_t i = 0; i < size; ++i)
{
const T x = data[i];
if (null_map[i])
continue;
if (!has_not_null)
if (null_array[i])
{
cur_min = x;
cur_max = x;
has_not_null = true;
has_not_nan = !isNaN(x);
continue;
++number_of_nulls;
not_null_array[i] = 0;
}
if (isNaN(x))
continue;
if (!has_not_nan)
else
{
cur_min = x;
cur_max = x;
has_not_nan = true;
continue;
not_null_array[i] = 1;
}
if (x < cur_min)
cur_min = x;
else if (x > cur_max)
cur_max = x;
}
if (has_not_null)
if (number_of_nulls == 0)
{
min = cur_min;
max = cur_max;
nested_column.getExtremes(min, max);
}
else if (number_of_nulls == n)
{
min = PositiveInfinity();
max = PositiveInfinity();
}
else
{
auto filtered_column = nested_column.filter(not_null_array, -1);
filtered_column->getExtremes(min, max);
if (null_last)
max = PositiveInfinity();
}
}
}
void ColumnNullable::getExtremes(Field & min, Field & max) const
{
min = Null();
max = Null();
getExtremesWithNulls(getNestedColumn(), getNullMapData(), min, max);
}
const auto & null_map_data = getNullMapData();
if (const auto * col_i8 = typeid_cast<const ColumnInt8 *>(nested_column.get()))
getExtremesFromNullableContent<Int8>(*col_i8, null_map_data, min, max);
else if (const auto * col_i16 = typeid_cast<const ColumnInt16 *>(nested_column.get()))
getExtremesFromNullableContent<Int16>(*col_i16, null_map_data, min, max);
else if (const auto * col_i32 = typeid_cast<const ColumnInt32 *>(nested_column.get()))
getExtremesFromNullableContent<Int32>(*col_i32, null_map_data, min, max);
else if (const auto * col_i64 = typeid_cast<const ColumnInt64 *>(nested_column.get()))
getExtremesFromNullableContent<Int64>(*col_i64, null_map_data, min, max);
else if (const auto * col_u8 = typeid_cast<const ColumnUInt8 *>(nested_column.get()))
getExtremesFromNullableContent<UInt8>(*col_u8, null_map_data, min, max);
else if (const auto * col_u16 = typeid_cast<const ColumnUInt16 *>(nested_column.get()))
getExtremesFromNullableContent<UInt16>(*col_u16, null_map_data, min, max);
else if (const auto * col_u32 = typeid_cast<const ColumnUInt32 *>(nested_column.get()))
getExtremesFromNullableContent<UInt32>(*col_u32, null_map_data, min, max);
else if (const auto * col_u64 = typeid_cast<const ColumnUInt64 *>(nested_column.get()))
getExtremesFromNullableContent<UInt64>(*col_u64, null_map_data, min, max);
else if (const auto * col_f32 = typeid_cast<const ColumnFloat32 *>(nested_column.get()))
getExtremesFromNullableContent<Float32>(*col_f32, null_map_data, min, max);
else if (const auto * col_f64 = typeid_cast<const ColumnFloat64 *>(nested_column.get()))
getExtremesFromNullableContent<Float64>(*col_f64, null_map_data, min, max);
/// Extremes for the nullable minmax index: same as getExtremes(), but asks
/// getExtremesWithNulls() to treat NULLs as the largest values (null_last = true).
void ColumnNullable::getExtremesNullLast(Field & min, Field & max) const
{
    constexpr bool null_last = true;
    getExtremesWithNulls(getNestedColumn(), getNullMapData(), min, max, null_last);
}

View File

@ -111,6 +111,8 @@ public:
void updateWeakHash32(WeakHash32 & hash) const override;
void updateHashFast(SipHash & hash) const override;
void getExtremes(Field & min, Field & max) const override;
// Special function for nullable minmax index
void getExtremesNullLast(Field & min, Field & max) const;
MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override
{

View File

@ -26,6 +26,16 @@ public:
throw Exception("Cannot convert NULL to " + demangle(typeid(T).name()), ErrorCodes::CANNOT_CONVERT_TYPE);
}
/// -Inf cannot be converted to a number: always throws CANNOT_CONVERT_TYPE naming the target type T.
T operator() (const NegativeInfinity &) const
{
    const auto target_type_name = demangle(typeid(T).name());
    throw Exception("Cannot convert -Inf to " + target_type_name, ErrorCodes::CANNOT_CONVERT_TYPE);
}
/// +Inf cannot be converted to a number: always throws CANNOT_CONVERT_TYPE naming the target type T.
T operator() (const PositiveInfinity &) const
{
    const auto target_type_name = demangle(typeid(T).name());
    throw Exception("Cannot convert +Inf to " + target_type_name, ErrorCodes::CANNOT_CONVERT_TYPE);
}
T operator() (const String &) const
{
throw Exception("Cannot convert String to " + demangle(typeid(T).name()), ErrorCodes::CANNOT_CONVERT_TYPE);

View File

@ -25,6 +25,8 @@ static inline void writeQuoted(const DecimalField<T> & x, WriteBuffer & buf)
}
String FieldVisitorDump::operator() (const Null &) const { return "NULL"; }
String FieldVisitorDump::operator() (const NegativeInfinity &) const { return "-Inf"; }
String FieldVisitorDump::operator() (const PositiveInfinity &) const { return "+Inf"; }
String FieldVisitorDump::operator() (const UInt64 & x) const { return formatQuotedWithPrefix(x, "UInt64_"); }
String FieldVisitorDump::operator() (const Int64 & x) const { return formatQuotedWithPrefix(x, "Int64_"); }
String FieldVisitorDump::operator() (const Float64 & x) const { return formatQuotedWithPrefix(x, "Float64_"); }

View File

@ -10,6 +10,8 @@ class FieldVisitorDump : public StaticVisitor<String>
{
public:
String operator() (const Null & x) const;
String operator() (const NegativeInfinity & x) const;
String operator() (const PositiveInfinity & x) const;
String operator() (const UInt64 & x) const;
String operator() (const UInt128 & x) const;
String operator() (const UInt256 & x) const;

View File

@ -14,6 +14,18 @@ void FieldVisitorHash::operator() (const Null &) const
hash.update(type);
}
/// -Inf is an empty tag type, so only its type tag is fed into the hash.
void FieldVisitorHash::operator() (const NegativeInfinity &) const
{
    UInt8 type_tag = Field::Types::NegativeInfinity;
    hash.update(type_tag);
}
/// +Inf is an empty tag type, so only its type tag is fed into the hash.
void FieldVisitorHash::operator() (const PositiveInfinity &) const
{
    UInt8 type_tag = Field::Types::PositiveInfinity;
    hash.update(type_tag);
}
void FieldVisitorHash::operator() (const UInt64 & x) const
{
UInt8 type = Field::Types::UInt64;

View File

@ -16,6 +16,8 @@ public:
FieldVisitorHash(SipHash & hash_);
void operator() (const Null & x) const;
void operator() (const NegativeInfinity & x) const;
void operator() (const PositiveInfinity & x) const;
void operator() (const UInt64 & x) const;
void operator() (const UInt128 & x) const;
void operator() (const UInt256 & x) const;

View File

@ -22,6 +22,8 @@ bool FieldVisitorSum::operator() (UInt64 & x) const
bool FieldVisitorSum::operator() (Float64 & x) const { x += get<Float64>(rhs); return x != 0; }
bool FieldVisitorSum::operator() (Null &) const { throw Exception("Cannot sum Nulls", ErrorCodes::LOGICAL_ERROR); }
bool FieldVisitorSum::operator() (NegativeInfinity &) const { throw Exception("Cannot sum -Inf", ErrorCodes::LOGICAL_ERROR); }
bool FieldVisitorSum::operator() (PositiveInfinity &) const { throw Exception("Cannot sum +Inf", ErrorCodes::LOGICAL_ERROR); }
bool FieldVisitorSum::operator() (String &) const { throw Exception("Cannot sum Strings", ErrorCodes::LOGICAL_ERROR); }
bool FieldVisitorSum::operator() (Array &) const { throw Exception("Cannot sum Arrays", ErrorCodes::LOGICAL_ERROR); }
bool FieldVisitorSum::operator() (Tuple &) const { throw Exception("Cannot sum Tuples", ErrorCodes::LOGICAL_ERROR); }

View File

@ -21,6 +21,8 @@ public:
bool operator() (UInt64 & x) const;
bool operator() (Float64 & x) const;
bool operator() (Null &) const;
bool operator() (NegativeInfinity & x) const;
bool operator() (PositiveInfinity & x) const;
bool operator() (String &) const;
bool operator() (Array &) const;
bool operator() (Tuple &) const;

View File

@ -53,6 +53,8 @@ static String formatFloat(const Float64 x)
String FieldVisitorToString::operator() (const Null &) const { return "NULL"; }
String FieldVisitorToString::operator() (const NegativeInfinity &) const { return "-Inf"; }
String FieldVisitorToString::operator() (const PositiveInfinity &) const { return "+Inf"; }
String FieldVisitorToString::operator() (const UInt64 & x) const { return formatQuoted(x); }
String FieldVisitorToString::operator() (const Int64 & x) const { return formatQuoted(x); }
String FieldVisitorToString::operator() (const Float64 & x) const { return formatFloat(x); }

View File

@ -10,6 +10,8 @@ class FieldVisitorToString : public StaticVisitor<String>
{
public:
String operator() (const Null & x) const;
String operator() (const NegativeInfinity & x) const;
String operator() (const PositiveInfinity & x) const;
String operator() (const UInt64 & x) const;
String operator() (const UInt128 & x) const;
String operator() (const UInt256 & x) const;

View File

@ -7,6 +7,8 @@ namespace DB
{
void FieldVisitorWriteBinary::operator() (const Null &, WriteBuffer &) const { }
void FieldVisitorWriteBinary::operator() (const NegativeInfinity &, WriteBuffer &) const { }
void FieldVisitorWriteBinary::operator() (const PositiveInfinity &, WriteBuffer &) const { }
void FieldVisitorWriteBinary::operator() (const UInt64 & x, WriteBuffer & buf) const { writeVarUInt(x, buf); }
void FieldVisitorWriteBinary::operator() (const Int64 & x, WriteBuffer & buf) const { writeVarInt(x, buf); }
void FieldVisitorWriteBinary::operator() (const Float64 & x, WriteBuffer & buf) const { writeFloatBinary(x, buf); }

View File

@ -9,6 +9,8 @@ class FieldVisitorWriteBinary
{
public:
void operator() (const Null & x, WriteBuffer & buf) const;
void operator() (const NegativeInfinity & x, WriteBuffer & buf) const;
void operator() (const PositiveInfinity & x, WriteBuffer & buf) const;
void operator() (const UInt64 & x, WriteBuffer & buf) const;
void operator() (const UInt128 & x, WriteBuffer & buf) const;
void operator() (const UInt256 & x, WriteBuffer & buf) const;

View File

@ -26,8 +26,12 @@ public:
template <typename T, typename U>
bool operator() (const T & l, const U & r) const
{
if constexpr (std::is_same_v<T, Null> || std::is_same_v<U, Null>)
if constexpr (std::is_same_v<T, Null> || std::is_same_v<U, Null>
|| std::is_same_v<T, NegativeInfinity> || std::is_same_v<T, PositiveInfinity>
|| std::is_same_v<U, NegativeInfinity> || std::is_same_v<U, PositiveInfinity>)
{
return std::is_same_v<T, U>;
}
else
{
if constexpr (std::is_same_v<T, U>)
@ -77,6 +81,10 @@ public:
{
if constexpr (std::is_same_v<T, Null> || std::is_same_v<U, Null>)
return false;
else if constexpr (std::is_same_v<T, NegativeInfinity> || std::is_same_v<U, PositiveInfinity>)
return !std::is_same_v<T, U>;
else if constexpr (std::is_same_v<U, NegativeInfinity> || std::is_same_v<T, PositiveInfinity>)
return false;
else
{
if constexpr (std::is_same_v<T, U>)

View File

@ -455,6 +455,16 @@ inline void writeText(const Null &, WriteBuffer & buf)
writeText(std::string("NULL"), buf);
}
inline void writeText(const NegativeInfinity &, WriteBuffer & buf)
{
writeText(std::string("-Inf"), buf);
}
inline void writeText(const PositiveInfinity &, WriteBuffer & buf)
{
writeText(std::string("+Inf"), buf);
}
String toString(const Field & x)
{
return Field::dispatch(

View File

@ -218,6 +218,8 @@ template <> struct NearestFieldTypeImpl<Tuple> { using Type = Tuple; };
template <> struct NearestFieldTypeImpl<Map> { using Type = Map; };
template <> struct NearestFieldTypeImpl<bool> { using Type = UInt64; };
template <> struct NearestFieldTypeImpl<Null> { using Type = Null; };
template <> struct NearestFieldTypeImpl<NegativeInfinity> { using Type = NegativeInfinity; };
template <> struct NearestFieldTypeImpl<PositiveInfinity> { using Type = PositiveInfinity; };
template <> struct NearestFieldTypeImpl<AggregateFunctionStateData> { using Type = AggregateFunctionStateData; };
@ -251,11 +253,13 @@ public:
enum Which
{
Null = 0,
UInt64 = 1,
Int64 = 2,
Float64 = 3,
UInt128 = 4,
Int128 = 5,
NegativeInfinity = 1,
PositiveInfinity = 2,
UInt64 = 3,
Int64 = 4,
Float64 = 5,
UInt128 = 6,
Int128 = 7,
String = 16,
Array = 17,
@ -276,6 +280,8 @@ public:
switch (which)
{
case Null: return "Null";
case NegativeInfinity: return "-Inf";
case PositiveInfinity: return "+Inf";
case UInt64: return "UInt64";
case UInt128: return "UInt128";
case UInt256: return "UInt256";
@ -404,7 +410,10 @@ public:
Types::Which getType() const { return which; }
const char * getTypeName() const { return Types::toString(which); }
bool isNull() const { return which == Types::Null; }
// Non-valued fields (Null, -Inf, +Inf) are all treated as Null.
bool isNull() const { return which == Types::Null || which == Types::NegativeInfinity || which == Types::PositiveInfinity; }
bool isNegativeInfinity() const { return which == Types::NegativeInfinity; }
bool isPositiveInfinity() const { return which == Types::PositiveInfinity; }
template <typename T>
@ -459,7 +468,10 @@ public:
switch (which)
{
case Types::Null: return false;
case Types::Null:
case Types::NegativeInfinity:
case Types::PositiveInfinity:
return false;
case Types::UInt64: return get<UInt64>() < rhs.get<UInt64>();
case Types::UInt128: return get<UInt128>() < rhs.get<UInt128>();
case Types::UInt256: return get<UInt256>() < rhs.get<UInt256>();
@ -496,7 +508,10 @@ public:
switch (which)
{
case Types::Null: return true;
case Types::Null:
case Types::NegativeInfinity:
case Types::PositiveInfinity:
return true;
case Types::UInt64: return get<UInt64>() <= rhs.get<UInt64>();
case Types::UInt128: return get<UInt128>() <= rhs.get<UInt128>();
case Types::UInt256: return get<UInt256>() <= rhs.get<UInt256>();
@ -533,8 +548,11 @@ public:
switch (which)
{
case Types::Null: return true;
case Types::UInt64: return get<UInt64>() == rhs.get<UInt64>();
case Types::Null:
case Types::NegativeInfinity:
case Types::PositiveInfinity:
return true;
case Types::UInt64: return get<UInt64>() == rhs.get<UInt64>();
case Types::Int64: return get<Int64>() == rhs.get<Int64>();
case Types::Float64:
{
@ -573,6 +591,8 @@ public:
switch (field.which)
{
case Types::Null: return f(field.template get<Null>());
case Types::NegativeInfinity: return f(field.template get<NegativeInfinity>());
case Types::PositiveInfinity: return f(field.template get<PositiveInfinity>());
// gcc 8.2.1
#if !defined(__clang__)
#pragma GCC diagnostic push
@ -731,6 +751,8 @@ using Row = std::vector<Field>;
template <> struct Field::TypeToEnum<Null> { static const Types::Which value = Types::Null; };
template <> struct Field::TypeToEnum<NegativeInfinity> { static const Types::Which value = Types::NegativeInfinity; };
template <> struct Field::TypeToEnum<PositiveInfinity> { static const Types::Which value = Types::PositiveInfinity; };
template <> struct Field::TypeToEnum<UInt64> { static const Types::Which value = Types::UInt64; };
template <> struct Field::TypeToEnum<UInt128> { static const Types::Which value = Types::UInt128; };
template <> struct Field::TypeToEnum<UInt256> { static const Types::Which value = Types::UInt256; };
@ -751,6 +773,8 @@ template <> struct Field::TypeToEnum<DecimalField<DateTime64>>{ static const Typ
template <> struct Field::TypeToEnum<AggregateFunctionStateData>{ static const Types::Which value = Types::AggregateFunctionState; };
template <> struct Field::EnumToType<Field::Types::Null> { using Type = Null; };
template <> struct Field::EnumToType<Field::Types::NegativeInfinity> { using Type = NegativeInfinity; };
template <> struct Field::EnumToType<Field::Types::PositiveInfinity> { using Type = PositiveInfinity; };
template <> struct Field::EnumToType<Field::Types::UInt64> { using Type = UInt64; };
template <> struct Field::EnumToType<Field::Types::UInt128> { using Type = UInt128; };
template <> struct Field::EnumToType<Field::Types::UInt256> { using Type = UInt256; };

View File

@ -14,6 +14,8 @@ namespace DB
/// Data types for representing elementary values from a database in RAM.
struct Null {};
/// Empty tag type: a Field holding it represents negative infinity and carries no data.
struct NegativeInfinity {};
/// Empty tag type: a Field holding it represents positive infinity and carries no data.
struct PositiveInfinity {};
/// Ignore strange gcc warning https://gcc.gnu.org/bugzilla/show_bug.cgi?id=55776
#if !defined(__clang__)

View File

@ -19,6 +19,7 @@ namespace DB
namespace ErrorCodes
{
extern const int EMPTY_DATA_PASSED;
extern const int LOGICAL_ERROR;
}
@ -27,6 +28,16 @@ DataTypePtr FieldToDataType::operator() (const Null &) const
return std::make_shared<DataTypeNullable>(std::make_shared<DataTypeNothing>());
}
/// No data type is derived for -Inf: it is not a valid SQL literal, so this always throws LOGICAL_ERROR.
DataTypePtr FieldToDataType::operator() (const NegativeInfinity &) const
{
    const char * const message = "It's invalid to have -inf literals in SQL";
    throw Exception(message, ErrorCodes::LOGICAL_ERROR);
}
/// No data type is derived for +Inf: it is not a valid SQL literal, so this always throws LOGICAL_ERROR.
DataTypePtr FieldToDataType::operator() (const PositiveInfinity &) const
{
    const char * const message = "It's invalid to have +inf literals in SQL";
    throw Exception(message, ErrorCodes::LOGICAL_ERROR);
}
DataTypePtr FieldToDataType::operator() (const UInt64 & x) const
{
if (x <= std::numeric_limits<UInt8>::max()) return std::make_shared<DataTypeUInt8>();

View File

@ -21,6 +21,8 @@ class FieldToDataType : public StaticVisitor<DataTypePtr>
{
public:
DataTypePtr operator() (const Null & x) const;
DataTypePtr operator() (const NegativeInfinity & x) const;
DataTypePtr operator() (const PositiveInfinity & x) const;
DataTypePtr operator() (const UInt64 & x) const;
DataTypePtr operator() (const UInt128 & x) const;
DataTypePtr operator() (const UInt256 & x) const;

View File

@ -359,7 +359,7 @@ SetPtr ExpressionAnalyzer::isPlainStorageSetInSubquery(const ASTPtr & subquery_o
}
/// Performance optimisation for IN() if storage supports it.
/// Performance optimization for IN() if storage supports it.
void SelectQueryExpressionAnalyzer::makeSetsForIndex(const ASTPtr & node)
{
if (!node || !storage() || !storage()->supportsIndexForIn())

View File

@ -217,6 +217,8 @@ bool Set::insertFromBlock(const Block & block)
set_elements[i] = filtered_column;
else
set_elements[i]->insertRangeFrom(*filtered_column, 0, filtered_column->size());
if (transform_null_in && null_map_holder)
set_elements[i]->insert(Null{});
}
}
@ -281,7 +283,7 @@ ColumnPtr Set::execute(const Block & block, bool negative) const
key_columns.emplace_back() = materialized_columns.back().get();
}
/// We will check existence in Set only for keys, where all components are not NULL.
/// We will check existence in Set only for keys whose components do not contain any NULL value.
ConstNullMapPtr null_map{};
ColumnPtr null_map_holder;
if (!transform_null_in)
@ -408,7 +410,7 @@ MergeTreeSetIndex::MergeTreeSetIndex(const Columns & set_elements, std::vector<K
std::sort(indexes_mapping.begin(), indexes_mapping.end(),
[](const KeyTuplePositionMapping & l, const KeyTuplePositionMapping & r)
{
return std::forward_as_tuple(l.key_index, l.tuple_index) < std::forward_as_tuple(r.key_index, r.tuple_index);
return std::tie(l.key_index, l.tuple_index) < std::tie(r.key_index, r.tuple_index);
});
indexes_mapping.erase(std::unique(
@ -447,8 +449,8 @@ BoolMask MergeTreeSetIndex::checkInRange(const std::vector<Range> & key_ranges,
{
size_t tuple_size = indexes_mapping.size();
ColumnsWithInfinity left_point;
ColumnsWithInfinity right_point;
FieldValues left_point;
FieldValues right_point;
left_point.reserve(tuple_size);
right_point.reserve(tuple_size);
@ -458,8 +460,8 @@ BoolMask MergeTreeSetIndex::checkInRange(const std::vector<Range> & key_ranges,
right_point.emplace_back(ordered_set[i]->cloneEmpty());
}
bool invert_left_infinities = false;
bool invert_right_infinities = false;
bool left_included = true;
bool right_included = true;
for (size_t i = 0; i < tuple_size; ++i)
{
@ -471,48 +473,29 @@ BoolMask MergeTreeSetIndex::checkInRange(const std::vector<Range> & key_ranges,
if (!new_range)
return {true, true};
/** A range that ends in (x, y, ..., +inf) exclusive is the same as a range
* that ends in (x, y, ..., -inf) inclusive and vice versa for the left bound.
*/
if (new_range->left_bounded)
{
if (!new_range->left_included)
invert_left_infinities = true;
left_point[i].update(new_range->left);
}
else
{
if (invert_left_infinities)
left_point[i].update(ValueWithInfinity::PLUS_INFINITY);
else
left_point[i].update(ValueWithInfinity::MINUS_INFINITY);
}
if (new_range->right_bounded)
{
if (!new_range->right_included)
invert_right_infinities = true;
right_point[i].update(new_range->right);
}
else
{
if (invert_right_infinities)
right_point[i].update(ValueWithInfinity::MINUS_INFINITY);
else
right_point[i].update(ValueWithInfinity::PLUS_INFINITY);
}
left_point[i].update(new_range->left);
left_included &= new_range->left_included;
right_point[i].update(new_range->right);
right_included &= new_range->right_included;
}
auto compare = [](const IColumn & lhs, const ValueWithInfinity & rhs, size_t row)
/// lhs < rhs return -1
/// lhs == rhs return 0
/// lhs > rhs return 1
auto compare = [](const IColumn & lhs, const FieldValue & rhs, size_t row)
{
auto type = rhs.getType();
/// Return inverted infinity sign, because in 'lhs' all values are finite.
if (type != ValueWithInfinity::NORMAL)
return -static_cast<int>(type);
return lhs.compareAt(row, 0, rhs.getColumnIfFinite(), 1);
if (rhs.isNegativeInfinity())
return 1;
if (rhs.isPositiveInfinity())
{
Field f;
lhs.get(row, f);
if (f.isNull())
return 0; // +Inf == +Inf
else
return -1;
}
return lhs.compareAt(row, 0, *rhs.column, 1);
};
auto less = [this, &compare, tuple_size](size_t row, const auto & point)
@ -535,29 +518,27 @@ BoolMask MergeTreeSetIndex::checkInRange(const std::vector<Range> & key_ranges,
};
/** Because each hyperrectangle maps to a contiguous sequence of elements
* laid out in the lexicographically increasing order, the set intersects the range
* if and only if either bound coincides with an element or at least one element
* is between the lower bounds
*/
* laid out in the lexicographically increasing order, the set intersects the range
* if and only if either bound coincides with an element or at least one element
* is between the lower bounds
*/
auto indices = collections::range(0, size());
auto left_lower = std::lower_bound(indices.begin(), indices.end(), left_point, less);
auto right_lower = std::lower_bound(indices.begin(), indices.end(), right_point, less);
/// A special case of 1-element KeyRange. It's useful for partition pruning
/// A special case of 1-element KeyRange. It's useful for partition pruning.
/// We don't take NULL into account here, as it's special in IN operator.
bool one_element_range = true;
for (size_t i = 0; i < tuple_size; ++i)
{
auto & left = left_point[i];
auto & right = right_point[i];
if (left.getType() == right.getType())
if (left.isNormal() && right.isNormal())
{
if (left.getType() == ValueWithInfinity::NORMAL)
if (0 != left.column->compareAt(0, 0, *right.column, 1))
{
if (0 != left.getColumnIfFinite().compareAt(0, 0, right.getColumnIfFinite(), 1))
{
one_element_range = false;
break;
}
one_element_range = false;
break;
}
}
else
@ -577,13 +558,46 @@ BoolMask MergeTreeSetIndex::checkInRange(const std::vector<Range> & key_ranges,
return {false, true};
}
return
{
left_lower != right_lower
|| (left_lower != indices.end() && equals(*left_lower, left_point))
|| (right_lower != indices.end() && equals(*right_lower, right_point)),
true
};
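/// If there is more than one element in the range, the condition can always be false. Thus we only need to check whether it may be true.
/// Given X >= x, Y >= y, find if there may be an element z in [X, Y]. Note that X and Y are the set elements found by the lower-bound searches for the range bounds x and y.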
/// Case 1: [x ... y]
if (left_included && right_included)
return {left_lower != right_lower || (right_lower != indices.end() && equals(*right_lower, right_point)), true};
/// Case 2: [x ... y)
else if (left_included && !right_included)
// X, Y
// x, y
return {left_lower != right_lower, true};
/// Case 3: (x ... y]
else if (!left_included && right_included)
return
{
// X, M, ..., Y
// x, y
left_lower + 1 < right_lower
// X, Y, ...
// x, y
|| (right_lower != indices.end() && equals(*right_lower, right_point))
// X, Y
// x, y
|| (left_lower < right_lower && !equals(*left_lower, left_point)),
true
};
/// Case 4: (x ... y)
else
return
{
// X, M, ..., Y
// x, y
left_lower + 1 < right_lower
// X, Y
// x, y
|| (left_lower < right_lower && !equals(*left_lower, left_point)),
true
};
}
bool MergeTreeSetIndex::hasMonotonicFunctionsChain() const
@ -594,23 +608,18 @@ bool MergeTreeSetIndex::hasMonotonicFunctionsChain() const
return false;
}
void ValueWithInfinity::update(const Field & x)
void FieldValue::update(const Field & x)
{
/// Keep at most one element in column.
if (!column->empty())
column->popBack(1);
column->insert(x);
type = NORMAL;
}
const IColumn & ValueWithInfinity::getColumnIfFinite() const
{
#ifndef NDEBUG
if (type != NORMAL)
throw Exception("Trying to get column of infinite type", ErrorCodes::LOGICAL_ERROR);
#endif
return *column;
if (x.isNegativeInfinity() || x.isPositiveInfinity())
value = x;
else
{
/// Keep at most one element in column.
if (!column->empty())
column->popBack(1);
column->insert(x);
value = Field(); // Set back to normal value.
}
}
}

View File

@ -178,29 +178,19 @@ using FunctionPtr = std::shared_ptr<IFunction>;
* Single field is stored in column for more optimal inplace comparisons with other regular columns.
* Extracting fields from columns and further their comparison is suboptimal and requires extra copying.
*/
class ValueWithInfinity
struct FieldValue
{
public:
enum Type
{
MINUS_INFINITY = -1,
NORMAL = 0,
PLUS_INFINITY = 1
};
ValueWithInfinity(MutableColumnPtr && column_)
: column(std::move(column_)), type(NORMAL) {}
FieldValue(MutableColumnPtr && column_) : column(std::move(column_)) {}
void update(const Field & x);
void update(Type type_) { type = type_; }
const IColumn & getColumnIfFinite() const;
bool isNormal() const { return !value.isPositiveInfinity() && !value.isNegativeInfinity(); }
bool isPositiveInfinity() const { return value.isPositiveInfinity(); }
bool isNegativeInfinity() const { return value.isNegativeInfinity(); }
Type getType() const { return type; }
Field value; // Null, -Inf, +Inf
private:
// If value is Null, uses the actual value in column
MutableColumnPtr column;
Type type;
};
@ -230,7 +220,7 @@ private:
Columns ordered_set;
std::vector<KeyTuplePositionMapping> indexes_mapping;
using ColumnsWithInfinity = std::vector<ValueWithInfinity>;
using FieldValues = std::vector<FieldValue>;
};
}

View File

@ -14,6 +14,7 @@
#include <Common/escapeForFileName.h>
#include <Common/ZooKeeper/ZooKeeper.h>
#include <Common/CurrentMetrics.h>
#include <Common/FieldVisitorsAccurateComparison.h>
#include <common/JSON.h>
#include <common/logger_useful.h>
#include <Compression/getCompressionCodecForFile.h>
@ -78,6 +79,12 @@ void IMergeTreeDataPart::MinMaxIndex::load(const MergeTreeData & data, const Dis
Field max_val;
serialization->deserializeBinary(max_val, *file);
// NULL_LAST
if (min_val.isNull())
min_val = PositiveInfinity();
if (max_val.isNull())
max_val = PositiveInfinity();
hyperrectangle.emplace_back(min_val, true, max_val, true);
}
initialized = true;
@ -132,14 +139,19 @@ void IMergeTreeDataPart::MinMaxIndex::update(const Block & block, const Names &
FieldRef min_value;
FieldRef max_value;
const ColumnWithTypeAndName & column = block.getByName(column_names[i]);
column.column->getExtremes(min_value, max_value);
if (const auto * column_nullable = typeid_cast<const ColumnNullable *>(column.column.get()))
column_nullable->getExtremesNullLast(min_value, max_value);
else
column.column->getExtremes(min_value, max_value);
if (!initialized)
hyperrectangle.emplace_back(min_value, true, max_value, true);
else
{
hyperrectangle[i].left = std::min(hyperrectangle[i].left, min_value);
hyperrectangle[i].right = std::max(hyperrectangle[i].right, max_value);
hyperrectangle[i].left
= applyVisitor(FieldVisitorAccurateLess(), hyperrectangle[i].left, min_value) ? hyperrectangle[i].left : min_value;
hyperrectangle[i].right
= applyVisitor(FieldVisitorAccurateLess(), hyperrectangle[i].right, max_value) ? max_value : hyperrectangle[i].right;
}
}

View File

@ -43,15 +43,8 @@ String Range::toString() const
{
WriteBufferFromOwnString str;
if (!left_bounded)
str << "(-inf, ";
else
str << (left_included ? '[' : '(') << applyVisitor(FieldVisitorToString(), left) << ", ";
if (!right_bounded)
str << "+inf)";
else
str << applyVisitor(FieldVisitorToString(), right) << (right_included ? ']' : ')');
str << (left_included ? '[' : '(') << applyVisitor(FieldVisitorToString(), left) << ", ";
str << applyVisitor(FieldVisitorToString(), right) << (right_included ? ']' : ')');
return str.str();
}
@ -205,6 +198,38 @@ const KeyCondition::AtomMap KeyCondition::atom_map
return true;
}
},
{
"nullIn",
[] (RPNElement & out, const Field &)
{
out.function = RPNElement::FUNCTION_IN_SET;
return true;
}
},
{
"notNullIn",
[] (RPNElement & out, const Field &)
{
out.function = RPNElement::FUNCTION_NOT_IN_SET;
return true;
}
},
{
"globalNullIn",
[] (RPNElement & out, const Field &)
{
out.function = RPNElement::FUNCTION_IN_SET;
return true;
}
},
{
"globalNotNullIn",
[] (RPNElement & out, const Field &)
{
out.function = RPNElement::FUNCTION_NOT_IN_SET;
return true;
}
},
{
"empty",
[] (RPNElement & out, const Field & value)
@ -291,6 +316,26 @@ const KeyCondition::AtomMap KeyCondition::atom_map
return true;
}
},
{
"isNotNull",
[] (RPNElement & out, const Field &)
{
out.function = RPNElement::FUNCTION_IS_NOT_NULL;
// isNotNull means (-Inf, +Inf), which is the default Range
out.range = Range();
return true;
}
},
{
"isNull",
[] (RPNElement & out, const Field &)
{
out.function = RPNElement::FUNCTION_IS_NULL;
// When using NULL_LAST, isNull means [+Inf, +Inf]
out.range = Range(Field(PositiveInfinity{}));
return true;
}
}
};
@ -304,6 +349,14 @@ static const std::map<std::string, std::string> inverse_relations = {
{"lessOrEquals", "greater"},
{"in", "notIn"},
{"notIn", "in"},
{"globalIn", "globalNotIn"},
{"globalNotIn", "globalIn"},
{"nullIn", "notNullIn"},
{"notNullIn", "nullIn"},
{"globalNullIn", "globalNotNullIn"},
{"globalNotNullIn", "globalNullIn"},
{"isNull", "isNotNull"},
{"isNotNull", "isNull"},
{"like", "notLike"},
{"notLike", "like"},
{"empty", "notEmpty"},
@ -613,7 +666,6 @@ bool KeyCondition::canConstantBeWrappedByMonotonicFunctions(
if (key_subexpr_names.count(expr_name) == 0)
return false;
/// TODO Nullable index is not yet landed.
if (out_value.isNull())
return false;
@ -735,7 +787,6 @@ bool KeyCondition::canConstantBeWrappedByFunctions(
const auto & sample_block = key_expr->getSampleBlock();
/// TODO Nullable index is not yet landed.
if (out_value.isNull())
return false;
@ -1134,7 +1185,7 @@ static void castValueToType(const DataTypePtr & desired_type, Field & src_value,
bool KeyCondition::tryParseAtomFromAST(const ASTPtr & node, ContextPtr context, Block & block_with_constants, RPNElement & out)
{
/** Functions < > = != <= >= in `notIn`, where one argument is a constant, and the other is one of columns of key,
/** Functions < > = != <= >= in `notIn` isNull isNotNull, where one argument is a constant, and the other is one of columns of key,
* or itself, wrapped in a chain of possibly-monotonic functions,
* or constant expression - number.
*/
@ -1179,8 +1230,8 @@ bool KeyCondition::tryParseAtomFromAST(const ASTPtr & node, ContextPtr context,
/// If we use this key condition to prune partitions by single value, we cannot relax conditions for NOT.
if (single_point
&& (func_name == "notLike" || func_name == "notIn" || func_name == "globalNotIn" || func_name == "notEquals"
|| func_name == "notEmpty"))
&& (func_name == "notLike" || func_name == "notIn" || func_name == "globalNotIn" || func_name == "notNullIn"
|| func_name == "globalNotNullIn" || func_name == "notEquals" || func_name == "notEmpty"))
strict_condition = true;
if (functionIsInOrGlobalInOperator(func_name))
@ -1491,6 +1542,8 @@ KeyCondition::Description KeyCondition::getDescription() const
else if (
element.function == RPNElement::FUNCTION_IN_RANGE
|| element.function == RPNElement::FUNCTION_NOT_IN_RANGE
|| element.function == RPNElement::FUNCTION_IS_NULL
|| element.function == RPNElement::FUNCTION_IS_NOT_NULL
|| element.function == RPNElement::FUNCTION_IN_SET
|| element.function == RPNElement::FUNCTION_NOT_IN_SET)
{
@ -1655,11 +1708,13 @@ KeyCondition::Description KeyCondition::getDescription() const
* over at least one hyperrectangle from which this range consists.
*/
FieldRef negativeInfinity(NegativeInfinity{}), positiveInfinity(PositiveInfinity{});
template <typename F>
static BoolMask forAnyHyperrectangle(
size_t key_size,
const FieldRef * key_left,
const FieldRef * key_right,
const FieldRef * left_keys,
const FieldRef * right_keys,
bool left_bounded,
bool right_bounded,
std::vector<Range> & hyperrectangle,
@ -1675,10 +1730,10 @@ static BoolMask forAnyHyperrectangle(
/// Let's go through the matching elements of the key.
while (prefix_size < key_size)
{
if (key_left[prefix_size] == key_right[prefix_size])
if (left_keys[prefix_size] == right_keys[prefix_size])
{
/// Point ranges.
hyperrectangle[prefix_size] = Range(key_left[prefix_size]);
hyperrectangle[prefix_size] = Range(left_keys[prefix_size]);
++prefix_size;
}
else
@ -1692,11 +1747,11 @@ static BoolMask forAnyHyperrectangle(
if (prefix_size + 1 == key_size)
{
if (left_bounded && right_bounded)
hyperrectangle[prefix_size] = Range(key_left[prefix_size], true, key_right[prefix_size], true);
hyperrectangle[prefix_size] = Range(left_keys[prefix_size], true, right_keys[prefix_size], true);
else if (left_bounded)
hyperrectangle[prefix_size] = Range::createLeftBounded(key_left[prefix_size], true);
hyperrectangle[prefix_size] = Range::createLeftBounded(left_keys[prefix_size], true);
else if (right_bounded)
hyperrectangle[prefix_size] = Range::createRightBounded(key_right[prefix_size], true);
hyperrectangle[prefix_size] = Range::createRightBounded(right_keys[prefix_size], true);
return callback(hyperrectangle);
}
@ -1704,11 +1759,11 @@ static BoolMask forAnyHyperrectangle(
/// (x1 .. x2) x (-inf .. +inf)
if (left_bounded && right_bounded)
hyperrectangle[prefix_size] = Range(key_left[prefix_size], false, key_right[prefix_size], false);
hyperrectangle[prefix_size] = Range(left_keys[prefix_size], false, right_keys[prefix_size], false);
else if (left_bounded)
hyperrectangle[prefix_size] = Range::createLeftBounded(key_left[prefix_size], false);
hyperrectangle[prefix_size] = Range::createLeftBounded(left_keys[prefix_size], false);
else if (right_bounded)
hyperrectangle[prefix_size] = Range::createRightBounded(key_right[prefix_size], false);
hyperrectangle[prefix_size] = Range::createRightBounded(right_keys[prefix_size], false);
for (size_t i = prefix_size + 1; i < key_size; ++i)
hyperrectangle[i] = Range();
@ -1728,8 +1783,8 @@ static BoolMask forAnyHyperrectangle(
if (left_bounded)
{
hyperrectangle[prefix_size] = Range(key_left[prefix_size]);
result = result | forAnyHyperrectangle(key_size, key_left, key_right, true, false, hyperrectangle, prefix_size + 1, initial_mask, callback);
hyperrectangle[prefix_size] = Range(left_keys[prefix_size]);
result = result | forAnyHyperrectangle(key_size, left_keys, right_keys, true, false, hyperrectangle, prefix_size + 1, initial_mask, callback);
if (result.isComplete())
return result;
}
@ -1738,8 +1793,8 @@ static BoolMask forAnyHyperrectangle(
if (right_bounded)
{
hyperrectangle[prefix_size] = Range(key_right[prefix_size]);
result = result | forAnyHyperrectangle(key_size, key_left, key_right, false, true, hyperrectangle, prefix_size + 1, initial_mask, callback);
hyperrectangle[prefix_size] = Range(right_keys[prefix_size]);
result = result | forAnyHyperrectangle(key_size, left_keys, right_keys, false, true, hyperrectangle, prefix_size + 1, initial_mask, callback);
if (result.isComplete())
return result;
}
@ -1750,37 +1805,31 @@ static BoolMask forAnyHyperrectangle(
BoolMask KeyCondition::checkInRange(
size_t used_key_size,
const FieldRef * left_key,
const FieldRef * right_key,
const FieldRef * left_keys,
const FieldRef * right_keys,
const DataTypes & data_types,
bool right_bounded,
BoolMask initial_mask) const
{
std::vector<Range> key_ranges(used_key_size, Range());
/* std::cerr << "Checking for: [";
for (size_t i = 0; i != used_key_size; ++i)
std::cerr << (i != 0 ? ", " : "") << applyVisitor(FieldVisitorToString(), left_key[i]);
std::cerr << " ... ";
// std::cerr << "Checking for: [";
// for (size_t i = 0; i != used_key_size; ++i)
// std::cerr << (i != 0 ? ", " : "") << applyVisitor(FieldVisitorToString(), left_keys[i]);
// std::cerr << " ... ";
if (right_bounded)
{
for (size_t i = 0; i != used_key_size; ++i)
std::cerr << (i != 0 ? ", " : "") << applyVisitor(FieldVisitorToString(), right_key[i]);
std::cerr << "]\n";
}
else
std::cerr << "+inf)\n";*/
// for (size_t i = 0; i != used_key_size; ++i)
// std::cerr << (i != 0 ? ", " : "") << applyVisitor(FieldVisitorToString(), right_keys[i]);
// std::cerr << "]\n";
return forAnyHyperrectangle(used_key_size, left_key, right_key, true, right_bounded, key_ranges, 0, initial_mask,
return forAnyHyperrectangle(used_key_size, left_keys, right_keys, true, true, key_ranges, 0, initial_mask,
[&] (const std::vector<Range> & key_ranges_hyperrectangle)
{
auto res = checkInHyperrectangle(key_ranges_hyperrectangle, data_types);
/* std::cerr << "Hyperrectangle: ";
for (size_t i = 0, size = key_ranges.size(); i != size; ++i)
std::cerr << (i != 0 ? " x " : "") << key_ranges[i].toString();
std::cerr << ": " << res.can_be_true << "\n";*/
// std::cerr << "Hyperrectangle: ";
// for (size_t i = 0, size = key_ranges.size(); i != size; ++i)
// std::cerr << (i != 0 ? " x " : "") << key_ranges[i].toString();
// std::cerr << ": " << res.can_be_true << "\n";
return res;
});
@ -1808,6 +1857,8 @@ std::optional<Range> KeyCondition::applyMonotonicFunctionsChainToRange(
/// If we apply function to open interval, we can get empty intervals in result.
/// E.g. for ('2020-01-03', '2020-01-20') after applying 'toYYYYMM' we will get ('202001', '202001').
/// To avoid this we make range left and right included.
/// Any function that treats NULL specially is not monotonic.
/// Thus we can safely use isNull() as an -Inf/+Inf indicator here.
if (!key_range.left.isNull())
{
key_range.left = applyFunction(func, current_type, key_range.left);
@ -1823,7 +1874,7 @@ std::optional<Range> KeyCondition::applyMonotonicFunctionsChainToRange(
current_type = func->getResultType();
if (!monotonicity.is_positive)
key_range.swapLeftAndRight();
key_range.invert();
}
return key_range;
}
@ -1948,6 +1999,17 @@ BoolMask KeyCondition::checkInHyperrectangle(
if (element.function == RPNElement::FUNCTION_NOT_IN_RANGE)
rpn_stack.back() = !rpn_stack.back();
}
else if (
element.function == RPNElement::FUNCTION_IS_NULL
|| element.function == RPNElement::FUNCTION_IS_NOT_NULL)
{
const Range * key_range = &hyperrectangle[element.key_column];
/// No need to apply monotonic functions as nulls are kept.
bool intersects = element.range.intersectsRange(*key_range);
bool contains = element.range.containsRange(*key_range);
rpn_stack.emplace_back(intersects, !contains);
}
else if (
element.function == RPNElement::FUNCTION_IN_SET
|| element.function == RPNElement::FUNCTION_NOT_IN_SET)
@ -2002,43 +2064,13 @@ BoolMask KeyCondition::checkInHyperrectangle(
}
BoolMask KeyCondition::checkInRange(
size_t used_key_size,
const FieldRef * left_key,
const FieldRef * right_key,
const DataTypes & data_types,
BoolMask initial_mask) const
{
return checkInRange(used_key_size, left_key, right_key, data_types, true, initial_mask);
}
bool KeyCondition::mayBeTrueInRange(
size_t used_key_size,
const FieldRef * left_key,
const FieldRef * right_key,
const FieldRef * left_keys,
const FieldRef * right_keys,
const DataTypes & data_types) const
{
return checkInRange(used_key_size, left_key, right_key, data_types, true, BoolMask::consider_only_can_be_true).can_be_true;
}
BoolMask KeyCondition::checkAfter(
size_t used_key_size,
const FieldRef * left_key,
const DataTypes & data_types,
BoolMask initial_mask) const
{
return checkInRange(used_key_size, left_key, nullptr, data_types, false, initial_mask);
}
bool KeyCondition::mayBeTrueAfter(
size_t used_key_size,
const FieldRef * left_key,
const DataTypes & data_types) const
{
return checkInRange(used_key_size, left_key, nullptr, data_types, false, BoolMask::consider_only_can_be_true).can_be_true;
return checkInRange(used_key_size, left_keys, right_keys, data_types, BoolMask::consider_only_can_be_true).can_be_true;
}
String KeyCondition::RPNElement::toString() const
{
    /// No column name is supplied to this overload, so the key column is labelled
    /// by its index ("column N") and the work is delegated to the two-argument overload.
    const auto column_label = "column " + std::to_string(key_column);
    return toString(column_label, false);
}
@ -2108,6 +2140,15 @@ String KeyCondition::RPNElement::toString(const std::string_view & column_name,
buf << ")";
return buf.str();
}
case FUNCTION_IS_NULL:
case FUNCTION_IS_NOT_NULL:
{
buf << "(";
print_wrapped_column(buf);
buf << (function == FUNCTION_IS_NULL ? " isNull" : " isNotNull");
buf << ")";
return buf.str();
}
case ALWAYS_FALSE:
return "false";
case ALWAYS_TRUE:
@ -2149,6 +2190,8 @@ bool KeyCondition::unknownOrAlwaysTrue(bool unknown_any) const
|| element.function == RPNElement::FUNCTION_IN_RANGE
|| element.function == RPNElement::FUNCTION_IN_SET
|| element.function == RPNElement::FUNCTION_NOT_IN_SET
|| element.function == RPNElement::FUNCTION_IS_NULL
|| element.function == RPNElement::FUNCTION_IS_NOT_NULL
|| element.function == RPNElement::ALWAYS_FALSE)
{
rpn_stack.push_back(false);
@ -2192,6 +2235,8 @@ size_t KeyCondition::getMaxKeyColumn() const
{
if (element.function == RPNElement::FUNCTION_NOT_IN_RANGE
|| element.function == RPNElement::FUNCTION_IN_RANGE
|| element.function == RPNElement::FUNCTION_IS_NULL
|| element.function == RPNElement::FUNCTION_IS_NOT_NULL
|| element.function == RPNElement::FUNCTION_IN_SET
|| element.function == RPNElement::FUNCTION_NOT_IN_SET)
{

View File

@ -55,25 +55,24 @@ private:
static bool less(const Field & lhs, const Field & rhs);
public:
FieldRef left; /// the left border, if any
FieldRef right; /// the right border, if any
bool left_bounded = false; /// bounded at the left
bool right_bounded = false; /// bounded at the right
bool left_included = false; /// includes the left border, if any
bool right_included = false; /// includes the right border, if any
FieldRef left = NegativeInfinity{}; /// the left border
FieldRef right = PositiveInfinity{}; /// the right border
bool left_included = false; /// includes the left border
bool right_included = false; /// includes the right border
/// The whole unversum.
/// The whole universe (not null).
Range() {}
/// One point.
Range(const FieldRef & point)
: left(point), right(point), left_bounded(true), right_bounded(true), left_included(true), right_included(true) {}
: left(point), right(point), left_included(true), right_included(true) {}
/// A bounded two-sided range.
Range(const FieldRef & left_, bool left_included_, const FieldRef & right_, bool right_included_)
: left(left_), right(right_),
left_bounded(true), right_bounded(true),
left_included(left_included_), right_included(right_included_)
: left(left_)
, right(right_)
, left_included(left_included_)
, right_included(right_included_)
{
shrinkToIncludedIfPossible();
}
@ -82,9 +81,11 @@ public:
{
Range r;
r.right = right_point;
r.right_bounded = true;
r.right_included = right_included;
r.shrinkToIncludedIfPossible();
// Special case for [-Inf, -Inf]
if (r.right.isNegativeInfinity() && right_included)
r.left_included = true;
return r;
}
@ -92,9 +93,11 @@ public:
{
Range r;
r.left = left_point;
r.left_bounded = true;
r.left_included = left_included;
r.shrinkToIncludedIfPossible();
// Special case for [+Inf, +Inf]
if (r.left.isPositiveInfinity() && left_included)
r.right_included = true;
return r;
}
@ -104,7 +107,7 @@ public:
*/
void shrinkToIncludedIfPossible()
{
if (left.isExplicit() && left_bounded && !left_included)
if (left.isExplicit() && !left_included)
{
if (left.getType() == Field::Types::UInt64 && left.get<UInt64>() != std::numeric_limits<UInt64>::max())
{
@ -117,7 +120,7 @@ public:
left_included = true;
}
}
if (right.isExplicit() && right_bounded && !right_included)
if (right.isExplicit() && !right_included)
{
if (right.getType() == Field::Types::UInt64 && right.get<UInt64>() != std::numeric_limits<UInt64>::min())
{
@ -132,12 +135,7 @@ public:
}
}
bool empty() const
{
return left_bounded && right_bounded
&& (less(right, left)
|| ((!left_included || !right_included) && !less(left, right)));
}
bool empty() const { return less(right, left) || ((!left_included || !right_included) && !less(left, right)); }
/// x contained in the range
bool contains(const FieldRef & x) const
@ -148,35 +146,23 @@ public:
/// x is to the left of the range, i.e. the whole range lies strictly to the right of x.
/// True when x is below the left border, or x sits exactly on a left border that is excluded.
/// No separate "bounded" flag is consulted: the comparison is made directly against `left`.
bool rightThan(const FieldRef & x) const
{
    return less(x, left) || (!left_included && equals(x, left));
}
/// x is to the right of the range, i.e. the whole range lies strictly to the left of x.
/// True when x is above the right border, or x sits exactly on a right border that is excluded.
/// No separate "bounded" flag is consulted: the comparison is made directly against `right`.
bool leftThan(const FieldRef & x) const
{
    return less(right, x) || (!right_included && equals(x, right));
}
bool intersectsRange(const Range & r) const
{
/// r to the left of me.
if (r.right_bounded
&& left_bounded
&& (less(r.right, left)
|| ((!left_included || !r.right_included)
&& equals(r.right, left))))
if (less(r.right, left) || ((!left_included || !r.right_included) && equals(r.right, left)))
return false;
/// r to the right of me.
if (r.left_bounded
&& right_bounded
&& (less(right, r.left) /// ...} {...
|| ((!right_included || !r.left_included) /// ...) [... or ...] (...
&& equals(r.left, right))))
if (less(right, r.left) || ((!right_included || !r.left_included) && equals(r.left, right)))
return false;
return true;
@ -185,30 +171,23 @@ public:
bool containsRange(const Range & r) const
{
/// r starts to the left of me.
if (left_bounded
&& (!r.left_bounded
|| less(r.left, left)
|| (r.left_included
&& !left_included
&& equals(r.left, left))))
if (less(r.left, left) || (r.left_included && !left_included && equals(r.left, left)))
return false;
/// r ends right of me.
if (right_bounded
&& (!r.right_bounded
|| less(right, r.right)
|| (r.right_included
&& !right_included
&& equals(r.right, right))))
if (less(right, r.right) || (r.right_included && !right_included && equals(r.right, right)))
return false;
return true;
}
void swapLeftAndRight()
void invert()
{
std::swap(left, right);
std::swap(left_bounded, right_bounded);
if (left.isPositiveInfinity())
left = NegativeInfinity{};
if (right.isNegativeInfinity())
right = PositiveInfinity{};
std::swap(left_included, right_included);
}
@ -247,16 +226,8 @@ public:
/// one of the resulting mask components (see BoolMask::consider_only_can_be_XXX).
BoolMask checkInRange(
size_t used_key_size,
const FieldRef * left_key,
const FieldRef* right_key,
const DataTypes & data_types,
BoolMask initial_mask = BoolMask(false, false)) const;
/// Are the condition and its negation valid in a semi-infinite (not limited to the right) key range.
/// left_key must contain all the fields in the sort_descr in the appropriate order.
BoolMask checkAfter(
size_t used_key_size,
const FieldRef * left_key,
const FieldRef * left_keys,
const FieldRef * right_keys,
const DataTypes & data_types,
BoolMask initial_mask = BoolMask(false, false)) const;
@ -264,15 +235,8 @@ public:
/// This is more efficient than checkInRange(...).can_be_true.
bool mayBeTrueInRange(
size_t used_key_size,
const FieldRef * left_key,
const FieldRef * right_key,
const DataTypes & data_types) const;
/// Same as checkAfter, but calculate only may_be_true component of a result.
/// This is more efficient than checkAfter(...).can_be_true.
bool mayBeTrueAfter(
size_t used_key_size,
const FieldRef * left_key,
const FieldRef * left_keys,
const FieldRef * right_keys,
const DataTypes & data_types) const;
/// Checks that the index can not be used
@ -338,6 +302,8 @@ private:
FUNCTION_NOT_IN_RANGE,
FUNCTION_IN_SET,
FUNCTION_NOT_IN_SET,
FUNCTION_IS_NULL,
FUNCTION_IS_NOT_NULL,
FUNCTION_UNKNOWN, /// Can take any value.
/// Operators of the logical expression.
FUNCTION_NOT,

View File

@ -1294,6 +1294,9 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange(
create_field_ref = [index_columns](size_t row, size_t column, FieldRef & field)
{
field = {index_columns.get(), row, column};
// NULL_LAST
if (field.isNull())
field = PositiveInfinity{};
};
}
else
@ -1301,6 +1304,9 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange(
create_field_ref = [&index](size_t row, size_t column, FieldRef & field)
{
index[column]->get(row, field);
// NULL_LAST
if (field.isNull())
field = PositiveInfinity{};
};
}
@ -1313,21 +1319,22 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange(
if (range.end == marks_count && !has_final_mark)
{
for (size_t i = 0; i < used_key_size; ++i)
{
create_field_ref(range.begin, i, index_left[i]);
return key_condition.mayBeTrueAfter(
used_key_size, index_left.data(), primary_key.data_types);
index_right[i] = PositiveInfinity{};
}
}
if (has_final_mark && range.end == marks_count)
range.end -= 1; /// Remove final empty mark. It's useful only for primary key condition.
for (size_t i = 0; i < used_key_size; ++i)
else
{
create_field_ref(range.begin, i, index_left[i]);
create_field_ref(range.end, i, index_right[i]);
}
if (has_final_mark && range.end == marks_count)
range.end -= 1; /// Remove final empty mark. It's useful only for primary key condition.
for (size_t i = 0; i < used_key_size; ++i)
{
create_field_ref(range.begin, i, index_left[i]);
create_field_ref(range.end, i, index_right[i]);
}
}
return key_condition.mayBeTrueInRange(
used_key_size, index_left.data(), index_right.data(), primary_key.data_types);
};

View File

@ -5,6 +5,7 @@
#include <Interpreters/TreeRewriter.h>
#include <Poco/Logger.h>
#include <Common/FieldVisitorsAccurateComparison.h>
namespace DB
{
@ -38,22 +39,8 @@ void MergeTreeIndexGranuleMinMax::serializeBinary(WriteBuffer & ostr) const
{
const DataTypePtr & type = index_sample_block.getByPosition(i).type;
auto serialization = type->getDefaultSerialization();
if (!type->isNullable())
{
serialization->serializeBinary(hyperrectangle[i].left, ostr);
serialization->serializeBinary(hyperrectangle[i].right, ostr);
}
else
{
bool is_null = hyperrectangle[i].left.isNull() || hyperrectangle[i].right.isNull(); // one is enough
writeBinary(is_null, ostr);
if (!is_null)
{
serialization->serializeBinary(hyperrectangle[i].left, ostr);
serialization->serializeBinary(hyperrectangle[i].right, ostr);
}
}
serialization->serializeBinary(hyperrectangle[i].left, ostr);
serialization->serializeBinary(hyperrectangle[i].right, ostr);
}
}
@ -63,32 +50,18 @@ void MergeTreeIndexGranuleMinMax::deserializeBinary(ReadBuffer & istr)
Field min_val;
Field max_val;
for (size_t i = 0; i < index_sample_block.columns(); ++i)
{
const DataTypePtr & type = index_sample_block.getByPosition(i).type;
auto serialization = type->getDefaultSerialization();
serialization->deserializeBinary(min_val, istr);
serialization->deserializeBinary(max_val, istr);
if (!type->isNullable())
{
serialization->deserializeBinary(min_val, istr);
serialization->deserializeBinary(max_val, istr);
}
else
{
bool is_null;
readBinary(is_null, istr);
if (!is_null)
{
serialization->deserializeBinary(min_val, istr);
serialization->deserializeBinary(max_val, istr);
}
else
{
min_val = Null();
max_val = Null();
}
}
// NULL_LAST
if (min_val.isNull())
min_val = PositiveInfinity();
if (max_val.isNull())
max_val = PositiveInfinity();
hyperrectangle.emplace_back(min_val, true, max_val, true);
}
}
@ -117,8 +90,11 @@ void MergeTreeIndexAggregatorMinMax::update(const Block & block, size_t * pos, s
for (size_t i = 0; i < index_sample_block.columns(); ++i)
{
auto index_column_name = index_sample_block.getByPosition(i).name;
const auto & column = block.getByName(index_column_name).column;
column->cut(*pos, rows_read)->getExtremes(field_min, field_max);
const auto & column = block.getByName(index_column_name).column->cut(*pos, rows_read);
if (const auto * column_nullable = typeid_cast<const ColumnNullable *>(column.get()))
column_nullable->getExtremesNullLast(field_min, field_max);
else
column->getExtremes(field_min, field_max);
if (hyperrectangle.size() <= i)
{
@ -126,8 +102,10 @@ void MergeTreeIndexAggregatorMinMax::update(const Block & block, size_t * pos, s
}
else
{
hyperrectangle[i].left = std::min(hyperrectangle[i].left, field_min);
hyperrectangle[i].right = std::max(hyperrectangle[i].right, field_max);
hyperrectangle[i].left
= applyVisitor(FieldVisitorAccurateLess(), hyperrectangle[i].left, field_min) ? hyperrectangle[i].left : field_min;
hyperrectangle[i].right
= applyVisitor(FieldVisitorAccurateLess(), hyperrectangle[i].right, field_max) ? field_max : hyperrectangle[i].right;
}
}
@ -156,9 +134,6 @@ bool MergeTreeIndexConditionMinMax::mayBeTrueOnGranule(MergeTreeIndexGranulePtr
if (!granule)
throw Exception(
"Minmax index condition got a granule with the wrong type.", ErrorCodes::LOGICAL_ERROR);
for (const auto & range : granule->hyperrectangle)
if (range.left.isNull() || range.right.isNull())
return true;
return condition.checkInHyperrectangle(granule->hyperrectangle, index_data_types).can_be_true;
}

View File

@ -43,6 +43,16 @@ namespace
UInt8 type = Field::Types::Null;
hash.update(type);
}
void operator() (const NegativeInfinity &) const
{
    /// Only the type tag is fed to the hash; the (unnamed) argument itself is not used.
    UInt8 type_tag = Field::Types::NegativeInfinity;
    hash.update(type_tag);
}
void operator() (const PositiveInfinity &) const
{
    /// Only the type tag is fed to the hash; the (unnamed) argument itself is not used.
    UInt8 type_tag = Field::Types::PositiveInfinity;
    hash.update(type_tag);
}
void operator() (const UInt64 & x) const
{
UInt8 type = Field::Types::UInt64;

View File

@ -3,7 +3,7 @@ DROP TABLE IF EXISTS min_max_with_nullable_string;
CREATE TABLE min_max_with_nullable_string (
t DateTime,
nullable_str Nullable(String),
INDEX nullable_str_min_max nullable_str TYPE minmax GRANULARITY 8192
INDEX nullable_str_min_max nullable_str TYPE minmax GRANULARITY 1
) ENGINE = MergeTree ORDER BY (t);
INSERT INTO min_max_with_nullable_string(t) VALUES (now()) (now());

View File

@ -1,35 +0,0 @@
0 0
2 3
4 6
6 9
8 12
10 15
12 18
14 21
16 24
18 27
\N 0
\N -1
\N -2
\N 0
\N -1
\N -2
0 0
2 3
4 6
6 9
8 12
10 15
12 18
14 21
16 24
18 27
12 18
14 21
16 24
18 27
0 0
2 3
4 6
6 9
8 12

View File

@ -1,13 +0,0 @@
DROP TABLE IF EXISTS nullable_key;
CREATE TABLE nullable_key (k Nullable(int), v int) ENGINE MergeTree ORDER BY k SETTINGS allow_nullable_key = 1;
INSERT INTO nullable_key SELECT number * 2, number * 3 FROM numbers(10);
INSERT INTO nullable_key SELECT NULL, -number FROM numbers(3);
SELECT * FROM nullable_key ORDER BY k;
SELECT * FROM nullable_key WHERE k IS NULL;
SELECT * FROM nullable_key WHERE k IS NOT NULL;
SELECT * FROM nullable_key WHERE k > 10;
SELECT * FROM nullable_key WHERE k < 10;
DROP TABLE nullable_key;

View File

@ -0,0 +1,81 @@
0 0
2 3
4 6
6 9
8 12
10 15
12 18
14 21
16 24
18 27
\N 0
\N -1
\N -2
\N 0
\N -1
\N -2
0 0
2 3
4 6
6 9
8 12
10 15
12 18
14 21
16 24
18 27
12 18
14 21
16 24
18 27
0 0
2 3
4 6
6 9
8 12
\N 0
\N -1
\N -2
0 0
2 3
4 6
6 9
8 12
10 15
12 18
14 21
16 24
18 27
10 15
\N 0
\N -1
\N -2
\N
123
1 1
1 3
2 \N
2 2
2 1
2 7
2 \N
3 \N
3 2
3 4
2 \N
2 \N
3 \N
1 3
2 7
3 4
1 1
2 2
2 1
3 2
1 3
2 7
3 4
1 1
2 2
2 1
3 2

View File

@ -0,0 +1,61 @@
-- Test script for Nullable columns used as a MergeTree sorting key and in a minmax skip index.
-- Three tables are exercised: nullable_key, nullable_key_without_final_mark, nullable_minmax_index.
DROP TABLE IF EXISTS nullable_key;
DROP TABLE IF EXISTS nullable_key_without_final_mark;
DROP TABLE IF EXISTS nullable_minmax_index;
SET max_threads = 1;
-- Part 1: Nullable(int) sorting key with index_granularity = 1.
CREATE TABLE nullable_key (k Nullable(int), v int) ENGINE MergeTree ORDER BY k SETTINGS allow_nullable_key = 1, index_granularity = 1;
-- Two separate INSERTs: 10 rows with non-NULL keys (0, 2, ..., 18), then 3 rows with NULL keys.
INSERT INTO nullable_key SELECT number * 2, number * 3 FROM numbers(10);
INSERT INTO nullable_key SELECT NULL, -number FROM numbers(3);
SELECT * FROM nullable_key ORDER BY k;
-- From here on the queries run with force_primary_key = 1 and a max_rows_to_read cap sized to the
-- expected result; the intent appears to be that they only pass if the key condition prunes marks.
SET force_primary_key = 1;
SET max_rows_to_read = 3;
SELECT * FROM nullable_key WHERE k IS NULL;
SET max_rows_to_read = 10;
SELECT * FROM nullable_key WHERE k IS NOT NULL;
SET max_rows_to_read = 5;
SELECT * FROM nullable_key WHERE k > 10;
SELECT * FROM nullable_key WHERE k < 10;
-- Same IS NULL / IS NOT NULL checks again after OPTIMIZE ... FINAL.
OPTIMIZE TABLE nullable_key FINAL;
SET max_rows_to_read = 4; -- one additional left mark needs to be read
SELECT * FROM nullable_key WHERE k IS NULL;
SET max_rows_to_read = 10;
SELECT * FROM nullable_key WHERE k IS NOT NULL;
-- Nullable in set and with transform_null_in = 1
SET max_rows_to_read = 3;
SELECT * FROM nullable_key WHERE k IN (10, 20) SETTINGS transform_null_in = 1;
SET max_rows_to_read = 5;
SELECT * FROM nullable_key WHERE k IN (3, NULL) SETTINGS transform_null_in = 1;
-- Part 2: Nullable(String) sorting key in a table created with write_final_mark = 0.
CREATE TABLE nullable_key_without_final_mark (s Nullable(String)) ENGINE MergeTree ORDER BY s SETTINGS allow_nullable_key = 1, write_final_mark = 0;
INSERT INTO nullable_key_without_final_mark VALUES ('123'), (NULL);
-- NOTE(review): max_rows_to_read = 0 presumably lifts the row cap for the following queries - confirm.
SET max_rows_to_read = 0;
SELECT * FROM nullable_key_without_final_mark WHERE s IS NULL;
SELECT * FROM nullable_key_without_final_mark WHERE s IS NOT NULL;
-- Part 3: minmax skip index over a Nullable(int) column; the key column k itself is not nullable.
-- The trailing comment on each INSERT gives the expected [min, max] of v for the inserted rows.
CREATE TABLE nullable_minmax_index (k int, v Nullable(int), INDEX v_minmax v TYPE minmax GRANULARITY 4) ENGINE MergeTree ORDER BY k SETTINGS index_granularity = 1;
INSERT INTO nullable_minmax_index VALUES (1, 3), (2, 7), (3, 4), (2, NULL); -- [3, +Inf]
INSERT INTO nullable_minmax_index VALUES (1, 1), (2, 2), (3, 2), (2, 1); -- [1, 2]
INSERT INTO nullable_minmax_index VALUES (2, NULL), (3, NULL); -- [+Inf, +Inf]
-- The predicates below are on v, not on the sorting key k, so force_primary_key is switched off.
SET force_primary_key = 0;
SELECT * FROM nullable_minmax_index ORDER BY k;
SET max_rows_to_read = 6;
SELECT * FROM nullable_minmax_index WHERE v IS NULL;
SET max_rows_to_read = 8;
SELECT * FROM nullable_minmax_index WHERE v IS NOT NULL;
SET max_rows_to_read = 6;
SELECT * FROM nullable_minmax_index WHERE v > 2;
SET max_rows_to_read = 4;
SELECT * FROM nullable_minmax_index WHERE v <= 2;
-- Cleanup.
DROP TABLE nullable_key;
DROP TABLE nullable_key_without_final_mark;
DROP TABLE nullable_minmax_index;

View File

@ -0,0 +1,20 @@
0 2
1 3
0 2
1 3
0 2
1 3
0 2
1 3
0 2
1 3
0 2
1 3
0 2
1 3
0 2
1 3
\N 100
\N 100
\N 100
\N 100

View File

@ -0,0 +1,30 @@
-- Test script: IN / GLOBAL IN on a Nullable(UInt64) sorting key, queried both directly (xp)
-- and through a Distributed table (xp_d) defined over the same local table.
drop table if exists xp;
drop table if exists xp_d;
create table xp(i Nullable(UInt64), j UInt64) engine MergeTree order by i settings index_granularity = 1, allow_nullable_key = 1;
create table xp_d as xp engine Distributed(test_shard_localhost, currentDatabase(), xp);
-- 10 rows with i = 0..9 and j = i + 2, plus a single row with a NULL key and j = 100.
insert into xp select number, number + 2 from numbers(10);
insert into xp select null, 100;
optimize table xp final;
-- Every query below runs under a cap of 2 rows to read; the intent appears to be that
-- they only pass if the IN condition is used to prune the key range.
set max_rows_to_read = 2;
select * from xp where i in (select * from numbers(2));
select * from xp where i global in (select * from numbers(2));
select * from xp_d where i in (select * from numbers(2));
select * from xp_d where i global in (select * from numbers(2));
-- Same four queries again with transform_null_in = 1.
set transform_null_in = 1;
select * from xp where i in (select * from numbers(2));
select * from xp where i global in (select * from numbers(2));
select * from xp_d where i in (select * from numbers(2));
select * from xp_d where i global in (select * from numbers(2));
-- Still with transform_null_in = 1: look up the NULL key itself.
select * from xp where i in (null);
select * from xp where i global in (null);
select * from xp_d where i in (null);
select * from xp_d where i global in (null);
-- Cleanup.
drop table if exists xp;
drop table if exists xp_d;