Merge pull request #8123 from ClickHouse/aku/field-prepare

Prepare for stricter type checking in Field.
This commit is contained in:
alexey-milovidov 2019-12-11 02:21:34 +03:00 committed by GitHub
commit aecd5a5493
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 145 additions and 181 deletions

View File

@ -34,97 +34,23 @@ struct StaticVisitor
/// F is template parameter, to allow universal reference for field, that is useful for const and non-const values.
template <typename Visitor, typename F>
typename std::decay_t<Visitor>::ResultType applyVisitor(Visitor && visitor, F && field)
auto applyVisitor(Visitor && visitor, F && field)
{
switch (field.getType())
{
case Field::Types::Null: return visitor(field.template get<Null>());
case Field::Types::UInt64: return visitor(field.template get<UInt64>());
case Field::Types::UInt128: return visitor(field.template get<UInt128>());
case Field::Types::Int64: return visitor(field.template get<Int64>());
case Field::Types::Float64: return visitor(field.template get<Float64>());
case Field::Types::String: return visitor(field.template get<String>());
case Field::Types::Array: return visitor(field.template get<Array>());
case Field::Types::Tuple: return visitor(field.template get<Tuple>());
case Field::Types::Decimal32: return visitor(field.template get<DecimalField<Decimal32>>());
case Field::Types::Decimal64: return visitor(field.template get<DecimalField<Decimal64>>());
case Field::Types::Decimal128: return visitor(field.template get<DecimalField<Decimal128>>());
case Field::Types::AggregateFunctionState: return visitor(field.template get<AggregateFunctionStateData>());
default:
throw Exception("Bad type of Field", ErrorCodes::BAD_TYPE_OF_FIELD);
}
}
template <typename Visitor, typename F1, typename F2>
static typename std::decay_t<Visitor>::ResultType applyBinaryVisitorImpl(Visitor && visitor, F1 && field1, F2 && field2)
{
switch (field2.getType())
{
case Field::Types::Null: return visitor(field1, field2.template get<Null>());
case Field::Types::UInt64: return visitor(field1, field2.template get<UInt64>());
case Field::Types::UInt128: return visitor(field1, field2.template get<UInt128>());
case Field::Types::Int64: return visitor(field1, field2.template get<Int64>());
case Field::Types::Float64: return visitor(field1, field2.template get<Float64>());
case Field::Types::String: return visitor(field1, field2.template get<String>());
case Field::Types::Array: return visitor(field1, field2.template get<Array>());
case Field::Types::Tuple: return visitor(field1, field2.template get<Tuple>());
case Field::Types::Decimal32: return visitor(field1, field2.template get<DecimalField<Decimal32>>());
case Field::Types::Decimal64: return visitor(field1, field2.template get<DecimalField<Decimal64>>());
case Field::Types::Decimal128: return visitor(field1, field2.template get<DecimalField<Decimal128>>());
case Field::Types::AggregateFunctionState: return visitor(field1, field2.template get<AggregateFunctionStateData>());
default:
throw Exception("Bad type of Field", ErrorCodes::BAD_TYPE_OF_FIELD);
}
return Field::dispatch(visitor, field);
}
template <typename Visitor, typename F1, typename F2>
typename std::decay_t<Visitor>::ResultType applyVisitor(Visitor && visitor, F1 && field1, F2 && field2)
auto applyVisitor(Visitor && visitor, F1 && field1, F2 && field2)
{
switch (field1.getType())
{
case Field::Types::Null:
return applyBinaryVisitorImpl(
std::forward<Visitor>(visitor), field1.template get<Null>(), std::forward<F2>(field2));
case Field::Types::UInt64:
return applyBinaryVisitorImpl(
std::forward<Visitor>(visitor), field1.template get<UInt64>(), std::forward<F2>(field2));
case Field::Types::UInt128:
return applyBinaryVisitorImpl(
std::forward<Visitor>(visitor), field1.template get<UInt128>(), std::forward<F2>(field2));
case Field::Types::Int64:
return applyBinaryVisitorImpl(
std::forward<Visitor>(visitor), field1.template get<Int64>(), std::forward<F2>(field2));
case Field::Types::Float64:
return applyBinaryVisitorImpl(
std::forward<Visitor>(visitor), field1.template get<Float64>(), std::forward<F2>(field2));
case Field::Types::String:
return applyBinaryVisitorImpl(
std::forward<Visitor>(visitor), field1.template get<String>(), std::forward<F2>(field2));
case Field::Types::Array:
return applyBinaryVisitorImpl(
std::forward<Visitor>(visitor), field1.template get<Array>(), std::forward<F2>(field2));
case Field::Types::Tuple:
return applyBinaryVisitorImpl(
std::forward<Visitor>(visitor), field1.template get<Tuple>(), std::forward<F2>(field2));
case Field::Types::Decimal32:
return applyBinaryVisitorImpl(
std::forward<Visitor>(visitor), field1.template get<DecimalField<Decimal32>>(), std::forward<F2>(field2));
case Field::Types::Decimal64:
return applyBinaryVisitorImpl(
std::forward<Visitor>(visitor), field1.template get<DecimalField<Decimal64>>(), std::forward<F2>(field2));
case Field::Types::Decimal128:
return applyBinaryVisitorImpl(
std::forward<Visitor>(visitor), field1.template get<DecimalField<Decimal128>>(), std::forward<F2>(field2));
case Field::Types::AggregateFunctionState:
return applyBinaryVisitorImpl(
std::forward<Visitor>(visitor), field1.template get<AggregateFunctionStateData>(), std::forward<F2>(field2));
default:
throw Exception("Bad type of Field", ErrorCodes::BAD_TYPE_OF_FIELD);
}
return Field::dispatch([&](auto & field1_value)
{
return Field::dispatch([&](auto & field2_value)
{
return visitor(field1_value, field2_value);
},
field2);
},
field1);
}
@ -473,8 +399,14 @@ private:
public:
explicit FieldVisitorSum(const Field & rhs_) : rhs(rhs_) {}
bool operator() (UInt64 & x) const { x += get<UInt64>(rhs); return x != 0; }
bool operator() (Int64 & x) const { x += get<Int64>(rhs); return x != 0; }
// We can add all ints as unsigned regardless of their actual signedness.
bool operator() (Int64 & x) const { return this->operator()(reinterpret_cast<UInt64 &>(x)); }
bool operator() (UInt64 & x) const
{
x += rhs.reinterpret<UInt64>();
return x != 0;
}
bool operator() (Float64 & x) const { x += get<Float64>(rhs); return x != 0; }
bool operator() (Null &) const { throw Exception("Cannot sum Nulls", ErrorCodes::LOGICAL_ERROR); }

View File

@ -295,7 +295,7 @@ namespace DB
void writeFieldText(const Field & x, WriteBuffer & buf)
{
DB::String res = applyVisitor(DB::FieldVisitorToString(), x);
DB::String res = Field::dispatch(DB::FieldVisitorToString(), x);
buf.write(res.data(), res.size());
}

View File

@ -27,7 +27,7 @@ namespace ErrorCodes
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
template <typename T>
template <typename T, typename SFINAE = void>
struct NearestFieldTypeImpl;
template <typename T>
@ -151,6 +151,54 @@ private:
UInt32 scale;
};
/// char may be signed or unsigned, and behave identically to signed char or unsigned char,
/// but they are always three different types.
/// signedness of char is different in Linux on x86 and Linux on ARM.
template <> struct NearestFieldTypeImpl<char> { using Type = std::conditional_t<is_signed_v<char>, Int64, UInt64>; };
template <> struct NearestFieldTypeImpl<signed char> { using Type = Int64; };
template <> struct NearestFieldTypeImpl<unsigned char> { using Type = UInt64; };
template <> struct NearestFieldTypeImpl<UInt16> { using Type = UInt64; };
template <> struct NearestFieldTypeImpl<UInt32> { using Type = UInt64; };
template <> struct NearestFieldTypeImpl<DayNum> { using Type = UInt64; };
template <> struct NearestFieldTypeImpl<UInt128> { using Type = UInt128; };
template <> struct NearestFieldTypeImpl<UUID> { using Type = UInt128; };
template <> struct NearestFieldTypeImpl<Int16> { using Type = Int64; };
template <> struct NearestFieldTypeImpl<Int32> { using Type = Int64; };
/// long and long long are always different types that may behave identically or not.
/// This is different on Linux and Mac.
template <> struct NearestFieldTypeImpl<long> { using Type = Int64; };
template <> struct NearestFieldTypeImpl<long long> { using Type = Int64; };
template <> struct NearestFieldTypeImpl<unsigned long> { using Type = UInt64; };
template <> struct NearestFieldTypeImpl<unsigned long long> { using Type = UInt64; };
template <> struct NearestFieldTypeImpl<Int128> { using Type = Int128; };
template <> struct NearestFieldTypeImpl<Decimal32> { using Type = DecimalField<Decimal32>; };
template <> struct NearestFieldTypeImpl<Decimal64> { using Type = DecimalField<Decimal64>; };
template <> struct NearestFieldTypeImpl<Decimal128> { using Type = DecimalField<Decimal128>; };
template <> struct NearestFieldTypeImpl<DecimalField<Decimal32>> { using Type = DecimalField<Decimal32>; };
template <> struct NearestFieldTypeImpl<DecimalField<Decimal64>> { using Type = DecimalField<Decimal64>; };
template <> struct NearestFieldTypeImpl<DecimalField<Decimal128>> { using Type = DecimalField<Decimal128>; };
template <> struct NearestFieldTypeImpl<Float32> { using Type = Float64; };
template <> struct NearestFieldTypeImpl<Float64> { using Type = Float64; };
template <> struct NearestFieldTypeImpl<const char *> { using Type = String; };
template <> struct NearestFieldTypeImpl<String> { using Type = String; };
template <> struct NearestFieldTypeImpl<Array> { using Type = Array; };
template <> struct NearestFieldTypeImpl<Tuple> { using Type = Tuple; };
template <> struct NearestFieldTypeImpl<bool> { using Type = UInt64; };
template <> struct NearestFieldTypeImpl<Null> { using Type = Null; };
template <> struct NearestFieldTypeImpl<AggregateFunctionStateData> { using Type = AggregateFunctionStateData; };
// For enum types, use the field type that corresponds to their underlying type.
template <typename T>
struct NearestFieldTypeImpl<T, std::enable_if_t<std::is_enum_v<T>>>
{
using Type = NearestFieldType<std::underlying_type_t<T>>;
};
/** 32 is enough. Round number is used for alignment and for better arithmetic inside std::vector.
* NOTE: Actually, sizeof(std::string) is 32 when using libc++, so Field is 40 bytes.
*/
@ -314,18 +362,24 @@ public:
bool isNull() const { return which == Types::Null; }
template <typename T> T & get()
template <typename T>
T & get();
template <typename T>
const T & get() const
{
using TWithoutRef = std::remove_reference_t<T>;
TWithoutRef * MAY_ALIAS ptr = reinterpret_cast<TWithoutRef*>(&storage);
return *ptr;
auto mutable_this = const_cast<std::decay_t<decltype(*this)> *>(this);
return mutable_this->get<T>();
}
template <typename T> const T & get() const
template <typename T>
T & reinterpret();
template <typename T>
const T & reinterpret() const
{
using TWithoutRef = std::remove_reference_t<T>;
const TWithoutRef * MAY_ALIAS ptr = reinterpret_cast<const TWithoutRef*>(&storage);
return *ptr;
auto mutable_this = const_cast<std::decay_t<decltype(*this)> *>(this);
return mutable_this->reinterpret<T>();
}
template <typename T> bool tryGet(T & result)
@ -427,6 +481,8 @@ public:
return rhs <= *this;
}
// More like bitwise equality as opposed to semantic equality:
// Null equals Null and NaN equals NaN.
bool operator== (const Field & rhs) const
{
if (which != rhs.which)
@ -435,9 +491,13 @@ public:
switch (which)
{
case Types::Null: return true;
case Types::UInt64:
case Types::Int64:
case Types::Float64: return get<UInt64>() == rhs.get<UInt64>();
case Types::UInt64: return get<UInt64>() == rhs.get<UInt64>();
case Types::Int64: return get<Int64>() == rhs.get<Int64>();
case Types::Float64:
{
// Compare as UInt64 so that NaNs compare as equal.
return reinterpret<UInt64>() == rhs.reinterpret<UInt64>();
}
case Types::String: return get<String>() == rhs.get<String>();
case Types::Array: return get<Array>() == rhs.get<Array>();
case Types::Tuple: return get<Tuple>() == rhs.get<Tuple>();
@ -457,6 +517,42 @@ public:
return !(*this == rhs);
}
/// Field is template parameter, to allow universal reference for field,
/// that is useful for const and non-const .
template <typename F, typename FieldRef>
static auto dispatch(F && f, FieldRef && field)
{
switch (field.which)
{
case Types::Null: return f(field.template get<Null>());
case Types::UInt64: return f(field.template get<UInt64>());
case Types::UInt128: return f(field.template get<UInt128>());
case Types::Int64: return f(field.template get<Int64>());
case Types::Float64: return f(field.template get<Float64>());
case Types::String: return f(field.template get<String>());
case Types::Array: return f(field.template get<Array>());
case Types::Tuple: return f(field.template get<Tuple>());
case Types::Decimal32: return f(field.template get<DecimalField<Decimal32>>());
case Types::Decimal64: return f(field.template get<DecimalField<Decimal64>>());
case Types::Decimal128: return f(field.template get<DecimalField<Decimal128>>());
case Types::AggregateFunctionState: return f(field.template get<AggregateFunctionStateData>());
case Types::Int128:
// TODO: investigate where we need Int128 Fields. There are no
// field visitors that support them, and they only arise indirectly
// in some functions that use Decimal columns: they get the
// underlying Field value with get<Int128>(). Probably should be
// switched to DecimalField, but this is a whole endeavor in itself.
throw Exception("Unexpected Int128 in Field::dispatch()", ErrorCodes::LOGICAL_ERROR);
}
// GCC 9 complains that control reaches the end, despite that we handle
// all the cases above (maybe because of throw?). Return something to
// silence it.
Null null{};
return f(null);
}
private:
std::aligned_union_t<DBMS_MIN_FIELD_SIZE - sizeof(Types::Which),
Null, UInt64, UInt128, Int64, Int128, Float64, String, Array, Tuple,
@ -493,37 +589,6 @@ private:
}
template <typename F, typename Field> /// Field template parameter may be const or non-const Field.
static void dispatch(F && f, Field & field)
{
switch (field.which)
{
case Types::Null: f(field.template get<Null>()); return;
// gcc 7.3.0
#if !__clang__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
#endif
case Types::UInt64: f(field.template get<UInt64>()); return;
case Types::UInt128: f(field.template get<UInt128>()); return;
case Types::Int64: f(field.template get<Int64>()); return;
case Types::Int128: f(field.template get<Int128>()); return;
case Types::Float64: f(field.template get<Float64>()); return;
#if !__clang__
#pragma GCC diagnostic pop
#endif
case Types::String: f(field.template get<String>()); return;
case Types::Array: f(field.template get<Array>()); return;
case Types::Tuple: f(field.template get<Tuple>()); return;
case Types::Decimal32: f(field.template get<DecimalField<Decimal32>>()); return;
case Types::Decimal64: f(field.template get<DecimalField<Decimal64>>()); return;
case Types::Decimal128: f(field.template get<DecimalField<Decimal128>>()); return;
case Types::AggregateFunctionState: f(field.template get<AggregateFunctionStateData>()); return;
}
}
void create(const Field & x)
{
dispatch([this] (auto & value) { createConcrete(value); }, x);
@ -621,6 +686,22 @@ template <> struct Field::EnumToType<Field::Types::Decimal64> { using Type = Dec
template <> struct Field::EnumToType<Field::Types::Decimal128> { using Type = DecimalField<Decimal128>; };
template <> struct Field::EnumToType<Field::Types::AggregateFunctionState> { using Type = DecimalField<AggregateFunctionStateData>; };
template <typename T>
T & Field::get()
{
using ValueType = std::decay_t<T>;
//assert(TypeToEnum<NearestFieldType<ValueType>>::value == which);
ValueType * MAY_ALIAS ptr = reinterpret_cast<ValueType *>(&storage);
return *ptr;
}
template <typename T>
T & Field::reinterpret()
{
using ValueType = std::decay_t<T>;
ValueType * MAY_ALIAS ptr = reinterpret_cast<ValueType *>(&storage);
return *ptr;
}
template <typename T>
T get(const Field & field)
@ -651,49 +732,6 @@ template <> struct TypeName<Array> { static std::string get() { return "Array";
template <> struct TypeName<Tuple> { static std::string get() { return "Tuple"; } };
template <> struct TypeName<AggregateFunctionStateData> { static std::string get() { return "AggregateFunctionState"; } };
/// char may be signed or unsigned, and behave identically to signed char or unsigned char,
/// but they are always three different types.
/// signedness of char is different in Linux on x86 and Linux on ARM.
template <> struct NearestFieldTypeImpl<char> { using Type = std::conditional_t<is_signed_v<char>, Int64, UInt64>; };
template <> struct NearestFieldTypeImpl<signed char> { using Type = Int64; };
template <> struct NearestFieldTypeImpl<unsigned char> { using Type = UInt64; };
template <> struct NearestFieldTypeImpl<UInt16> { using Type = UInt64; };
template <> struct NearestFieldTypeImpl<UInt32> { using Type = UInt64; };
template <> struct NearestFieldTypeImpl<DayNum> { using Type = UInt64; };
template <> struct NearestFieldTypeImpl<UInt128> { using Type = UInt128; };
template <> struct NearestFieldTypeImpl<UUID> { using Type = UInt128; };
template <> struct NearestFieldTypeImpl<Int16> { using Type = Int64; };
template <> struct NearestFieldTypeImpl<Int32> { using Type = Int64; };
/// long and long long are always different types that may behave identically or not.
/// This is different on Linux and Mac.
template <> struct NearestFieldTypeImpl<long> { using Type = Int64; };
template <> struct NearestFieldTypeImpl<long long> { using Type = Int64; };
template <> struct NearestFieldTypeImpl<unsigned long> { using Type = UInt64; };
template <> struct NearestFieldTypeImpl<unsigned long long> { using Type = UInt64; };
template <> struct NearestFieldTypeImpl<Int128> { using Type = Int128; };
template <> struct NearestFieldTypeImpl<Decimal32> { using Type = DecimalField<Decimal32>; };
template <> struct NearestFieldTypeImpl<Decimal64> { using Type = DecimalField<Decimal64>; };
template <> struct NearestFieldTypeImpl<Decimal128> { using Type = DecimalField<Decimal128>; };
template <> struct NearestFieldTypeImpl<DecimalField<Decimal32>> { using Type = DecimalField<Decimal32>; };
template <> struct NearestFieldTypeImpl<DecimalField<Decimal64>> { using Type = DecimalField<Decimal64>; };
template <> struct NearestFieldTypeImpl<DecimalField<Decimal128>> { using Type = DecimalField<Decimal128>; };
template <> struct NearestFieldTypeImpl<Float32> { using Type = Float64; };
template <> struct NearestFieldTypeImpl<Float64> { using Type = Float64; };
template <> struct NearestFieldTypeImpl<const char *> { using Type = String; };
template <> struct NearestFieldTypeImpl<String> { using Type = String; };
template <> struct NearestFieldTypeImpl<Array> { using Type = Array; };
template <> struct NearestFieldTypeImpl<Tuple> { using Type = Tuple; };
template <> struct NearestFieldTypeImpl<bool> { using Type = UInt64; };
template <> struct NearestFieldTypeImpl<Null> { using Type = Null; };
template <> struct NearestFieldTypeImpl<AggregateFunctionStateData> { using Type = AggregateFunctionStateData; };
template <typename T>
decltype(auto) castToNearestFieldType(T && x)
{

View File

@ -15,8 +15,6 @@
namespace DB
{
template <> struct NearestFieldTypeImpl<PartLogElement::Type> { using Type = UInt64; };
Block PartLogElement::createBlock()
{
auto event_type_datatype = std::make_shared<DataTypeEnum8>(

View File

@ -21,8 +21,6 @@
namespace DB
{
template <> struct NearestFieldTypeImpl<QueryLogElement::Type> { using Type = UInt64; };
Block QueryLogElement::createBlock()
{
auto query_status_datatype = std::make_shared<DataTypeEnum8>(

View File

@ -10,8 +10,6 @@
namespace DB
{
template <> struct NearestFieldTypeImpl<Message::Priority> { using Type = UInt64; };
Block TextLogElement::createBlock()
{
auto priority_datatype = std::make_shared<DataTypeEnum8>(