From 50a8a7197f6da9e07ac4d9da3905ec2ec86ba883 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Tue, 10 Dec 2019 16:40:45 +0300 Subject: [PATCH] Prepare for stricter type checking in Field. --- dbms/src/Common/FieldVisitors.h | 108 +++------------ dbms/src/Core/Field.cpp | 2 +- dbms/src/Core/Field.h | 210 +++++++++++++++++------------ dbms/src/Interpreters/PartLog.cpp | 2 - dbms/src/Interpreters/QueryLog.cpp | 2 - dbms/src/Interpreters/TextLog.cpp | 2 - 6 files changed, 145 insertions(+), 181 deletions(-) diff --git a/dbms/src/Common/FieldVisitors.h b/dbms/src/Common/FieldVisitors.h index a1de23d5820..1fc11350535 100644 --- a/dbms/src/Common/FieldVisitors.h +++ b/dbms/src/Common/FieldVisitors.h @@ -34,97 +34,23 @@ struct StaticVisitor /// F is template parameter, to allow universal reference for field, that is useful for const and non-const values. template -typename std::decay_t::ResultType applyVisitor(Visitor && visitor, F && field) +auto applyVisitor(Visitor && visitor, F && field) { - switch (field.getType()) - { - case Field::Types::Null: return visitor(field.template get()); - case Field::Types::UInt64: return visitor(field.template get()); - case Field::Types::UInt128: return visitor(field.template get()); - case Field::Types::Int64: return visitor(field.template get()); - case Field::Types::Float64: return visitor(field.template get()); - case Field::Types::String: return visitor(field.template get()); - case Field::Types::Array: return visitor(field.template get()); - case Field::Types::Tuple: return visitor(field.template get()); - case Field::Types::Decimal32: return visitor(field.template get>()); - case Field::Types::Decimal64: return visitor(field.template get>()); - case Field::Types::Decimal128: return visitor(field.template get>()); - case Field::Types::AggregateFunctionState: return visitor(field.template get()); - - default: - throw Exception("Bad type of Field", ErrorCodes::BAD_TYPE_OF_FIELD); - } -} - - -template -static typename std::decay_t::ResultType applyBinaryVisitorImpl(Visitor && visitor, F1 && field1, F2 && field2) -{ - switch (field2.getType()) - { - case Field::Types::Null: return visitor(field1, field2.template get()); - case Field::Types::UInt64: return visitor(field1, field2.template get()); - case Field::Types::UInt128: return visitor(field1, field2.template get()); - case Field::Types::Int64: return visitor(field1, field2.template get()); - case Field::Types::Float64: return visitor(field1, field2.template get()); - case Field::Types::String: return visitor(field1, field2.template get()); - case Field::Types::Array: return visitor(field1, field2.template get()); - case Field::Types::Tuple: return visitor(field1, field2.template get()); - case Field::Types::Decimal32: return visitor(field1, field2.template get>()); - case Field::Types::Decimal64: return visitor(field1, field2.template get>()); - case Field::Types::Decimal128: return visitor(field1, field2.template get>()); - case Field::Types::AggregateFunctionState: return visitor(field1, field2.template get()); - - default: - throw Exception("Bad type of Field", ErrorCodes::BAD_TYPE_OF_FIELD); - } + return Field::dispatch(visitor, field); } template -typename std::decay_t::ResultType applyVisitor(Visitor && visitor, F1 && field1, F2 && field2) +auto applyVisitor(Visitor && visitor, F1 && field1, F2 && field2) { - switch (field1.getType()) - { - case Field::Types::Null: - return applyBinaryVisitorImpl( - std::forward(visitor), field1.template get(), std::forward(field2)); - case Field::Types::UInt64: - return applyBinaryVisitorImpl( - std::forward(visitor), field1.template get(), std::forward(field2)); - case Field::Types::UInt128: - return applyBinaryVisitorImpl( - std::forward(visitor), field1.template get(), std::forward(field2)); - case Field::Types::Int64: - return applyBinaryVisitorImpl( - std::forward(visitor), field1.template get(), std::forward(field2)); - case Field::Types::Float64: - return applyBinaryVisitorImpl( - std::forward(visitor), field1.template get(), std::forward(field2)); - case Field::Types::String: - return applyBinaryVisitorImpl( - std::forward(visitor), field1.template get(), std::forward(field2)); - case Field::Types::Array: - return applyBinaryVisitorImpl( - std::forward(visitor), field1.template get(), std::forward(field2)); - case Field::Types::Tuple: - return applyBinaryVisitorImpl( - std::forward(visitor), field1.template get(), std::forward(field2)); - case Field::Types::Decimal32: - return applyBinaryVisitorImpl( - std::forward(visitor), field1.template get>(), std::forward(field2)); - case Field::Types::Decimal64: - return applyBinaryVisitorImpl( - std::forward(visitor), field1.template get>(), std::forward(field2)); - case Field::Types::Decimal128: - return applyBinaryVisitorImpl( - std::forward(visitor), field1.template get>(), std::forward(field2)); - case Field::Types::AggregateFunctionState: - return applyBinaryVisitorImpl( - std::forward(visitor), field1.template get(), std::forward(field2)); - - default: - throw Exception("Bad type of Field", ErrorCodes::BAD_TYPE_OF_FIELD); - } + return Field::dispatch([&](auto & field1_value) + { + return Field::dispatch([&](auto & field2_value) + { + return visitor(field1_value, field2_value); + }, + field2); + }, + field1); } @@ -473,8 +399,14 @@ private: public: explicit FieldVisitorSum(const Field & rhs_) : rhs(rhs_) {} - bool operator() (UInt64 & x) const { x += get(rhs); return x != 0; } - bool operator() (Int64 & x) const { x += get(rhs); return x != 0; } + // We can add all ints as unsigned regardless of their actual signedness. + bool operator() (Int64 & x) const { return this->operator()(reinterpret_cast(x)); } + bool operator() (UInt64 & x) const + { + x += rhs.reinterpret(); + return x != 0; + } + bool operator() (Float64 & x) const { x += get(rhs); return x != 0; } bool operator() (Null &) const { throw Exception("Cannot sum Nulls", ErrorCodes::LOGICAL_ERROR); } diff --git a/dbms/src/Core/Field.cpp b/dbms/src/Core/Field.cpp index 505627aaedb..861a75cd28d 100644 --- a/dbms/src/Core/Field.cpp +++ b/dbms/src/Core/Field.cpp @@ -295,7 +295,7 @@ namespace DB void writeFieldText(const Field & x, WriteBuffer & buf) { - DB::String res = applyVisitor(DB::FieldVisitorToString(), x); + DB::String res = Field::dispatch(DB::FieldVisitorToString(), x); buf.write(res.data(), res.size()); } diff --git a/dbms/src/Core/Field.h b/dbms/src/Core/Field.h index 885545844f4..2e9f282b5b1 100644 --- a/dbms/src/Core/Field.h +++ b/dbms/src/Core/Field.h @@ -27,7 +27,7 @@ namespace ErrorCodes extern const int ILLEGAL_TYPE_OF_ARGUMENT; } -template +template struct NearestFieldTypeImpl; template @@ -151,6 +151,54 @@ private: UInt32 scale; }; +/// char may be signed or unsigned, and behave identically to signed char or unsigned char, +/// but they are always three different types. +/// signedness of char is different in Linux on x86 and Linux on ARM. +template <> struct NearestFieldTypeImpl { using Type = std::conditional_t, Int64, UInt64>; }; +template <> struct NearestFieldTypeImpl { using Type = Int64; }; +template <> struct NearestFieldTypeImpl { using Type = UInt64; }; + +template <> struct NearestFieldTypeImpl { using Type = UInt64; }; +template <> struct NearestFieldTypeImpl { using Type = UInt64; }; + +template <> struct NearestFieldTypeImpl { using Type = UInt64; }; +template <> struct NearestFieldTypeImpl { using Type = UInt128; }; +template <> struct NearestFieldTypeImpl { using Type = UInt128; }; +template <> struct NearestFieldTypeImpl { using Type = Int64; }; +template <> struct NearestFieldTypeImpl { using Type = Int64; }; + +/// long and long long are always different types that may behave identically or not. +/// This is different on Linux and Mac. +template <> struct NearestFieldTypeImpl { using Type = Int64; }; +template <> struct NearestFieldTypeImpl { using Type = Int64; }; +template <> struct NearestFieldTypeImpl { using Type = UInt64; }; +template <> struct NearestFieldTypeImpl { using Type = UInt64; }; + +template <> struct NearestFieldTypeImpl { using Type = Int128; }; +template <> struct NearestFieldTypeImpl { using Type = DecimalField; }; +template <> struct NearestFieldTypeImpl { using Type = DecimalField; }; +template <> struct NearestFieldTypeImpl { using Type = DecimalField; }; +template <> struct NearestFieldTypeImpl> { using Type = DecimalField; }; +template <> struct NearestFieldTypeImpl> { using Type = DecimalField; }; +template <> struct NearestFieldTypeImpl> { using Type = DecimalField; }; +template <> struct NearestFieldTypeImpl { using Type = Float64; }; +template <> struct NearestFieldTypeImpl { using Type = Float64; }; +template <> struct NearestFieldTypeImpl { using Type = String; }; +template <> struct NearestFieldTypeImpl { using Type = String; }; +template <> struct NearestFieldTypeImpl { using Type = Array; }; +template <> struct NearestFieldTypeImpl { using Type = Tuple; }; +template <> struct NearestFieldTypeImpl { using Type = UInt64; }; +template <> struct NearestFieldTypeImpl { using Type = Null; }; + +template <> struct NearestFieldTypeImpl { using Type = AggregateFunctionStateData; }; + +// For enum types, use the field type that corresponds to their underlying type. +template +struct NearestFieldTypeImpl>> +{ + using Type = NearestFieldType>; +}; + /** 32 is enough. Round number is used for alignment and for better arithmetic inside std::vector. * NOTE: Actually, sizeof(std::string) is 32 when using libc++, so Field is 40 bytes. */ @@ -314,18 +362,24 @@ public: bool isNull() const { return which == Types::Null; } - template T & get() + template + T & get(); + + template + const T & get() const { - using TWithoutRef = std::remove_reference_t; - TWithoutRef * MAY_ALIAS ptr = reinterpret_cast(&storage); - return *ptr; + auto mutable_this = const_cast *>(this); + return mutable_this->get(); } - template const T & get() const + template + T & reinterpret(); + + template + const T & reinterpret() const { - using TWithoutRef = std::remove_reference_t; - const TWithoutRef * MAY_ALIAS ptr = reinterpret_cast(&storage); - return *ptr; + auto mutable_this = const_cast *>(this); + return mutable_this->reinterpret(); } template bool tryGet(T & result) @@ -427,6 +481,8 @@ public: return rhs <= *this; } + // More like bitwise equality as opposed to semantic equality: + // Null equals Null and NaN equals NaN. bool operator== (const Field & rhs) const { if (which != rhs.which) @@ -435,9 +491,13 @@ public: switch (which) { case Types::Null: return true; - case Types::UInt64: - case Types::Int64: - case Types::Float64: return get() == rhs.get(); + case Types::UInt64: return get() == rhs.get(); + case Types::Int64: return get() == rhs.get(); + case Types::Float64: + { + // Compare as UInt64 so that NaNs compare as equal. + return reinterpret() == rhs.reinterpret(); + } case Types::String: return get() == rhs.get(); case Types::Array: return get() == rhs.get(); case Types::Tuple: return get() == rhs.get(); @@ -457,6 +517,42 @@ public: return !(*this == rhs); } + /// Field is template parameter, to allow universal reference for field, + /// that is useful for const and non-const . + template + static auto dispatch(F && f, FieldRef && field) + { + switch (field.which) + { + case Types::Null: return f(field.template get()); + case Types::UInt64: return f(field.template get()); + case Types::UInt128: return f(field.template get()); + case Types::Int64: return f(field.template get()); + case Types::Float64: return f(field.template get()); + case Types::String: return f(field.template get()); + case Types::Array: return f(field.template get()); + case Types::Tuple: return f(field.template get()); + case Types::Decimal32: return f(field.template get>()); + case Types::Decimal64: return f(field.template get>()); + case Types::Decimal128: return f(field.template get>()); + case Types::AggregateFunctionState: return f(field.template get()); + case Types::Int128: + // TODO: investigate where we need Int128 Fields. There are no + // field visitors that support them, and they only arise indirectly + // in some functions that use Decimal columns: they get the + // underlying Field value with get(). Probably should be + // switched to DecimalField, but this is a whole endeavor in itself. + throw Exception("Unexpected Int128 in Field::dispatch()", ErrorCodes::LOGICAL_ERROR); + } + + // GCC 9 complains that control reaches the end, despite that we handle + // all the cases above (maybe because of throw?). Return something to + // silence it. + Null null{}; + return f(null); + } + + private: std::aligned_union_t /// Field template parameter may be const or non-const Field. - static void dispatch(F && f, Field & field) - { - switch (field.which) - { - case Types::Null: f(field.template get()); return; - -// gcc 7.3.0 -#if !__clang__ -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" -#endif - case Types::UInt64: f(field.template get()); return; - case Types::UInt128: f(field.template get()); return; - case Types::Int64: f(field.template get()); return; - case Types::Int128: f(field.template get()); return; - case Types::Float64: f(field.template get()); return; -#if !__clang__ -#pragma GCC diagnostic pop -#endif - case Types::String: f(field.template get()); return; - case Types::Array: f(field.template get()); return; - case Types::Tuple: f(field.template get()); return; - case Types::Decimal32: f(field.template get>()); return; - case Types::Decimal64: f(field.template get>()); return; - case Types::Decimal128: f(field.template get>()); return; - case Types::AggregateFunctionState: f(field.template get()); return; - } - } - - void create(const Field & x) { dispatch([this] (auto & value) { createConcrete(value); }, x); @@ -621,6 +686,22 @@ template <> struct Field::EnumToType { using Type = Dec template <> struct Field::EnumToType { using Type = DecimalField; }; template <> struct Field::EnumToType { using Type = DecimalField; }; +template +T & Field::get() +{ + using ValueType = std::decay_t; + //assert(TypeToEnum>::value == which); + ValueType * MAY_ALIAS ptr = reinterpret_cast(&storage); + return *ptr; +} + +template +T & Field::reinterpret() +{ + using ValueType = std::decay_t; + ValueType * MAY_ALIAS ptr = reinterpret_cast(&storage); + return *ptr; +} template T get(const Field & field) @@ -651,49 +732,6 @@ template <> struct TypeName { static std::string get() { return "Array"; template <> struct TypeName { static std::string get() { return "Tuple"; } }; template <> struct TypeName { static std::string get() { return "AggregateFunctionState"; } }; - - -/// char may be signed or unsigned, and behave identically to signed char or unsigned char, -/// but they are always three different types. -/// signedness of char is different in Linux on x86 and Linux on ARM. -template <> struct NearestFieldTypeImpl { using Type = std::conditional_t, Int64, UInt64>; }; -template <> struct NearestFieldTypeImpl { using Type = Int64; }; -template <> struct NearestFieldTypeImpl { using Type = UInt64; }; - -template <> struct NearestFieldTypeImpl { using Type = UInt64; }; -template <> struct NearestFieldTypeImpl { using Type = UInt64; }; - -template <> struct NearestFieldTypeImpl { using Type = UInt64; }; -template <> struct NearestFieldTypeImpl { using Type = UInt128; }; -template <> struct NearestFieldTypeImpl { using Type = UInt128; }; -template <> struct NearestFieldTypeImpl { using Type = Int64; }; -template <> struct NearestFieldTypeImpl { using Type = Int64; }; - -/// long and long long are always different types that may behave identically or not. -/// This is different on Linux and Mac. -template <> struct NearestFieldTypeImpl { using Type = Int64; }; -template <> struct NearestFieldTypeImpl { using Type = Int64; }; -template <> struct NearestFieldTypeImpl { using Type = UInt64; }; -template <> struct NearestFieldTypeImpl { using Type = UInt64; }; - -template <> struct NearestFieldTypeImpl { using Type = Int128; }; -template <> struct NearestFieldTypeImpl { using Type = DecimalField; }; -template <> struct NearestFieldTypeImpl { using Type = DecimalField; }; -template <> struct NearestFieldTypeImpl { using Type = DecimalField; }; -template <> struct NearestFieldTypeImpl> { using Type = DecimalField; }; -template <> struct NearestFieldTypeImpl> { using Type = DecimalField; }; -template <> struct NearestFieldTypeImpl> { using Type = DecimalField; }; -template <> struct NearestFieldTypeImpl { using Type = Float64; }; -template <> struct NearestFieldTypeImpl { using Type = Float64; }; -template <> struct NearestFieldTypeImpl { using Type = String; }; -template <> struct NearestFieldTypeImpl { using Type = String; }; -template <> struct NearestFieldTypeImpl { using Type = Array; }; -template <> struct NearestFieldTypeImpl { using Type = Tuple; }; -template <> struct NearestFieldTypeImpl { using Type = UInt64; }; -template <> struct NearestFieldTypeImpl { using Type = Null; }; - -template <> struct NearestFieldTypeImpl { using Type = AggregateFunctionStateData; }; - template decltype(auto) castToNearestFieldType(T && x) { diff --git a/dbms/src/Interpreters/PartLog.cpp b/dbms/src/Interpreters/PartLog.cpp index c860f3212c7..d6a23f44904 100644 --- a/dbms/src/Interpreters/PartLog.cpp +++ b/dbms/src/Interpreters/PartLog.cpp @@ -15,8 +15,6 @@ namespace DB { -template <> struct NearestFieldTypeImpl { using Type = UInt64; }; - Block PartLogElement::createBlock() { auto event_type_datatype = std::make_shared( diff --git a/dbms/src/Interpreters/QueryLog.cpp b/dbms/src/Interpreters/QueryLog.cpp index 7cca320b04b..d9b86ea91ea 100644 --- a/dbms/src/Interpreters/QueryLog.cpp +++ b/dbms/src/Interpreters/QueryLog.cpp @@ -21,8 +21,6 @@ namespace DB { -template <> struct NearestFieldTypeImpl { using Type = UInt64; }; - Block QueryLogElement::createBlock() { auto query_status_datatype = std::make_shared( diff --git a/dbms/src/Interpreters/TextLog.cpp b/dbms/src/Interpreters/TextLog.cpp index 489d0469ca0..7cb27782cb2 100644 --- a/dbms/src/Interpreters/TextLog.cpp +++ b/dbms/src/Interpreters/TextLog.cpp @@ -10,8 +10,6 @@ namespace DB { -template <> struct NearestFieldTypeImpl { using Type = UInt64; }; - Block TextLogElement::createBlock() { auto priority_datatype = std::make_shared(