diff --git a/base/base/BFloat16.h b/base/base/BFloat16.h new file mode 100644 index 00000000000..00848038fe9 --- /dev/null +++ b/base/base/BFloat16.h @@ -0,0 +1,313 @@ +#pragma once + +#include +#include + + +/** BFloat16 is a 16-bit floating point type, which has the same number (8) of exponent bits as Float32. + * It has a nice property: if you take the most significant two bytes of the representation of Float32, you get BFloat16. + * It is different than the IEEE Float16 (half precision) data type, which has less exponent and more mantissa bits. + * + * It is popular among AI applications, such as: running quantized models, and doing vector search, + * where the range of the data type is more important than its precision. + * + * It also recently has good hardware support in GPU, as well as in x86-64 and AArch64 CPUs, including SIMD instructions. + * But it is rarely utilized by compilers. + * + * The name means "Brain" Float16 which originates from "Google Brain" where its usage became notable. + * It is also known under the name "bf16". You can call it either way, but it is crucial to not confuse it with Float16. + + * Here is a manual implementation of this data type. Only required operations are implemented. + * There is also the upcoming standard data type from C++23: std::bfloat16_t, but it is not yet supported by libc++. + * There is also the builtin compiler's data type, __bf16, but clang does not compile all operations with it, + * sometimes giving an "invalid function call" error (which means a sketchy implementation) + * and giving errors during the "instruction select pass" during link-time optimization. + * + * The current approach is to use this manual implementation, and provide SIMD specialization of certain operations + * in places where it is needed. 
+ */
+class BFloat16
+{
+private:
+    /// Raw bit pattern: 1 sign bit, 8 exponent bits, 7 mantissa bits (the masks below rely on this layout).
+    UInt16 x = 0;
+
+public:
+    constexpr BFloat16() = default;
+    constexpr BFloat16(const BFloat16 & other) = default;
+    constexpr BFloat16 & operator=(const BFloat16 & other) = default;
+
+    /// Keeps the most significant 16 bits of the Float32 representation.
+    /// The low 16 bits are dropped, i.e. the value is truncated rather than rounded.
+    explicit constexpr BFloat16(const Float32 & other)
+    {
+        x = static_cast<UInt16>(std::bit_cast<UInt32>(other) >> 16);
+    }
+
+    /// Any other source type is converted to Float32 first.
+    template <typename T>
+    explicit constexpr BFloat16(const T & other)
+        : BFloat16(Float32(other))
+    {
+    }
+
+    /// Assignment from any other type goes through the explicit converting constructors above.
+    template <typename T>
+    constexpr BFloat16 & operator=(const T & other)
+    {
+        *this = BFloat16(other);
+        return *this;
+    }
+
+    /// Widening is exact: the stored 16 bits become the upper half of a Float32, the lower half is zero.
+    explicit constexpr operator Float32() const
+    {
+        return std::bit_cast<Float32>(static_cast<UInt32>(x) << 16);
+    }
+
+    /// Conversion to any other type goes through Float32.
+    template <typename T>
+    explicit constexpr operator T() const
+    {
+        return T(Float32(*this));
+    }
+
+    /// False when all exponent bits are set (the encodings of infinities and NaNs).
+    constexpr bool isFinite() const
+    {
+        return (x & 0b0111111110000000) != 0b0111111110000000;
+    }
+
+    /// True when all exponent bits are set and the mantissa is non-zero.
+    constexpr bool isNaN() const
+    {
+        return !isFinite() && (x & 0b0000000001111111) != 0b0000000000000000;
+    }
+
+    /// True when the sign bit is set (this includes negative zero and NaNs with the sign bit).
+    constexpr bool signBit() const
+    {
+        return x & 0b1000000000000000;
+    }
+
+    /// Absolute value: clears the sign bit and leaves the exponent and mantissa untouched.
+    /// The mask must be applied with AND; OR-ing it in would set every exponent and mantissa bit
+    /// and turn any input into a NaN bit pattern.
+    constexpr BFloat16 abs() const
+    {
+        BFloat16 res;
+        res.x = x & 0b0111111111111111;
+        return res;
+    }
+
+    /// Compares the raw bit patterns, not the numeric values:
+    /// +0 and -0 are different, and two NaNs with the same bits are equal.
+    constexpr bool operator==(const BFloat16 & other) const
+    {
+        return x == other.x;
+    }
+
+    constexpr bool operator!=(const BFloat16 & other) const
+    {
+        return x != other.x;
+    }
+
+    /// Arithmetic is performed in Float32 and the result is converted back to BFloat16.
+    constexpr BFloat16 operator+(const BFloat16 & other) const
+    {
+        return BFloat16(Float32(*this) + Float32(other));
+    }
+
+    constexpr BFloat16 operator-(const BFloat16 & other) const
+    {
+        return BFloat16(Float32(*this) - Float32(other));
+    }
+
+    constexpr BFloat16 operator*(const BFloat16 & other) const
+    {
+        return BFloat16(Float32(*this) * Float32(other));
+    }
+
+    constexpr BFloat16 operator/(const BFloat16 & other) const
+    {
+        return BFloat16(Float32(*this) / Float32(other));
+    }
+
+    /// Compound assignments are expressed through the binary operators above.
+    constexpr BFloat16 & operator+=(const BFloat16 & other)
+    {
+        *this = *this + other;
+        return *this;
+    }
+
+    constexpr BFloat16 & operator-=(const BFloat16 & other)
+    {
+        *this = *this - other;
+        return *this;
+    }
+
+    constexpr BFloat16 & operator*=(const BFloat16 & other)
+    {
+        *this = *this * other;
+        return *this;
+    }
+
+    constexpr BFloat16 & operator/=(const BFloat16 & other)
+    {
+        *this = *this / other;
+        return *this;
+    }
+
+    /// Negation flips the sign bit only.
+    constexpr BFloat16 operator-() const
+    {
+        BFloat16 res;
+        res.x = x ^ 0b1000000000000000;
+        return res;
+    }
+};
+
+
+/// Mixed-type comparisons: the BFloat16 operand is widened to Float32 and compared with the other operand.
+/// The overloads are constrained so that they never apply when both operands are BFloat16.
+template <typename T>
+requires(!std::is_same_v<T, BFloat16>)
+constexpr bool operator==(const BFloat16 & a, const T & b)
+{
+    return Float32(a) == b;
+}
+
+template <typename T>
+requires(!std::is_same_v<T, BFloat16>)
+constexpr bool operator==(const T & a, const BFloat16 & b)
+{
+    return a == Float32(b);
+}
+
+template <typename T>
+requires(!std::is_same_v<T, BFloat16>)
+constexpr bool operator!=(const BFloat16 & a, const T & b)
+{
+    return Float32(a) != b;
+}
+
+template <typename T>
+requires(!std::is_same_v<T, BFloat16>)
+constexpr bool operator!=(const T & a, const BFloat16 & b)
+{
+    return a != Float32(b);
+}
+
+template <typename T>
+requires(!std::is_same_v<T, BFloat16>)
+constexpr bool operator<(const BFloat16 & a, const T & b)
+{
+    return Float32(a) < b;
+}
+
+template <typename T>
+requires(!std::is_same_v<T, BFloat16>)
+constexpr bool operator<(const T & a, const BFloat16 & b)
+{
+    return a < Float32(b);
+}
+
+/// Ordering of two BFloat16 values is done on their Float32 equivalents.
+constexpr inline bool operator<(BFloat16 a, BFloat16 b)
+{
+    return Float32(a) < Float32(b);
+}
+
+template <typename T>
+requires(!std::is_same_v<T, BFloat16>)
+constexpr bool operator>(const BFloat16 & a, const T & b)
+{
+    return Float32(a) > b;
+}
+
+template <typename T>
+requires(!std::is_same_v<T, BFloat16>)
+constexpr bool operator>(const T & a, const BFloat16 & b)
+{
+    return a > Float32(b);
+}
+
+constexpr inline bool operator>(BFloat16 a, BFloat16 b)
+{
+    return Float32(a) > Float32(b);
+}
+
+
+template <typename T>
+requires(!std::is_same_v<T, BFloat16>)
+constexpr bool operator<=(const BFloat16 & a, const T & b)
+{
+    return Float32(a) <= b;
+}
+
+template <typename T>
+requires(!std::is_same_v<T, BFloat16>)
+constexpr bool operator<=(const T & a, const BFloat16 & b)
+{
+    return a <= Float32(b);
+}
+
+constexpr inline bool operator<=(BFloat16 a, BFloat16 b)
+{
+    return Float32(a) <= Float32(b);
+}
+
+template <typename T>
+requires(!std::is_same_v<T, BFloat16>)
+constexpr bool operator>=(const BFloat16 & a, const T & b) +{ + return Float32(a) >= b; +} + +template +requires(!std::is_same_v) +constexpr bool operator>=(const T & a, const BFloat16 & b) +{ + return a >= Float32(b); +} + +constexpr inline bool operator>=(BFloat16 a, BFloat16 b) +{ + return Float32(a) >= Float32(b); +} + + +template +requires(!std::is_same_v) +constexpr inline auto operator+(T a, BFloat16 b) +{ + return a + Float32(b); +} + +template +requires(!std::is_same_v) +constexpr inline auto operator+(BFloat16 a, T b) +{ + return Float32(a) + b; +} + +template +requires(!std::is_same_v) +constexpr inline auto operator-(T a, BFloat16 b) +{ + return a - Float32(b); +} + +template +requires(!std::is_same_v) +constexpr inline auto operator-(BFloat16 a, T b) +{ + return Float32(a) - b; +} + +template +requires(!std::is_same_v) +constexpr inline auto operator*(T a, BFloat16 b) +{ + return a * Float32(b); +} + +template +requires(!std::is_same_v) +constexpr inline auto operator*(BFloat16 a, T b) +{ + return Float32(a) * b; +} + +template +requires(!std::is_same_v) +constexpr inline auto operator/(T a, BFloat16 b) +{ + return a / Float32(b); +} + +template +requires(!std::is_same_v) +constexpr inline auto operator/(BFloat16 a, T b) +{ + return Float32(a) / b; +} diff --git a/base/base/DecomposedFloat.h b/base/base/DecomposedFloat.h index 28dc3004240..fef91adefb0 100644 --- a/base/base/DecomposedFloat.h +++ b/base/base/DecomposedFloat.h @@ -10,6 +10,15 @@ template struct FloatTraits; +template <> +struct FloatTraits +{ + using UInt = uint16_t; + static constexpr size_t bits = 16; + static constexpr size_t exponent_bits = 8; + static constexpr size_t mantissa_bits = bits - exponent_bits - 1; +}; + template <> struct FloatTraits { @@ -87,6 +96,15 @@ struct DecomposedFloat && ((mantissa() & ((1ULL << (Traits::mantissa_bits - normalizedExponent())) - 1)) == 0)); } + bool isFinite() const + { + return exponent() != ((1ull << Traits::exponent_bits) - 1); + } + + bool 
isNaN() const + { + return !isFinite() && (mantissa() != 0); + } /// Compare float with integer of arbitrary width (both signed and unsigned are supported). Assuming two's complement arithmetic. /// This function is generic, big integers (128, 256 bit) are supported as well. @@ -212,3 +230,4 @@ struct DecomposedFloat using DecomposedFloat64 = DecomposedFloat; using DecomposedFloat32 = DecomposedFloat; +using DecomposedFloat16 = DecomposedFloat; diff --git a/base/base/EnumReflection.h b/base/base/EnumReflection.h index e4e0ef672fd..2ad704f8ca8 100644 --- a/base/base/EnumReflection.h +++ b/base/base/EnumReflection.h @@ -4,7 +4,7 @@ #include -template concept is_enum = std::is_enum_v; +template concept is_enum = std::is_enum_v; namespace detail { diff --git a/base/base/TypeLists.h b/base/base/TypeLists.h index 6c1283d054c..375ea94b5ea 100644 --- a/base/base/TypeLists.h +++ b/base/base/TypeLists.h @@ -9,10 +9,11 @@ namespace DB { using TypeListNativeInt = TypeList; -using TypeListFloat = TypeList; -using TypeListNativeNumber = TypeListConcat; +using TypeListNativeFloat = TypeList; +using TypeListNativeNumber = TypeListConcat; using TypeListWideInt = TypeList; using TypeListInt = TypeListConcat; +using TypeListFloat = TypeListConcat>; using TypeListIntAndFloat = TypeListConcat; using TypeListDecimal = TypeList; using TypeListNumber = TypeListConcat; diff --git a/base/base/TypeName.h b/base/base/TypeName.h index 9005b5a2bf4..1f4b475d653 100644 --- a/base/base/TypeName.h +++ b/base/base/TypeName.h @@ -32,6 +32,7 @@ TN_MAP(Int32) TN_MAP(Int64) TN_MAP(Int128) TN_MAP(Int256) +TN_MAP(BFloat16) TN_MAP(Float32) TN_MAP(Float64) TN_MAP(String) diff --git a/base/base/extended_types.h b/base/base/extended_types.h index 3bf3f4ed31d..ef36a5385a0 100644 --- a/base/base/extended_types.h +++ b/base/base/extended_types.h @@ -4,6 +4,8 @@ #include #include +#include + using Int128 = wide::integer<128, signed>; using UInt128 = wide::integer<128, unsigned>; @@ -24,6 +26,7 @@ struct is_signed 
// NOLINT(readability-identifier-naming) template <> struct is_signed { static constexpr bool value = true; }; template <> struct is_signed { static constexpr bool value = true; }; +template <> struct is_signed { static constexpr bool value = true; }; template inline constexpr bool is_signed_v = is_signed::value; @@ -40,15 +43,13 @@ template <> struct is_unsigned { static constexpr bool value = true; }; template inline constexpr bool is_unsigned_v = is_unsigned::value; -template concept is_integer = +template concept is_integer = std::is_integral_v || std::is_same_v || std::is_same_v || std::is_same_v || std::is_same_v; -template concept is_floating_point = std::is_floating_point_v; - template struct is_arithmetic // NOLINT(readability-identifier-naming) { @@ -59,11 +60,16 @@ template <> struct is_arithmetic { static constexpr bool value = true; } template <> struct is_arithmetic { static constexpr bool value = true; }; template <> struct is_arithmetic { static constexpr bool value = true; }; template <> struct is_arithmetic { static constexpr bool value = true; }; - +template <> struct is_arithmetic { static constexpr bool value = true; }; template inline constexpr bool is_arithmetic_v = is_arithmetic::value; +template concept is_floating_point = + std::is_floating_point_v + || std::is_same_v; + + #define FOR_EACH_ARITHMETIC_TYPE(M) \ M(DataTypeDate) \ M(DataTypeDate32) \ @@ -80,6 +86,7 @@ inline constexpr bool is_arithmetic_v = is_arithmetic::value; M(DataTypeUInt128) \ M(DataTypeInt256) \ M(DataTypeUInt256) \ + M(DataTypeBFloat16) \ M(DataTypeFloat32) \ M(DataTypeFloat64) @@ -99,6 +106,7 @@ inline constexpr bool is_arithmetic_v = is_arithmetic::value; M(DataTypeUInt128, X) \ M(DataTypeInt256, X) \ M(DataTypeUInt256, X) \ + M(DataTypeBFloat16, X) \ M(DataTypeFloat32, X) \ M(DataTypeFloat64, X) diff --git a/ci/jobs/scripts/check_style/aspell-ignore/en/aspell-dict.txt b/ci/jobs/scripts/check_style/aspell-ignore/en/aspell-dict.txt index e2966898be2..7cae8509b83 
100644 --- a/ci/jobs/scripts/check_style/aspell-ignore/en/aspell-dict.txt +++ b/ci/jobs/scripts/check_style/aspell-ignore/en/aspell-dict.txt @@ -3131,3 +3131,4 @@ DistributedCachePoolBehaviourOnLimit SharedJoin ShareSet unacked +BFloat diff --git a/cmake/cpu_features.cmake b/cmake/cpu_features.cmake index 2bb6deb4847..dbc77d835be 100644 --- a/cmake/cpu_features.cmake +++ b/cmake/cpu_features.cmake @@ -85,7 +85,7 @@ elseif (ARCH_AARCH64) # [8] https://developer.arm.com/documentation/102651/a/What-are-dot-product-intructions- # [9] https://developer.arm.com/documentation/dui0801/g/A64-Data-Transfer-Instructions/LDAPR?lang=en # [10] https://github.com/aws/aws-graviton-getting-started/blob/main/README.md - set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=armv8.2-a+simd+crypto+dotprod+ssbs+rcpc") + set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=armv8.2-a+simd+crypto+dotprod+ssbs+rcpc+bf16") endif () # Best-effort check: The build generates and executes intermediate binaries, e.g. protoc and llvm-tablegen. If we build on ARM for ARM diff --git a/cmake/linux/default_libs.cmake b/cmake/linux/default_libs.cmake index 51620bc9f33..79875e1ed6b 100644 --- a/cmake/linux/default_libs.cmake +++ b/cmake/linux/default_libs.cmake @@ -3,8 +3,7 @@ set (DEFAULT_LIBS "-nodefaultlibs") -# We need builtins from Clang's RT even without libcxx - for ubsan+int128. 
-# See https://bugs.llvm.org/show_bug.cgi?id=16404 +# We need builtins from Clang execute_process (COMMAND ${CMAKE_CXX_COMPILER} --target=${CMAKE_CXX_COMPILER_TARGET} --print-libgcc-file-name --rtlib=compiler-rt OUTPUT_VARIABLE BUILTINS_LIBRARY diff --git a/docs/en/sql-reference/data-types/float.md b/docs/en/sql-reference/data-types/float.md index 3c789076c1e..7185308bdce 100644 --- a/docs/en/sql-reference/data-types/float.md +++ b/docs/en/sql-reference/data-types/float.md @@ -1,10 +1,10 @@ --- slug: /en/sql-reference/data-types/float sidebar_position: 4 -sidebar_label: Float32, Float64 +sidebar_label: Float32, Float64, BFloat16 --- -# Float32, Float64 +# Float32, Float64, BFloat16 :::note If you need accurate calculations, in particular if you work with financial or business data requiring a high precision, you should consider using [Decimal](../data-types/decimal.md) instead. @@ -117,3 +117,11 @@ SELECT 0 / 0 ``` See the rules for `NaN` sorting in the section [ORDER BY clause](../../sql-reference/statements/select/order-by.md). + +## BFloat16 + +`BFloat16` is a 16-bit floating point data type with a sign bit, an 8-bit exponent, and a 7-bit mantissa. + +It is useful for machine learning and AI applications. + +ClickHouse supports conversions between `Float32` and `BFloat16`. Most other operations are not supported. diff --git a/docs/en/sql-reference/statements/explain.md b/docs/en/sql-reference/statements/explain.md index e7c2000301a..62190a5ba51 100644 --- a/docs/en/sql-reference/statements/explain.md +++ b/docs/en/sql-reference/statements/explain.md @@ -161,6 +161,8 @@ Settings: - `actions` — Prints detailed information about step actions. Default: 0. - `json` — Prints query plan steps as a row in [JSON](../../interfaces/formats.md#json) format. Default: 0. It is recommended to use [TSVRaw](../../interfaces/formats.md#tabseparatedraw) format to avoid unnecessary escaping. +When `json=1`, step names will contain an additional suffix with a unique step identifier. 
+ Example: ```sql @@ -194,30 +196,25 @@ EXPLAIN json = 1, description = 0 SELECT 1 UNION ALL SELECT 2 FORMAT TSVRaw; { "Plan": { "Node Type": "Union", + "Node Id": "Union_10", "Plans": [ { "Node Type": "Expression", + "Node Id": "Expression_13", "Plans": [ { - "Node Type": "SettingQuotaAndLimits", - "Plans": [ - { - "Node Type": "ReadFromStorage" - } - ] + "Node Type": "ReadFromStorage", + "Node Id": "ReadFromStorage_0" } ] }, { "Node Type": "Expression", + "Node Id": "Expression_16", "Plans": [ { - "Node Type": "SettingQuotaAndLimits", - "Plans": [ - { - "Node Type": "ReadFromStorage" - } - ] + "Node Type": "ReadFromStorage", + "Node Id": "ReadFromStorage_4" } ] } @@ -249,6 +246,7 @@ EXPLAIN json = 1, description = 0, header = 1 SELECT 1, 2 + dummy; { "Plan": { "Node Type": "Expression", + "Node Id": "Expression_5", "Header": [ { "Name": "1", @@ -261,23 +259,13 @@ EXPLAIN json = 1, description = 0, header = 1 SELECT 1, 2 + dummy; ], "Plans": [ { - "Node Type": "SettingQuotaAndLimits", + "Node Type": "ReadFromStorage", + "Node Id": "ReadFromStorage_0", "Header": [ { "Name": "dummy", "Type": "UInt8" } - ], - "Plans": [ - { - "Node Type": "ReadFromStorage", - "Header": [ - { - "Name": "dummy", - "Type": "UInt8" - } - ] - } ] } ] @@ -351,17 +339,31 @@ EXPLAIN json = 1, actions = 1, description = 0 SELECT 1 FORMAT TSVRaw; { "Plan": { "Node Type": "Expression", + "Node Id": "Expression_5", "Expression": { - "Inputs": [], + "Inputs": [ + { + "Name": "dummy", + "Type": "UInt8" + } + ], "Actions": [ { - "Node Type": "Column", + "Node Type": "INPUT", "Result Type": "UInt8", - "Result Type": "Column", + "Result Name": "dummy", + "Arguments": [0], + "Removed Arguments": [0], + "Result": 0 + }, + { + "Node Type": "COLUMN", + "Result Type": "UInt8", + "Result Name": "1", "Column": "Const(UInt8)", "Arguments": [], "Removed Arguments": [], - "Result": 0 + "Result": 1 } ], "Outputs": [ @@ -370,17 +372,12 @@ EXPLAIN json = 1, actions = 1, description = 0 SELECT 1 FORMAT TSVRaw; 
"Type": "UInt8" } ], - "Positions": [0], - "Project Input": true + "Positions": [1] }, "Plans": [ { - "Node Type": "SettingQuotaAndLimits", - "Plans": [ - { - "Node Type": "ReadFromStorage" - } - ] + "Node Type": "ReadFromStorage", + "Node Id": "ReadFromStorage_0" } ] } @@ -396,6 +393,8 @@ Settings: - `graph` — Prints a graph described in the [DOT](https://en.wikipedia.org/wiki/DOT_(graph_description_language)) graph description language. Default: 0. - `compact` — Prints graph in compact mode if `graph` setting is enabled. Default: 1. +When `compact=0` and `graph=1` processor names will contain an additional suffix with unique processor identifier. + Example: ```sql diff --git a/programs/benchmark/Benchmark.cpp b/programs/benchmark/Benchmark.cpp index eb60888df14..86cc2ebe92f 100644 --- a/programs/benchmark/Benchmark.cpp +++ b/programs/benchmark/Benchmark.cpp @@ -7,7 +7,6 @@ #include #include #include -#include #include #include #include @@ -152,8 +151,6 @@ public: global_context->setClientName(std::string(DEFAULT_CLIENT_NAME)); global_context->setQueryKindInitial(); - std::cerr << std::fixed << std::setprecision(3); - /// This is needed to receive blocks with columns of AggregateFunction data type /// (example: when using stage = 'with_mergeable_state') registerAggregateFunctions(); @@ -226,6 +223,8 @@ private: ContextMutablePtr global_context; QueryProcessingStage::Enum query_processing_stage; + WriteBufferFromFileDescriptor log{STDERR_FILENO}; + std::atomic consecutive_errors{0}; /// Don't execute new queries after timelimit or SIGINT or exception @@ -303,16 +302,16 @@ private: } - std::cerr << "Loaded " << queries.size() << " queries.\n"; + log << "Loaded " << queries.size() << " queries.\n" << flush; } void printNumberOfQueriesExecuted(size_t num) { - std::cerr << "\nQueries executed: " << num; + log << "\nQueries executed: " << num; if (queries.size() > 1) - std::cerr << " (" << (num * 100.0 / queries.size()) << "%)"; - std::cerr << ".\n"; + log << " (" << 
(num * 100.0 / queries.size()) << "%)"; + log << ".\n" << flush; } /// Try push new query and check cancellation conditions @@ -339,19 +338,19 @@ private: if (interrupt_listener.check()) { - std::cout << "Stopping launch of queries. SIGINT received." << std::endl; + std::cout << "Stopping launch of queries. SIGINT received.\n"; return false; } + } - double seconds = delay_watch.elapsedSeconds(); - if (delay > 0 && seconds > delay) - { - printNumberOfQueriesExecuted(queries_executed); - cumulative - ? report(comparison_info_total, total_watch.elapsedSeconds()) - : report(comparison_info_per_interval, seconds); - delay_watch.restart(); - } + double seconds = delay_watch.elapsedSeconds(); + if (delay > 0 && seconds > delay) + { + printNumberOfQueriesExecuted(queries_executed); + cumulative + ? report(comparison_info_total, total_watch.elapsedSeconds()) + : report(comparison_info_per_interval, seconds); + delay_watch.restart(); } return true; @@ -438,16 +437,16 @@ private: catch (...) { std::lock_guard lock(mutex); - std::cerr << "An error occurred while processing the query " << "'" << query << "'" - << ": " << getCurrentExceptionMessage(false) << std::endl; + log << "An error occurred while processing the query " << "'" << query << "'" + << ": " << getCurrentExceptionMessage(false) << '\n'; if (!(continue_on_errors || max_consecutive_errors > ++consecutive_errors)) { shutdown = true; throw; } - std::cerr << getCurrentExceptionMessage(print_stacktrace, - true /*check embedded stack trace*/) << std::endl; + log << getCurrentExceptionMessage(print_stacktrace, + true /*check embedded stack trace*/) << '\n' << flush; size_t info_index = round_robin ? 
0 : connection_index; ++comparison_info_per_interval[info_index]->errors; @@ -504,7 +503,7 @@ private: { std::lock_guard lock(mutex); - std::cerr << "\n"; + log << "\n"; for (size_t i = 0; i < infos.size(); ++i) { const auto & info = infos[i]; @@ -524,31 +523,31 @@ private: connection_description += conn->getDescription(); } } - std::cerr - << connection_description << ", " - << "queries: " << info->queries << ", "; + log + << connection_description << ", " + << "queries: " << info->queries.load() << ", "; if (info->errors) { - std::cerr << "errors: " << info->errors << ", "; + log << "errors: " << info->errors << ", "; } - std::cerr - << "QPS: " << (info->queries / seconds) << ", " - << "RPS: " << (info->read_rows / seconds) << ", " - << "MiB/s: " << (info->read_bytes / seconds / 1048576) << ", " - << "result RPS: " << (info->result_rows / seconds) << ", " - << "result MiB/s: " << (info->result_bytes / seconds / 1048576) << "." - << "\n"; + log + << "QPS: " << fmt::format("{:.3f}", info->queries / seconds) << ", " + << "RPS: " << fmt::format("{:.3f}", info->read_rows / seconds) << ", " + << "MiB/s: " << fmt::format("{:.3f}", info->read_bytes / seconds / 1048576) << ", " + << "result RPS: " << fmt::format("{:.3f}", info->result_rows / seconds) << ", " + << "result MiB/s: " << fmt::format("{:.3f}", info->result_bytes / seconds / 1048576) << "." 
+ << "\n"; } - std::cerr << "\n"; + log << "\n"; auto print_percentile = [&](double percent) { - std::cerr << percent << "%\t\t"; + log << percent << "%\t\t"; for (const auto & info : infos) { - std::cerr << info->sampler.quantileNearest(percent / 100.0) << " sec.\t"; + log << fmt::format("{:.3f}", info->sampler.quantileNearest(percent / 100.0)) << " sec.\t"; } - std::cerr << "\n"; + log << "\n"; }; for (int percent = 0; percent <= 90; percent += 10) @@ -559,13 +558,15 @@ private: print_percentile(99.9); print_percentile(99.99); - std::cerr << "\n" << t_test.compareAndReport(confidence).second << "\n"; + log << "\n" << t_test.compareAndReport(confidence).second << "\n"; if (!cumulative) { for (auto & info : infos) info->clear(); } + + log.next(); } public: @@ -741,7 +742,7 @@ int mainEntryClickHouseBenchmark(int argc, char ** argv) } catch (...) { - std::cerr << getCurrentExceptionMessage(print_stacktrace, true) << std::endl; + std::cerr << getCurrentExceptionMessage(print_stacktrace, true) << '\n'; return getCurrentExceptionCode(); } } diff --git a/src/AggregateFunctions/AggregateFunctionAvg.h b/src/AggregateFunctions/AggregateFunctionAvg.h index 6e1e9289565..8d53a081ee0 100644 --- a/src/AggregateFunctions/AggregateFunctionAvg.h +++ b/src/AggregateFunctions/AggregateFunctionAvg.h @@ -231,7 +231,7 @@ public: void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const final { - increment(place, static_cast(*columns[0]).getData()[row_num]); + increment(place, Numerator(static_cast(*columns[0]).getData()[row_num])); ++this->data(place).denominator; } diff --git a/src/AggregateFunctions/AggregateFunctionDeltaSum.cpp b/src/AggregateFunctions/AggregateFunctionDeltaSum.cpp index 42169c34c25..c61b9918a35 100644 --- a/src/AggregateFunctions/AggregateFunctionDeltaSum.cpp +++ b/src/AggregateFunctions/AggregateFunctionDeltaSum.cpp @@ -27,9 +27,9 @@ namespace template struct AggregationFunctionDeltaSumData { - T sum = 0; - T last = 0; - 
T first = 0; + T sum{}; + T last{}; + T first{}; bool seen = false; }; diff --git a/src/AggregateFunctions/AggregateFunctionDeltaSumTimestamp.cpp b/src/AggregateFunctions/AggregateFunctionDeltaSumTimestamp.cpp index ad1fecac784..dc1adead87c 100644 --- a/src/AggregateFunctions/AggregateFunctionDeltaSumTimestamp.cpp +++ b/src/AggregateFunctions/AggregateFunctionDeltaSumTimestamp.cpp @@ -22,21 +22,14 @@ namespace ErrorCodes namespace { -/** Due to a lack of proper code review, this code was contributed with a multiplication of template instantiations - * over all pairs of data types, and we deeply regret that. - * - * We cannot remove all combinations, because the binary representation of serialized data has to remain the same, - * but we can partially heal the wound by treating unsigned and signed data types in the same way. - */ - template struct AggregationFunctionDeltaSumTimestampData { - ValueType sum = 0; - ValueType first = 0; - ValueType last = 0; - TimestampType first_ts = 0; - TimestampType last_ts = 0; + ValueType sum{}; + ValueType first{}; + ValueType last{}; + TimestampType first_ts{}; + TimestampType last_ts{}; bool seen = false; }; @@ -44,22 +37,23 @@ template class AggregationFunctionDeltaSumTimestamp final : public IAggregateFunctionDataHelper< AggregationFunctionDeltaSumTimestampData, - AggregationFunctionDeltaSumTimestamp> + AggregationFunctionDeltaSumTimestamp + > { public: AggregationFunctionDeltaSumTimestamp(const DataTypes & arguments, const Array & params) : IAggregateFunctionDataHelper< AggregationFunctionDeltaSumTimestampData, - AggregationFunctionDeltaSumTimestamp>{arguments, params, createResultType()} - { - } + AggregationFunctionDeltaSumTimestamp + >{arguments, params, createResultType()} + {} AggregationFunctionDeltaSumTimestamp() : IAggregateFunctionDataHelper< AggregationFunctionDeltaSumTimestampData, - AggregationFunctionDeltaSumTimestamp>{} - { - } + AggregationFunctionDeltaSumTimestamp + >{} + {} bool allocatesMemoryInArena() const 
override { return false; } @@ -69,8 +63,8 @@ public: void NO_SANITIZE_UNDEFINED ALWAYS_INLINE add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override { - auto value = unalignedLoad(columns[0]->getRawData().data() + row_num * sizeof(ValueType)); - auto ts = unalignedLoad(columns[1]->getRawData().data() + row_num * sizeof(TimestampType)); + auto value = assert_cast &>(*columns[0]).getData()[row_num]; + auto ts = assert_cast &>(*columns[1]).getData()[row_num]; auto & data = this->data(place); @@ -178,48 +172,10 @@ public: void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override { - static_cast(to).template insertRawData( - reinterpret_cast(&this->data(place).sum)); + assert_cast &>(to).getData().push_back(this->data(place).sum); } }; - -template class AggregateFunctionTemplate, typename... TArgs> -IAggregateFunction * createWithTwoTypesSecond(const IDataType & second_type, TArgs && ... args) -{ - WhichDataType which(second_type); - - if (which.idx == TypeIndex::UInt32) return new AggregateFunctionTemplate(args...); - if (which.idx == TypeIndex::UInt64) return new AggregateFunctionTemplate(args...); - if (which.idx == TypeIndex::Int32) return new AggregateFunctionTemplate(args...); - if (which.idx == TypeIndex::Int64) return new AggregateFunctionTemplate(args...); - if (which.idx == TypeIndex::Float32) return new AggregateFunctionTemplate(args...); - if (which.idx == TypeIndex::Float64) return new AggregateFunctionTemplate(args...); - if (which.idx == TypeIndex::Date) return new AggregateFunctionTemplate(args...); - if (which.idx == TypeIndex::DateTime) return new AggregateFunctionTemplate(args...); - - return nullptr; -} - -template