diff --git a/dbms/src/Functions/FunctionsConsistentHashing.cpp b/dbms/src/Functions/FunctionsConsistentHashing.cpp deleted file mode 100644 index 7f93257774b..00000000000 --- a/dbms/src/Functions/FunctionsConsistentHashing.cpp +++ /dev/null @@ -1,15 +0,0 @@ -#include "FunctionsConsistentHashing.h" -#include - - -namespace DB -{ - -void registerFunctionsConsistentHashing(FunctionFactory & factory) -{ - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); -} - -} diff --git a/dbms/src/Functions/FunctionsConsistentHashing.h b/dbms/src/Functions/FunctionsConsistentHashing.h index 04640e0ec57..25c6c868306 100644 --- a/dbms/src/Functions/FunctionsConsistentHashing.h +++ b/dbms/src/Functions/FunctionsConsistentHashing.h @@ -8,9 +8,6 @@ #include #include -#include -#include - namespace DB { @@ -23,69 +20,6 @@ namespace ErrorCodes } -/// An O(1) time and space consistent hash algorithm by Konstantin Oblakov -struct YandexConsistentHashImpl -{ - static constexpr auto name = "yandexConsistentHash"; - - using HashType = UInt64; - /// Actually it supports UInt64, but it is efficient only if n <= 32768 - using ResultType = UInt16; - using BucketsType = ResultType; - static constexpr auto max_buckets = 32768; - - static inline ResultType apply(UInt64 hash, BucketsType n) - { - return ConsistentHashing(hash, n); - } -}; - - -/// Code from https://arxiv.org/pdf/1406.2294.pdf -static inline int32_t JumpConsistentHash(uint64_t key, int32_t num_buckets) -{ - int64_t b = -1, j = 0; - while (j < num_buckets) - { - b = j; - key = key * 2862933555777941757ULL + 1; - j = static_cast((b + 1) * (double(1LL << 31) / double((key >> 33) + 1))); - } - return static_cast(b); -} - -struct JumpConsistentHashImpl -{ - static constexpr auto name = "jumpConsistentHash"; - - using HashType = UInt64; - using ResultType = Int32; - using BucketsType = ResultType; - static constexpr auto max_buckets = static_cast(std::numeric_limits::max()); - - static inline ResultType apply(UInt64 hash, BucketsType n) - { - return JumpConsistentHash(hash, n); - } -}; - - -struct SumburConsistentHashImpl -{ - static constexpr auto name = "sumburConsistentHash"; - - using HashType = UInt32; - using ResultType = UInt16; - using BucketsType = ResultType; - static constexpr auto max_buckets = static_cast(std::numeric_limits::max()); - - static inline ResultType apply(HashType hash, BucketsType n) - { - return static_cast(sumburConsistentHash(hash, n)); - } -}; - - template class FunctionConsistentHashImpl : public IFunction { @@ -221,10 +155,4 @@ private: } }; - -using FunctionYandexConsistentHash = FunctionConsistentHashImpl; -using FunctionJumpConsistentHash = FunctionConsistentHashImpl; -using FunctionSumburConsistentHash = FunctionConsistentHashImpl; - - } diff --git a/dbms/src/Functions/FunctionsVisitParam.cpp b/dbms/src/Functions/FunctionsVisitParam.cpp deleted file mode 100644 index 8d82f93a3db..00000000000 --- a/dbms/src/Functions/FunctionsVisitParam.cpp +++ /dev/null @@ -1,38 +0,0 @@ -#include -#include -#include - - -namespace DB -{ - -struct NameVisitParamHas { static constexpr auto name = "visitParamHas"; }; -struct NameVisitParamExtractUInt { static constexpr auto name = "visitParamExtractUInt"; }; -struct NameVisitParamExtractInt { static constexpr auto name = "visitParamExtractInt"; }; -struct NameVisitParamExtractFloat { static constexpr auto name = "visitParamExtractFloat"; }; -struct NameVisitParamExtractBool { static constexpr auto name = "visitParamExtractBool"; }; -struct NameVisitParamExtractRaw { static constexpr auto name = "visitParamExtractRaw"; }; -struct NameVisitParamExtractString { static constexpr auto name = "visitParamExtractString"; }; - - -using FunctionVisitParamHas = FunctionsStringSearch, NameVisitParamHas>; -using FunctionVisitParamExtractUInt = FunctionsStringSearch>, NameVisitParamExtractUInt>; -using FunctionVisitParamExtractInt = FunctionsStringSearch>, NameVisitParamExtractInt>; -using FunctionVisitParamExtractFloat = FunctionsStringSearch>, NameVisitParamExtractFloat>; -using FunctionVisitParamExtractBool = FunctionsStringSearch, NameVisitParamExtractBool>; -using FunctionVisitParamExtractRaw = FunctionsStringSearchToString, NameVisitParamExtractRaw>; -using FunctionVisitParamExtractString = FunctionsStringSearchToString, NameVisitParamExtractString>; - - -void registerFunctionsVisitParam(FunctionFactory & factory) -{ - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); -} - -} diff --git a/dbms/src/Functions/FunctionsVisitParam.h b/dbms/src/Functions/FunctionsVisitParam.h index 353dda930ef..41bef3947ec 100644 --- a/dbms/src/Functions/FunctionsVisitParam.h +++ b/dbms/src/Functions/FunctionsVisitParam.h @@ -1,14 +1,9 @@ #pragma once -#include -#include #include #include #include #include -#include -#include -#include #include #include #include @@ -43,15 +38,6 @@ namespace ErrorCodes extern const int ILLEGAL_COLUMN; } -struct HasParam -{ - using ResultType = UInt8; - - static UInt8 extract(const UInt8 *, const UInt8 *) - { - return true; - } -}; template struct ExtractNumericType @@ -78,77 +64,6 @@ struct ExtractNumericType } }; -struct ExtractBool -{ - using ResultType = UInt8; - - static UInt8 extract(const UInt8 * begin, const UInt8 * end) - { - return begin + 4 <= end && 0 == strncmp(reinterpret_cast(begin), "true", 4); - } -}; - - -struct ExtractRaw -{ - using ExpectChars = PODArrayWithStackMemory; - - static void extract(const UInt8 * pos, const UInt8 * end, ColumnString::Chars & res_data) - { - ExpectChars expects_end; - UInt8 current_expect_end = 0; - - for (auto extract_begin = pos; pos != end; ++pos) - { - if (current_expect_end && *pos == current_expect_end) - { - expects_end.pop_back(); - current_expect_end = expects_end.empty() ? 0 : expects_end.back(); - } - else - { - switch (*pos) - { - case '[': - current_expect_end = ']'; - expects_end.push_back(current_expect_end); - break; - case '{': - current_expect_end = '}'; - expects_end.push_back(current_expect_end); - break; - case '"' : - current_expect_end = '"'; - expects_end.push_back(current_expect_end); - break; - case '\\': - /// skip backslash - if (pos + 1 < end && pos[1] == '"') - pos++; - break; - default: - if (!current_expect_end && (*pos == ',' || *pos == '}')) - { - res_data.insert(extract_begin, pos); - return; - } - } - } - } - } -}; - -struct ExtractString -{ - static void extract(const UInt8 * pos, const UInt8 * end, ColumnString::Chars & res_data) - { - size_t old_size = res_data.size(); - ReadBufferFromMemory in(pos, end - pos); - if (!tryReadJSONStringInto(res_data, in)) - res_data.resize(old_size); - } -}; - /** Searches for occurrences of a field in the visit parameter and calls ParamExtractor * for each occurrence of the field, passing it a pointer to the part of the string, @@ -285,6 +200,4 @@ struct ExtractParamToStringImpl } }; - - } diff --git a/dbms/src/Functions/jumpConsistentHash.cpp b/dbms/src/Functions/jumpConsistentHash.cpp new file mode 100644 index 00000000000..b1a3109c066 --- /dev/null +++ b/dbms/src/Functions/jumpConsistentHash.cpp @@ -0,0 +1,44 @@ +#include "FunctionsConsistentHashing.h" +#include + + +namespace DB +{ + +/// Code from https://arxiv.org/pdf/1406.2294.pdf +static inline int32_t JumpConsistentHash(uint64_t key, int32_t num_buckets) +{ + int64_t b = -1, j = 0; + while (j < num_buckets) + { + b = j; + key = key * 2862933555777941757ULL + 1; + j = static_cast((b + 1) * (double(1LL << 31) / double((key >> 33) + 1))); + } + return static_cast(b); +} + +struct JumpConsistentHashImpl +{ + static constexpr auto name = "jumpConsistentHash"; + + using HashType = UInt64; + using ResultType = Int32; + using BucketsType = ResultType; + static constexpr auto max_buckets = static_cast(std::numeric_limits::max()); + + static inline ResultType apply(UInt64 hash, BucketsType n) + { + return JumpConsistentHash(hash, n); + } +}; + +using FunctionJumpConsistentHash = FunctionConsistentHashImpl; + +void registerFunctionJumpConsistentHash(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} + diff --git a/dbms/src/Functions/registerFunctions.cpp b/dbms/src/Functions/registerFunctions.cpp index eba9a96e5e0..501f8e7f90a 100644 --- a/dbms/src/Functions/registerFunctions.cpp +++ b/dbms/src/Functions/registerFunctions.cpp @@ -20,7 +20,6 @@ void registerFunctionsExternalDictionaries(FunctionFactory &); void registerFunctionsExternalModels(FunctionFactory &); void registerFunctionsFormatting(FunctionFactory &); void registerFunctionsHashing(FunctionFactory &); -void registerFunctionsConsistentHashing(FunctionFactory &); void registerFunctionsHigherOrder(FunctionFactory &); void registerFunctionsLogical(FunctionFactory &); void registerFunctionsMiscellaneous(FunctionFactory &); @@ -41,6 +40,7 @@ void registerFunctionsNull(FunctionFactory &); void registerFunctionsFindCluster(FunctionFactory &); void registerFunctionsJSON(FunctionFactory &); void registerFunctionsIntrospection(FunctionFactory &); +void registerFunctionsConsistentHashing(FunctionFactory & factory); void registerFunctions() { @@ -60,7 +60,6 @@ void registerFunctions() registerFunctionsExternalModels(factory); registerFunctionsFormatting(factory); registerFunctionsHashing(factory); - registerFunctionsConsistentHashing(factory); registerFunctionsHigherOrder(factory); registerFunctionsLogical(factory); registerFunctionsMiscellaneous(factory); @@ -80,6 +79,7 @@ void registerFunctions() registerFunctionsFindCluster(factory); registerFunctionsJSON(factory); registerFunctionsIntrospection(factory); + registerFunctionsConsistentHashing(factory); } } diff --git a/dbms/src/Functions/registerFunctionsConsistentHashing.cpp b/dbms/src/Functions/registerFunctionsConsistentHashing.cpp new file mode 100644 index 00000000000..dc1e90826bf --- /dev/null +++ b/dbms/src/Functions/registerFunctionsConsistentHashing.cpp @@ -0,0 +1,18 @@ +namespace DB +{ + +class FunctionFactory; + +void registerFunctionYandexConsistentHash(FunctionFactory & factory); +void registerFunctionJumpConsistentHash(FunctionFactory & factory); +void registerFunctionSumburConsistentHash(FunctionFactory & factory); + +void registerFunctionsConsistentHashing(FunctionFactory & factory) +{ + registerFunctionYandexConsistentHash(factory); + registerFunctionJumpConsistentHash(factory); + registerFunctionSumburConsistentHash(factory); +} + +} + diff --git a/dbms/src/Functions/registerFunctionsVisitParam.cpp b/dbms/src/Functions/registerFunctionsVisitParam.cpp new file mode 100644 index 00000000000..01084594f08 --- /dev/null +++ b/dbms/src/Functions/registerFunctionsVisitParam.cpp @@ -0,0 +1,25 @@ +namespace DB +{ + +class FunctionFactory; + +void registerFunctionVisitParamHas(FunctionFactory & factory); +void registerFunctionVisitParamExtractUInt(FunctionFactory & factory); +void registerFunctionVisitParamExtractInt(FunctionFactory & factory); +void registerFunctionVisitParamExtractFloat(FunctionFactory & factory); +void registerFunctionVisitParamExtractBool(FunctionFactory & factory); +void registerFunctionVisitParamExtractRaw(FunctionFactory & factory); +void registerFunctionVisitParamExtractString(FunctionFactory & factory); + +void registerFunctionsVisitParam(FunctionFactory & factory) +{ + registerFunctionVisitParamHas(factory); + registerFunctionVisitParamExtractUInt(factory); + registerFunctionVisitParamExtractInt(factory); + registerFunctionVisitParamExtractFloat(factory); + registerFunctionVisitParamExtractBool(factory); + registerFunctionVisitParamExtractRaw(factory); + registerFunctionVisitParamExtractString(factory); +} + +} diff --git a/dbms/src/Functions/sumburConsistentHash.cpp b/dbms/src/Functions/sumburConsistentHash.cpp new file mode 100644 index 00000000000..1fc26502355 --- /dev/null +++ b/dbms/src/Functions/sumburConsistentHash.cpp @@ -0,0 +1,34 @@ +#include "FunctionsConsistentHashing.h" +#include + +#include + + +namespace DB +{ + +struct SumburConsistentHashImpl +{ + static constexpr auto name = "sumburConsistentHash"; + + using HashType = UInt32; + using ResultType = UInt16; + using BucketsType = ResultType; + static constexpr auto max_buckets = static_cast(std::numeric_limits::max()); + + static inline ResultType apply(HashType hash, BucketsType n) + { + return static_cast(sumburConsistentHash(hash, n)); + } +}; + +using FunctionSumburConsistentHash = FunctionConsistentHashImpl; + +void registerFunctionSumburConsistentHash(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} + + diff --git a/dbms/src/Functions/visitParamExtractBool.cpp b/dbms/src/Functions/visitParamExtractBool.cpp new file mode 100644 index 00000000000..7f989ccbb9e --- /dev/null +++ b/dbms/src/Functions/visitParamExtractBool.cpp @@ -0,0 +1,28 @@ +#include +#include +#include + + +namespace DB +{ + +struct ExtractBool +{ + using ResultType = UInt8; + + static UInt8 extract(const UInt8 * begin, const UInt8 * end) + { + return begin + 4 <= end && 0 == strncmp(reinterpret_cast(begin), "true", 4); + } +}; + +struct NameVisitParamExtractBool { static constexpr auto name = "visitParamExtractBool"; }; +using FunctionVisitParamExtractBool = FunctionsStringSearch, NameVisitParamExtractBool>; + + +void registerFunctionVisitParamExtractBool(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} diff --git a/dbms/src/Functions/visitParamExtractFloat.cpp b/dbms/src/Functions/visitParamExtractFloat.cpp new file mode 100644 index 00000000000..b02b0209daf --- /dev/null +++ b/dbms/src/Functions/visitParamExtractFloat.cpp @@ -0,0 +1,18 @@ +#include +#include +#include + + +namespace DB +{ + +struct NameVisitParamExtractFloat { static constexpr auto name = "visitParamExtractFloat"; }; +using FunctionVisitParamExtractFloat = FunctionsStringSearch>, NameVisitParamExtractFloat>; + + +void registerFunctionVisitParamExtractFloat(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} diff --git a/dbms/src/Functions/visitParamExtractInt.cpp b/dbms/src/Functions/visitParamExtractInt.cpp new file mode 100644 index 00000000000..f3f30f566e6 --- /dev/null +++ b/dbms/src/Functions/visitParamExtractInt.cpp @@ -0,0 +1,18 @@ +#include +#include +#include + + +namespace DB +{ + +struct NameVisitParamExtractInt { static constexpr auto name = "visitParamExtractInt"; }; +using FunctionVisitParamExtractInt = FunctionsStringSearch>, NameVisitParamExtractInt>; + + +void registerFunctionVisitParamExtractInt(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} diff --git a/dbms/src/Functions/visitParamExtractRaw.cpp b/dbms/src/Functions/visitParamExtractRaw.cpp new file mode 100644 index 00000000000..5eeb36286a5 --- /dev/null +++ b/dbms/src/Functions/visitParamExtractRaw.cpp @@ -0,0 +1,67 @@ +#include +#include +#include + + +namespace DB +{ + +struct ExtractRaw +{ + using ExpectChars = PODArrayWithStackMemory; + + static void extract(const UInt8 * pos, const UInt8 * end, ColumnString::Chars & res_data) + { + ExpectChars expects_end; + UInt8 current_expect_end = 0; + + for (auto extract_begin = pos; pos != end; ++pos) + { + if (current_expect_end && *pos == current_expect_end) + { + expects_end.pop_back(); + current_expect_end = expects_end.empty() ? 0 : expects_end.back(); + } + else + { + switch (*pos) + { + case '[': + current_expect_end = ']'; + expects_end.push_back(current_expect_end); + break; + case '{': + current_expect_end = '}'; + expects_end.push_back(current_expect_end); + break; + case '"' : + current_expect_end = '"'; + expects_end.push_back(current_expect_end); + break; + case '\\': + /// skip backslash + if (pos + 1 < end && pos[1] == '"') + pos++; + break; + default: + if (!current_expect_end && (*pos == ',' || *pos == '}')) + { + res_data.insert(extract_begin, pos); + return; + } + } + } + } + } +}; + +struct NameVisitParamExtractRaw { static constexpr auto name = "visitParamExtractRaw"; }; +using FunctionVisitParamExtractRaw = FunctionsStringSearchToString, NameVisitParamExtractRaw>; + + +void registerFunctionVisitParamExtractRaw(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} diff --git a/dbms/src/Functions/visitParamExtractString.cpp b/dbms/src/Functions/visitParamExtractString.cpp new file mode 100644 index 00000000000..a6f4b98145d --- /dev/null +++ b/dbms/src/Functions/visitParamExtractString.cpp @@ -0,0 +1,29 @@ +#include +#include +#include + + +namespace DB +{ + +struct ExtractString +{ + static void extract(const UInt8 * pos, const UInt8 * end, ColumnString::Chars & res_data) + { + size_t old_size = res_data.size(); + ReadBufferFromMemory in(pos, end - pos); + if (!tryReadJSONStringInto(res_data, in)) + res_data.resize(old_size); + } +}; + +struct NameVisitParamExtractString { static constexpr auto name = "visitParamExtractString"; }; +using FunctionVisitParamExtractString = FunctionsStringSearchToString, NameVisitParamExtractString>; + + +void registerFunctionVisitParamExtractString(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} diff --git a/dbms/src/Functions/visitParamExtractUInt.cpp b/dbms/src/Functions/visitParamExtractUInt.cpp new file mode 100644 index 00000000000..5e70eed8253 --- /dev/null +++ b/dbms/src/Functions/visitParamExtractUInt.cpp @@ -0,0 +1,18 @@ +#include +#include +#include + + +namespace DB +{ + +struct NameVisitParamExtractUInt { static constexpr auto name = "visitParamExtractUInt"; }; +using FunctionVisitParamExtractUInt = FunctionsStringSearch>, NameVisitParamExtractUInt>; + + +void registerFunctionVisitParamExtractUInt(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} diff --git a/dbms/src/Functions/visitParamHas.cpp b/dbms/src/Functions/visitParamHas.cpp new file mode 100644 index 00000000000..5fbedfb4995 --- /dev/null +++ b/dbms/src/Functions/visitParamHas.cpp @@ -0,0 +1,28 @@ +#include +#include +#include + + +namespace DB +{ + +struct HasParam +{ + using ResultType = UInt8; + + static UInt8 extract(const UInt8 *, const UInt8 *) + { + return true; + } +}; + +struct NameVisitParamHas { static constexpr auto name = "visitParamHas"; }; +using FunctionVisitParamHas = FunctionsStringSearch, NameVisitParamHas>; + + +void registerFunctionVisitParamHas(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} diff --git a/dbms/src/Functions/yandexConsistentHash.cpp b/dbms/src/Functions/yandexConsistentHash.cpp new file mode 100644 index 00000000000..58617e29af7 --- /dev/null +++ b/dbms/src/Functions/yandexConsistentHash.cpp @@ -0,0 +1,34 @@ +#include "FunctionsConsistentHashing.h" +#include + +#include + +namespace DB +{ + +/// An O(1) time and space consistent hash algorithm by Konstantin Oblakov +struct YandexConsistentHashImpl +{ + static constexpr auto name = "yandexConsistentHash"; + + using HashType = UInt64; + /// Actually it supports UInt64, but it is efficient only if n <= 32768 + using ResultType = UInt16; + using BucketsType = ResultType; + static constexpr auto max_buckets = 32768; + + static inline ResultType apply(UInt64 hash, BucketsType n) + { + return ConsistentHashing(hash, n); + } +}; + +using FunctionYandexConsistentHash = FunctionConsistentHashImpl; + +void registerFunctionYandexConsistentHash(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} +