Merge pull request #6321 from yandex/every_function_in_own_file_10

Every function in its own file, part 10
This commit is contained in:
alexey-milovidov 2019-08-05 01:02:02 +03:00 committed by GitHub
commit 0c150fbf66
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 363 additions and 214 deletions

View File

@ -1,15 +0,0 @@
#include "FunctionsConsistentHashing.h"
#include <Functions/FunctionFactory.h>
namespace DB
{
void registerFunctionsConsistentHashing(FunctionFactory & factory)
{
factory.registerFunction<FunctionYandexConsistentHash>();
factory.registerFunction<FunctionJumpConsistentHash>();
factory.registerFunction<FunctionSumburConsistentHash>();
}
}

View File

@ -8,9 +8,6 @@
#include <Common/typeid_cast.h>
#include <common/likely.h>
#include <sumbur.h>
#include <consistent_hashing.h>
namespace DB
{
@ -23,69 +20,6 @@ namespace ErrorCodes
}
/// An O(1) time and space consistent hash algorithm by Konstantin Oblakov
struct YandexConsistentHashImpl
{
static constexpr auto name = "yandexConsistentHash";
using HashType = UInt64;
/// Actually it supports UInt64, but it is efficient only if n <= 32768
using ResultType = UInt16;
using BucketsType = ResultType;
static constexpr auto max_buckets = 32768;
static inline ResultType apply(UInt64 hash, BucketsType n)
{
return ConsistentHashing(hash, n);
}
};
/// Code from https://arxiv.org/pdf/1406.2294.pdf
static inline int32_t JumpConsistentHash(uint64_t key, int32_t num_buckets)
{
int64_t b = -1, j = 0;
while (j < num_buckets)
{
b = j;
key = key * 2862933555777941757ULL + 1;
j = static_cast<int64_t>((b + 1) * (double(1LL << 31) / double((key >> 33) + 1)));
}
return static_cast<int32_t>(b);
}
struct JumpConsistentHashImpl
{
static constexpr auto name = "jumpConsistentHash";
using HashType = UInt64;
using ResultType = Int32;
using BucketsType = ResultType;
static constexpr auto max_buckets = static_cast<UInt64>(std::numeric_limits<BucketsType>::max());
static inline ResultType apply(UInt64 hash, BucketsType n)
{
return JumpConsistentHash(hash, n);
}
};
struct SumburConsistentHashImpl
{
static constexpr auto name = "sumburConsistentHash";
using HashType = UInt32;
using ResultType = UInt16;
using BucketsType = ResultType;
static constexpr auto max_buckets = static_cast<UInt64>(std::numeric_limits<BucketsType>::max());
static inline ResultType apply(HashType hash, BucketsType n)
{
return static_cast<ResultType>(sumburConsistentHash(hash, n));
}
};
template <typename Impl>
class FunctionConsistentHashImpl : public IFunction
{
@ -221,10 +155,4 @@ private:
}
};
using FunctionYandexConsistentHash = FunctionConsistentHashImpl<YandexConsistentHashImpl>;
using FunctionJumpConsistentHash = FunctionConsistentHashImpl<JumpConsistentHashImpl>;
using FunctionSumburConsistentHash = FunctionConsistentHashImpl<SumburConsistentHashImpl>;
}

View File

@ -1,38 +0,0 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionsVisitParam.h>
#include <Functions/FunctionsStringSearch.h>
namespace DB
{
struct NameVisitParamHas { static constexpr auto name = "visitParamHas"; };
struct NameVisitParamExtractUInt { static constexpr auto name = "visitParamExtractUInt"; };
struct NameVisitParamExtractInt { static constexpr auto name = "visitParamExtractInt"; };
struct NameVisitParamExtractFloat { static constexpr auto name = "visitParamExtractFloat"; };
struct NameVisitParamExtractBool { static constexpr auto name = "visitParamExtractBool"; };
struct NameVisitParamExtractRaw { static constexpr auto name = "visitParamExtractRaw"; };
struct NameVisitParamExtractString { static constexpr auto name = "visitParamExtractString"; };
using FunctionVisitParamHas = FunctionsStringSearch<ExtractParamImpl<HasParam>, NameVisitParamHas>;
using FunctionVisitParamExtractUInt = FunctionsStringSearch<ExtractParamImpl<ExtractNumericType<UInt64>>, NameVisitParamExtractUInt>;
using FunctionVisitParamExtractInt = FunctionsStringSearch<ExtractParamImpl<ExtractNumericType<Int64>>, NameVisitParamExtractInt>;
using FunctionVisitParamExtractFloat = FunctionsStringSearch<ExtractParamImpl<ExtractNumericType<Float64>>, NameVisitParamExtractFloat>;
using FunctionVisitParamExtractBool = FunctionsStringSearch<ExtractParamImpl<ExtractBool>, NameVisitParamExtractBool>;
using FunctionVisitParamExtractRaw = FunctionsStringSearchToString<ExtractParamToStringImpl<ExtractRaw>, NameVisitParamExtractRaw>;
using FunctionVisitParamExtractString = FunctionsStringSearchToString<ExtractParamToStringImpl<ExtractString>, NameVisitParamExtractString>;
void registerFunctionsVisitParam(FunctionFactory & factory)
{
factory.registerFunction<FunctionVisitParamHas>();
factory.registerFunction<FunctionVisitParamExtractUInt>();
factory.registerFunction<FunctionVisitParamExtractInt>();
factory.registerFunction<FunctionVisitParamExtractFloat>();
factory.registerFunction<FunctionVisitParamExtractBool>();
factory.registerFunction<FunctionVisitParamExtractRaw>();
factory.registerFunction<FunctionVisitParamExtractString>();
}
}

View File

@ -1,14 +1,9 @@
#pragma once
#include <Poco/UTF8Encoding.h>
#include <Poco/Unicode.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeFixedString.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnFixedString.h>
#include <Common/hex.h>
#include <Common/Volnitsky.h>
#include <Functions/IFunction.h>
#include <Functions/FunctionHelpers.h>
@ -43,15 +38,6 @@ namespace ErrorCodes
extern const int ILLEGAL_COLUMN;
}
struct HasParam
{
using ResultType = UInt8;
static UInt8 extract(const UInt8 *, const UInt8 *)
{
return true;
}
};
template <typename NumericType>
struct ExtractNumericType
@ -78,77 +64,6 @@ struct ExtractNumericType
}
};
struct ExtractBool
{
using ResultType = UInt8;
static UInt8 extract(const UInt8 * begin, const UInt8 * end)
{
return begin + 4 <= end && 0 == strncmp(reinterpret_cast<const char *>(begin), "true", 4);
}
};
struct ExtractRaw
{
using ExpectChars = PODArrayWithStackMemory<char, 64>;
static void extract(const UInt8 * pos, const UInt8 * end, ColumnString::Chars & res_data)
{
ExpectChars expects_end;
UInt8 current_expect_end = 0;
for (auto extract_begin = pos; pos != end; ++pos)
{
if (current_expect_end && *pos == current_expect_end)
{
expects_end.pop_back();
current_expect_end = expects_end.empty() ? 0 : expects_end.back();
}
else
{
switch (*pos)
{
case '[':
current_expect_end = ']';
expects_end.push_back(current_expect_end);
break;
case '{':
current_expect_end = '}';
expects_end.push_back(current_expect_end);
break;
case '"' :
current_expect_end = '"';
expects_end.push_back(current_expect_end);
break;
case '\\':
/// skip backslash
if (pos + 1 < end && pos[1] == '"')
pos++;
break;
default:
if (!current_expect_end && (*pos == ',' || *pos == '}'))
{
res_data.insert(extract_begin, pos);
return;
}
}
}
}
}
};
struct ExtractString
{
static void extract(const UInt8 * pos, const UInt8 * end, ColumnString::Chars & res_data)
{
size_t old_size = res_data.size();
ReadBufferFromMemory in(pos, end - pos);
if (!tryReadJSONStringInto(res_data, in))
res_data.resize(old_size);
}
};
/** Searches for occurrences of a field in the visit parameter and calls ParamExtractor
* for each occurrence of the field, passing it a pointer to the part of the string,
@ -285,6 +200,4 @@ struct ExtractParamToStringImpl
}
};
}

View File

@ -0,0 +1,44 @@
#include "FunctionsConsistentHashing.h"
#include <Functions/FunctionFactory.h>
namespace DB
{
/// Code from https://arxiv.org/pdf/1406.2294.pdf
static inline int32_t JumpConsistentHash(uint64_t key, int32_t num_buckets)
{
int64_t b = -1, j = 0;
while (j < num_buckets)
{
b = j;
key = key * 2862933555777941757ULL + 1;
j = static_cast<int64_t>((b + 1) * (double(1LL << 31) / double((key >> 33) + 1)));
}
return static_cast<int32_t>(b);
}
struct JumpConsistentHashImpl
{
static constexpr auto name = "jumpConsistentHash";
using HashType = UInt64;
using ResultType = Int32;
using BucketsType = ResultType;
static constexpr auto max_buckets = static_cast<UInt64>(std::numeric_limits<BucketsType>::max());
static inline ResultType apply(UInt64 hash, BucketsType n)
{
return JumpConsistentHash(hash, n);
}
};
using FunctionJumpConsistentHash = FunctionConsistentHashImpl<JumpConsistentHashImpl>;
void registerFunctionJumpConsistentHash(FunctionFactory & factory)
{
factory.registerFunction<FunctionJumpConsistentHash>();
}
}

View File

@ -20,7 +20,6 @@ void registerFunctionsExternalDictionaries(FunctionFactory &);
void registerFunctionsExternalModels(FunctionFactory &);
void registerFunctionsFormatting(FunctionFactory &);
void registerFunctionsHashing(FunctionFactory &);
void registerFunctionsConsistentHashing(FunctionFactory &);
void registerFunctionsHigherOrder(FunctionFactory &);
void registerFunctionsLogical(FunctionFactory &);
void registerFunctionsMiscellaneous(FunctionFactory &);
@ -41,6 +40,7 @@ void registerFunctionsNull(FunctionFactory &);
void registerFunctionsFindCluster(FunctionFactory &);
void registerFunctionsJSON(FunctionFactory &);
void registerFunctionsIntrospection(FunctionFactory &);
void registerFunctionsConsistentHashing(FunctionFactory & factory);
void registerFunctions()
{
@ -60,7 +60,6 @@ void registerFunctions()
registerFunctionsExternalModels(factory);
registerFunctionsFormatting(factory);
registerFunctionsHashing(factory);
registerFunctionsConsistentHashing(factory);
registerFunctionsHigherOrder(factory);
registerFunctionsLogical(factory);
registerFunctionsMiscellaneous(factory);
@ -80,6 +79,7 @@ void registerFunctions()
registerFunctionsFindCluster(factory);
registerFunctionsJSON(factory);
registerFunctionsIntrospection(factory);
registerFunctionsConsistentHashing(factory);
}
}

View File

@ -0,0 +1,18 @@
namespace DB
{
class FunctionFactory;
void registerFunctionYandexConsistentHash(FunctionFactory & factory);
void registerFunctionJumpConsistentHash(FunctionFactory & factory);
void registerFunctionSumburConsistentHash(FunctionFactory & factory);
void registerFunctionsConsistentHashing(FunctionFactory & factory)
{
registerFunctionYandexConsistentHash(factory);
registerFunctionJumpConsistentHash(factory);
registerFunctionSumburConsistentHash(factory);
}
}

View File

@ -0,0 +1,25 @@
namespace DB
{
class FunctionFactory;
void registerFunctionVisitParamHas(FunctionFactory & factory);
void registerFunctionVisitParamExtractUInt(FunctionFactory & factory);
void registerFunctionVisitParamExtractInt(FunctionFactory & factory);
void registerFunctionVisitParamExtractFloat(FunctionFactory & factory);
void registerFunctionVisitParamExtractBool(FunctionFactory & factory);
void registerFunctionVisitParamExtractRaw(FunctionFactory & factory);
void registerFunctionVisitParamExtractString(FunctionFactory & factory);
void registerFunctionsVisitParam(FunctionFactory & factory)
{
registerFunctionVisitParamHas(factory);
registerFunctionVisitParamExtractUInt(factory);
registerFunctionVisitParamExtractInt(factory);
registerFunctionVisitParamExtractFloat(factory);
registerFunctionVisitParamExtractBool(factory);
registerFunctionVisitParamExtractRaw(factory);
registerFunctionVisitParamExtractString(factory);
}
}

View File

@ -0,0 +1,34 @@
#include "FunctionsConsistentHashing.h"
#include <Functions/FunctionFactory.h>
#include <sumbur.h>
namespace DB
{
struct SumburConsistentHashImpl
{
static constexpr auto name = "sumburConsistentHash";
using HashType = UInt32;
using ResultType = UInt16;
using BucketsType = ResultType;
static constexpr auto max_buckets = static_cast<UInt64>(std::numeric_limits<BucketsType>::max());
static inline ResultType apply(HashType hash, BucketsType n)
{
return static_cast<ResultType>(sumburConsistentHash(hash, n));
}
};
using FunctionSumburConsistentHash = FunctionConsistentHashImpl<SumburConsistentHashImpl>;
void registerFunctionSumburConsistentHash(FunctionFactory & factory)
{
factory.registerFunction<FunctionSumburConsistentHash>();
}
}

View File

@ -0,0 +1,28 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionsVisitParam.h>
#include <Functions/FunctionsStringSearch.h>
namespace DB
{
struct ExtractBool
{
using ResultType = UInt8;
static UInt8 extract(const UInt8 * begin, const UInt8 * end)
{
return begin + 4 <= end && 0 == strncmp(reinterpret_cast<const char *>(begin), "true", 4);
}
};
struct NameVisitParamExtractBool { static constexpr auto name = "visitParamExtractBool"; };
using FunctionVisitParamExtractBool = FunctionsStringSearch<ExtractParamImpl<ExtractBool>, NameVisitParamExtractBool>;
void registerFunctionVisitParamExtractBool(FunctionFactory & factory)
{
factory.registerFunction<FunctionVisitParamExtractBool>();
}
}

View File

@ -0,0 +1,18 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionsVisitParam.h>
#include <Functions/FunctionsStringSearch.h>
namespace DB
{
struct NameVisitParamExtractFloat { static constexpr auto name = "visitParamExtractFloat"; };
using FunctionVisitParamExtractFloat = FunctionsStringSearch<ExtractParamImpl<ExtractNumericType<Float64>>, NameVisitParamExtractFloat>;
void registerFunctionVisitParamExtractFloat(FunctionFactory & factory)
{
factory.registerFunction<FunctionVisitParamExtractFloat>();
}
}

View File

@ -0,0 +1,18 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionsVisitParam.h>
#include <Functions/FunctionsStringSearch.h>
namespace DB
{
struct NameVisitParamExtractInt { static constexpr auto name = "visitParamExtractInt"; };
using FunctionVisitParamExtractInt = FunctionsStringSearch<ExtractParamImpl<ExtractNumericType<Int64>>, NameVisitParamExtractInt>;
void registerFunctionVisitParamExtractInt(FunctionFactory & factory)
{
factory.registerFunction<FunctionVisitParamExtractInt>();
}
}

View File

@ -0,0 +1,67 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionsVisitParam.h>
#include <Functions/FunctionsStringSearch.h>
namespace DB
{
struct ExtractRaw
{
using ExpectChars = PODArrayWithStackMemory<char, 64>;
static void extract(const UInt8 * pos, const UInt8 * end, ColumnString::Chars & res_data)
{
ExpectChars expects_end;
UInt8 current_expect_end = 0;
for (auto extract_begin = pos; pos != end; ++pos)
{
if (current_expect_end && *pos == current_expect_end)
{
expects_end.pop_back();
current_expect_end = expects_end.empty() ? 0 : expects_end.back();
}
else
{
switch (*pos)
{
case '[':
current_expect_end = ']';
expects_end.push_back(current_expect_end);
break;
case '{':
current_expect_end = '}';
expects_end.push_back(current_expect_end);
break;
case '"' :
current_expect_end = '"';
expects_end.push_back(current_expect_end);
break;
case '\\':
/// skip backslash
if (pos + 1 < end && pos[1] == '"')
pos++;
break;
default:
if (!current_expect_end && (*pos == ',' || *pos == '}'))
{
res_data.insert(extract_begin, pos);
return;
}
}
}
}
}
};
struct NameVisitParamExtractRaw { static constexpr auto name = "visitParamExtractRaw"; };
using FunctionVisitParamExtractRaw = FunctionsStringSearchToString<ExtractParamToStringImpl<ExtractRaw>, NameVisitParamExtractRaw>;
void registerFunctionVisitParamExtractRaw(FunctionFactory & factory)
{
factory.registerFunction<FunctionVisitParamExtractRaw>();
}
}

View File

@ -0,0 +1,29 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionsVisitParam.h>
#include <Functions/FunctionsStringSearch.h>
namespace DB
{
struct ExtractString
{
static void extract(const UInt8 * pos, const UInt8 * end, ColumnString::Chars & res_data)
{
size_t old_size = res_data.size();
ReadBufferFromMemory in(pos, end - pos);
if (!tryReadJSONStringInto(res_data, in))
res_data.resize(old_size);
}
};
struct NameVisitParamExtractString { static constexpr auto name = "visitParamExtractString"; };
using FunctionVisitParamExtractString = FunctionsStringSearchToString<ExtractParamToStringImpl<ExtractString>, NameVisitParamExtractString>;
void registerFunctionVisitParamExtractString(FunctionFactory & factory)
{
factory.registerFunction<FunctionVisitParamExtractString>();
}
}

View File

@ -0,0 +1,18 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionsVisitParam.h>
#include <Functions/FunctionsStringSearch.h>
namespace DB
{
struct NameVisitParamExtractUInt { static constexpr auto name = "visitParamExtractUInt"; };
using FunctionVisitParamExtractUInt = FunctionsStringSearch<ExtractParamImpl<ExtractNumericType<UInt64>>, NameVisitParamExtractUInt>;
void registerFunctionVisitParamExtractUInt(FunctionFactory & factory)
{
factory.registerFunction<FunctionVisitParamExtractUInt>();
}
}

View File

@ -0,0 +1,28 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionsVisitParam.h>
#include <Functions/FunctionsStringSearch.h>
namespace DB
{
struct HasParam
{
using ResultType = UInt8;
static UInt8 extract(const UInt8 *, const UInt8 *)
{
return true;
}
};
struct NameVisitParamHas { static constexpr auto name = "visitParamHas"; };
using FunctionVisitParamHas = FunctionsStringSearch<ExtractParamImpl<HasParam>, NameVisitParamHas>;
void registerFunctionVisitParamHas(FunctionFactory & factory)
{
factory.registerFunction<FunctionVisitParamHas>();
}
}

View File

@ -0,0 +1,34 @@
#include "FunctionsConsistentHashing.h"
#include <Functions/FunctionFactory.h>
#include <consistent_hashing.h>
namespace DB
{
/// An O(1) time and space consistent hash algorithm by Konstantin Oblakov
struct YandexConsistentHashImpl
{
static constexpr auto name = "yandexConsistentHash";
using HashType = UInt64;
/// Actually it supports UInt64, but it is efficient only if n <= 32768
using ResultType = UInt16;
using BucketsType = ResultType;
static constexpr auto max_buckets = 32768;
static inline ResultType apply(UInt64 hash, BucketsType n)
{
return ConsistentHashing(hash, n);
}
};
using FunctionYandexConsistentHash = FunctionConsistentHashImpl<YandexConsistentHashImpl>;
void registerFunctionYandexConsistentHash(FunctionFactory & factory)
{
factory.registerFunction<FunctionYandexConsistentHash>();
}
}