2020-04-02 13:48:14 +00:00
|
|
|
#pragma once
|
|
|
|
|
2020-04-05 19:39:12 +00:00
|
|
|
#include <Functions/TargetSpecific.h>
|
2020-04-05 12:01:33 +00:00
|
|
|
#include <Functions/IFunctionImpl.h>
|
|
|
|
|
2020-04-05 19:39:12 +00:00
|
|
|
#include <Common/Stopwatch.h>
|
2020-05-17 15:13:01 +00:00
|
|
|
#include <Interpreters/Context.h>
|
2020-04-05 19:39:12 +00:00
|
|
|
|
2020-04-05 12:01:33 +00:00
|
|
|
#include <random>
|
|
|
|
|
2020-04-05 19:39:12 +00:00
|
|
|
/// This file contains Adaptors which help to combine several implementations of the function.
|
|
|
|
/// Adaptors check that implementation can be executed on the current platform and choose
|
|
|
|
/// that one which works faster according to previous runs.
|
2020-04-02 13:48:14 +00:00
|
|
|
|
2020-04-05 19:39:12 +00:00
|
|
|
namespace DB
|
|
|
|
{
|
2020-04-02 13:48:14 +00:00
|
|
|
|
2020-05-16 06:59:08 +00:00
|
|
|
namespace ErrorCodes
|
|
|
|
{
|
|
|
|
extern const int NO_SUITABLE_FUNCTION_IMPLEMENTATION;
|
|
|
|
}
|
|
|
|
|
2020-04-05 19:39:12 +00:00
|
|
|
// TODO(dakovalkov): This is copied and pasted struct from LZ4_decompress_faster.h with little changes.
|
2020-04-05 12:01:33 +00:00
|
|
|
struct PerformanceStatistics
|
2020-04-02 13:48:14 +00:00
|
|
|
{
|
2020-04-05 12:01:33 +00:00
|
|
|
struct Element
|
2020-04-02 13:48:14 +00:00
|
|
|
{
|
2020-04-05 12:01:33 +00:00
|
|
|
double count = 0;
|
|
|
|
double sum = 0;
|
|
|
|
|
|
|
|
double adjustedCount() const
|
|
|
|
{
|
|
|
|
return count - NUM_INVOCATIONS_TO_THROW_OFF;
|
|
|
|
}
|
|
|
|
|
|
|
|
double mean() const
|
|
|
|
{
|
|
|
|
return sum / adjustedCount();
|
|
|
|
}
|
|
|
|
|
|
|
|
/// For better convergence, we don't use proper estimate of stddev.
|
|
|
|
/// We want to eventually separate between two algorithms even in case
|
|
|
|
/// when there is no statistical significant difference between them.
|
|
|
|
double sigma() const
|
|
|
|
{
|
|
|
|
return mean() / sqrt(adjustedCount());
|
2020-04-02 13:48:14 +00:00
|
|
|
}
|
2020-04-05 12:01:33 +00:00
|
|
|
|
|
|
|
void update(double seconds, double bytes)
|
|
|
|
{
|
|
|
|
++count;
|
|
|
|
|
|
|
|
if (count > NUM_INVOCATIONS_TO_THROW_OFF)
|
|
|
|
sum += seconds / bytes;
|
|
|
|
}
|
|
|
|
|
|
|
|
double sample(pcg64 & stat_rng) const
|
|
|
|
{
|
|
|
|
/// If there is a variant with not enough statistics, always choose it.
|
|
|
|
/// And in that case prefer variant with less number of invocations.
|
|
|
|
|
|
|
|
if (adjustedCount() < 2)
|
|
|
|
return adjustedCount() - 1;
|
|
|
|
else
|
|
|
|
return std::normal_distribution<>(mean(), sigma())(stat_rng);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
/// Cold invocations may be affected by additional memory latencies. Don't take first invocations into account.
|
|
|
|
static constexpr double NUM_INVOCATIONS_TO_THROW_OFF = 2;
|
|
|
|
|
|
|
|
/// How to select method to run.
|
|
|
|
/// -1 - automatically, based on statistics (default);
|
|
|
|
/// -2 - choose methods in round robin fashion (for performance testing).
|
|
|
|
/// >= 0 - always choose specified method (for performance testing);
|
|
|
|
ssize_t choose_method = -1;
|
|
|
|
|
|
|
|
std::vector<Element> data;
|
|
|
|
|
|
|
|
/// It's Ok that generator is not seeded.
|
|
|
|
pcg64 rng;
|
|
|
|
|
|
|
|
/// To select from different algorithms we use a kind of "bandits" algorithm.
|
|
|
|
/// Sample random values from estimated normal distributions and choose the minimal.
|
|
|
|
size_t select()
|
|
|
|
{
|
|
|
|
if (choose_method < 0)
|
|
|
|
{
|
|
|
|
std::vector<double> samples(data.size());
|
|
|
|
for (size_t i = 0; i < data.size(); ++i)
|
|
|
|
samples[i] = choose_method == -1
|
|
|
|
? data[i].sample(rng)
|
|
|
|
: data[i].adjustedCount();
|
|
|
|
|
|
|
|
return std::min_element(samples.begin(), samples.end()) - samples.begin();
|
|
|
|
}
|
|
|
|
else
|
|
|
|
return choose_method;
|
|
|
|
}
|
|
|
|
|
2020-05-16 06:59:08 +00:00
|
|
|
size_t size() const
|
|
|
|
{
|
2020-04-05 12:01:33 +00:00
|
|
|
return data.size();
|
|
|
|
}
|
|
|
|
|
2020-05-16 06:59:08 +00:00
|
|
|
bool empty() const
|
|
|
|
{
|
2020-05-16 06:15:39 +00:00
|
|
|
return size() == 0;
|
|
|
|
}
|
|
|
|
|
2020-05-16 06:59:08 +00:00
|
|
|
void emplace_back()
|
|
|
|
{
|
2020-04-05 12:01:33 +00:00
|
|
|
data.emplace_back();
|
|
|
|
}
|
|
|
|
|
|
|
|
PerformanceStatistics() {}
|
|
|
|
PerformanceStatistics(ssize_t choose_method_) : choose_method(choose_method_) {}
|
|
|
|
};
|
|
|
|
|
2020-05-15 10:10:34 +00:00
|
|
|
struct PerformanceAdaptorOptions
|
|
|
|
{
|
2020-05-16 06:15:39 +00:00
|
|
|
std::optional<std::vector<String>> implementations;
|
|
|
|
};
|
|
|
|
|
2020-05-16 06:59:08 +00:00
|
|
|
/// Redirects IExecutableFunctionImpl::execute() and IFunction::executeImpl() to executeFunctionImpl().
/// The second template parameter carries no information of its own: it only gives the
/// partial specializations a slot for their std::enable_if_t condition.
template <typename DefaultFunction, typename Enable = void>
class FunctionExecutor;
|
|
|
|
|
|
|
|
template <typename DefaultFunction>
|
|
|
|
class FunctionExecutor<DefaultFunction, std::enable_if_t<std::is_base_of_v<IExecutableFunctionImpl, DefaultFunction>>>
|
|
|
|
: public DefaultFunction
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
using BaseFunctionPtr = ExecutableFunctionImplPtr;
|
|
|
|
|
|
|
|
template <typename ...Args>
|
2020-05-16 17:21:23 +00:00
|
|
|
FunctionExecutor(Args&&... args) : DefaultFunction(std::forward<Args>(args)...) {}
|
2020-05-15 10:10:34 +00:00
|
|
|
|
2020-05-16 06:15:39 +00:00
|
|
|
virtual void executeFunctionImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) = 0;
|
|
|
|
|
|
|
|
virtual void execute(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override
|
|
|
|
{
|
|
|
|
executeFunctionImpl(block, arguments, result, input_rows_count);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename DefaultFunction>
|
|
|
|
class FunctionExecutor<DefaultFunction, std::enable_if_t<std::is_base_of_v<IFunction, DefaultFunction>>>
|
|
|
|
: public DefaultFunction
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
using BaseFunctionPtr = FunctionPtr;
|
|
|
|
|
|
|
|
template <typename ...Args>
|
2020-05-16 17:21:23 +00:00
|
|
|
FunctionExecutor(Args&&... args) : DefaultFunction(std::forward<Args>(args)...) {}
|
2020-05-16 06:15:39 +00:00
|
|
|
|
|
|
|
virtual void executeFunctionImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) = 0;
|
|
|
|
|
|
|
|
virtual void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override
|
|
|
|
{
|
|
|
|
executeFunctionImpl(block, arguments, result, input_rows_count);
|
|
|
|
}
|
2020-05-15 10:10:34 +00:00
|
|
|
};
|
|
|
|
|
2020-04-05 19:39:12 +00:00
|
|
|
/// Combine several IExecutableFunctionImpl into one.
|
|
|
|
/// All the implementations should be equivalent.
|
|
|
|
/// Implementation to execute will be selected based on performance on previous runs.
|
|
|
|
/// DefaultFunction should be executable on every supported platform, while alternative implementations
|
|
|
|
/// could use extended set of instructions (AVX, NEON, etc).
|
|
|
|
/// It's convenient to inherit your func from this and register all alternative implementations in the constructor.
|
|
|
|
template <typename DefaultFunction>
|
2020-05-16 06:15:39 +00:00
|
|
|
class FunctionPerformanceAdaptor : public FunctionExecutor<DefaultFunction>
|
2020-04-05 19:39:12 +00:00
|
|
|
{
|
|
|
|
public:
|
2020-05-18 08:48:35 +00:00
|
|
|
using BaseFunctionPtr = typename FunctionExecutor<DefaultFunction>::BaseFunctionPtr;
|
2020-04-05 19:39:12 +00:00
|
|
|
|
2020-05-16 06:15:39 +00:00
|
|
|
template <typename ...Params>
|
2020-05-17 15:13:01 +00:00
|
|
|
FunctionPerformanceAdaptor(const Context & context_, Params&&... params)
|
2020-05-16 17:21:23 +00:00
|
|
|
: FunctionExecutor<DefaultFunction>(std::forward<Params>(params)...)
|
2020-05-17 15:13:01 +00:00
|
|
|
, context(context_)
|
2020-04-05 19:39:12 +00:00
|
|
|
{
|
2020-05-16 06:59:08 +00:00
|
|
|
if (isImplementationEnabled(DefaultFunction::getImplementationTag()))
|
2020-05-16 06:15:39 +00:00
|
|
|
statistics.emplace_back();
|
2020-04-05 19:39:12 +00:00
|
|
|
}
|
|
|
|
|
2020-05-16 06:59:08 +00:00
|
|
|
/// Register alternative implementation.
|
2020-04-05 19:39:12 +00:00
|
|
|
template<typename Function, typename ...Params>
|
2020-05-16 17:21:23 +00:00
|
|
|
void registerImplementation(TargetArch arch, Params&&... params)
|
|
|
|
{
|
2020-05-16 06:59:08 +00:00
|
|
|
if (IsArchSupported(arch) && isImplementationEnabled(Function::getImplementationTag()))
|
|
|
|
{
|
2020-05-16 17:21:23 +00:00
|
|
|
impls.emplace_back(std::make_shared<Function>(std::forward<Params>(params)...));
|
2020-04-05 19:39:12 +00:00
|
|
|
statistics.emplace_back();
|
|
|
|
}
|
|
|
|
}
|
2020-04-05 12:01:33 +00:00
|
|
|
|
2020-05-16 17:21:23 +00:00
|
|
|
bool isImplementationEnabled(const String & impl_tag)
|
|
|
|
{
|
2020-05-17 15:13:01 +00:00
|
|
|
const String & tag = context.getSettingsRef().function_implementation.value;
|
|
|
|
return tag.empty() || tag == impl_tag;
|
|
|
|
// if (!options.implementations)
|
|
|
|
// return true;
|
|
|
|
|
|
|
|
// for (const auto & tag : *options.implementations)
|
|
|
|
// {
|
|
|
|
// if (tag == impl_tag)
|
|
|
|
// return true;
|
|
|
|
// }
|
|
|
|
// return false;
|
2020-04-05 12:01:33 +00:00
|
|
|
}
|
|
|
|
|
2020-05-16 06:15:39 +00:00
|
|
|
protected:
|
|
|
|
virtual void executeFunctionImpl(Block & block, const ColumnNumbers & arguments,
|
|
|
|
size_t result, size_t input_rows_count) override
|
2020-04-05 12:01:33 +00:00
|
|
|
{
|
2020-05-16 06:15:39 +00:00
|
|
|
if (statistics.empty())
|
2020-05-16 06:59:08 +00:00
|
|
|
throw Exception("All available implementations are disabled by user config",
|
|
|
|
ErrorCodes::NO_SUITABLE_FUNCTION_IMPLEMENTATION);
|
|
|
|
|
2020-04-05 19:39:12 +00:00
|
|
|
auto id = statistics.select();
|
|
|
|
Stopwatch watch;
|
2020-05-16 06:59:08 +00:00
|
|
|
|
|
|
|
if (id == impls.size())
|
|
|
|
{
|
2020-05-16 06:15:39 +00:00
|
|
|
if constexpr (std::is_base_of_v<IFunction, FunctionPerformanceAdaptor>)
|
|
|
|
DefaultFunction::executeImpl(block, arguments, result, input_rows_count);
|
|
|
|
else
|
|
|
|
DefaultFunction::execute(block, arguments, result, input_rows_count);
|
2020-05-16 06:59:08 +00:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2020-05-16 06:15:39 +00:00
|
|
|
if constexpr (std::is_base_of_v<IFunction, FunctionPerformanceAdaptor>)
|
|
|
|
impls[id]->executeImpl(block, arguments, result, input_rows_count);
|
|
|
|
else
|
|
|
|
impls[id]->execute(block, arguments, result, input_rows_count);
|
2020-04-05 13:14:59 +00:00
|
|
|
}
|
2020-04-05 19:39:12 +00:00
|
|
|
watch.stop();
|
2020-05-16 06:59:08 +00:00
|
|
|
|
2020-04-05 19:39:12 +00:00
|
|
|
// TODO(dakovalkov): Calculate something more informative.
|
|
|
|
size_t rows_summary = 0;
|
2020-05-16 06:59:08 +00:00
|
|
|
for (auto i : arguments)
|
|
|
|
{
|
2020-04-05 19:39:12 +00:00
|
|
|
rows_summary += block.getByPosition(i).column->size();
|
|
|
|
}
|
2020-05-16 06:59:08 +00:00
|
|
|
|
|
|
|
if (rows_summary >= 1000)
|
|
|
|
{
|
2020-04-05 19:39:12 +00:00
|
|
|
statistics.data[id].update(watch.elapsedSeconds(), rows_summary);
|
|
|
|
}
|
2020-04-05 12:01:33 +00:00
|
|
|
}
|
|
|
|
|
2020-04-02 13:48:14 +00:00
|
|
|
private:
|
2020-05-16 06:15:39 +00:00
|
|
|
std::vector<BaseFunctionPtr> impls; // Alternative implementations.
|
2020-04-05 12:01:33 +00:00
|
|
|
PerformanceStatistics statistics;
|
2020-05-17 15:13:01 +00:00
|
|
|
const Context & context;
|
2020-04-13 09:25:53 +00:00
|
|
|
};
|
2020-04-05 12:01:33 +00:00
|
|
|
|
2020-05-16 06:59:08 +00:00
|
|
|
}
|