2020-04-02 13:48:14 +00:00
|
|
|
#pragma once
|
|
|
|
|
2020-04-05 19:39:12 +00:00
|
|
|
#include <Functions/TargetSpecific.h>
#include <Functions/IFunctionImpl.h>

#include <Common/Stopwatch.h>
#include <Interpreters/Context.h>

#include <cmath>
#include <mutex>
#include <random>
|
|
|
|
|
2020-05-26 15:56:46 +00:00
|
|
|
/* This file contains helper class ImplementationSelector. It makes it easier to combine
|
|
|
|
* several implementations of IFunction/IExecutableFunctionImpl.
|
|
|
|
*/
|
2020-04-02 13:48:14 +00:00
|
|
|
|
2020-04-05 19:39:12 +00:00
|
|
|
namespace DB
|
|
|
|
{
|
2020-04-02 13:48:14 +00:00
|
|
|
|
2020-05-16 06:59:08 +00:00
|
|
|
namespace ErrorCodes
|
|
|
|
{
|
|
|
|
extern const int NO_SUITABLE_FUNCTION_IMPLEMENTATION;
|
|
|
|
}
|
|
|
|
|
2020-05-26 11:15:44 +00:00
|
|
|
namespace detail
|
2020-04-02 13:48:14 +00:00
|
|
|
{
|
2020-05-26 11:15:44 +00:00
|
|
|
class PerformanceStatistics
|
2020-04-02 13:48:14 +00:00
|
|
|
{
|
2020-05-26 11:15:44 +00:00
|
|
|
public:
|
|
|
|
size_t select(bool considarable)
|
2020-04-05 12:01:33 +00:00
|
|
|
{
|
2020-05-26 11:15:44 +00:00
|
|
|
/// We don't need to choose/measure anything if there's only one variant.
|
|
|
|
if (size() == 1)
|
|
|
|
return 0;
|
|
|
|
|
2020-05-28 11:48:56 +00:00
|
|
|
std::lock_guard guard(lock);
|
2020-05-26 11:15:44 +00:00
|
|
|
|
|
|
|
size_t best = 0;
|
|
|
|
double best_sample = data[0].sample(rng);
|
|
|
|
|
|
|
|
for (size_t i = 1; i < data.size(); ++i)
|
|
|
|
{
|
|
|
|
double sample = data[i].sample(rng);
|
|
|
|
if (sample < best_sample)
|
|
|
|
{
|
|
|
|
best_sample = sample;
|
|
|
|
best = i;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (considarable)
|
|
|
|
data[best].run();
|
|
|
|
|
|
|
|
return best;
|
2020-04-05 12:01:33 +00:00
|
|
|
}
|
|
|
|
|
2020-05-26 11:15:44 +00:00
|
|
|
void complete(size_t id, double seconds, double bytes)
|
2020-04-05 12:01:33 +00:00
|
|
|
{
|
2020-05-26 11:15:44 +00:00
|
|
|
if (size() == 1)
|
|
|
|
return;
|
2020-05-28 11:48:56 +00:00
|
|
|
|
|
|
|
std::lock_guard guard(lock);
|
2020-05-26 11:15:44 +00:00
|
|
|
data[id].complete(seconds, bytes);
|
2020-04-05 12:01:33 +00:00
|
|
|
}
|
|
|
|
|
2020-05-26 11:15:44 +00:00
|
|
|
size_t size() const
|
2020-04-05 12:01:33 +00:00
|
|
|
{
|
2020-05-26 11:15:44 +00:00
|
|
|
return data.size();
|
2020-04-02 13:48:14 +00:00
|
|
|
}
|
2020-04-05 12:01:33 +00:00
|
|
|
|
2020-05-26 11:15:44 +00:00
|
|
|
bool empty() const
|
2020-04-05 12:01:33 +00:00
|
|
|
{
|
2020-05-26 11:15:44 +00:00
|
|
|
return size() == 0;
|
|
|
|
}
|
2020-04-05 12:01:33 +00:00
|
|
|
|
2020-05-26 11:15:44 +00:00
|
|
|
void emplace_back()
|
|
|
|
{
|
|
|
|
data.emplace_back();
|
2020-04-05 12:01:33 +00:00
|
|
|
}
|
|
|
|
|
2020-05-26 11:15:44 +00:00
|
|
|
private:
|
|
|
|
struct Element
|
2020-04-05 12:01:33 +00:00
|
|
|
{
|
2020-05-26 11:15:44 +00:00
|
|
|
int completed_count = 0;
|
|
|
|
int running_count = 0;
|
|
|
|
double sum = 0;
|
2020-04-05 12:01:33 +00:00
|
|
|
|
2020-05-26 11:15:44 +00:00
|
|
|
int adjustedCount() const
|
|
|
|
{
|
|
|
|
return completed_count - NUM_INVOCATIONS_TO_THROW_OFF;
|
|
|
|
}
|
2020-04-05 12:01:33 +00:00
|
|
|
|
2020-05-26 11:15:44 +00:00
|
|
|
double mean() const
|
|
|
|
{
|
|
|
|
return sum / adjustedCount();
|
|
|
|
}
|
2020-04-05 12:01:33 +00:00
|
|
|
|
2020-05-26 11:15:44 +00:00
|
|
|
/// For better convergence, we don't use proper estimate of stddev.
|
|
|
|
/// We want to eventually separate between two algorithms even in case
|
|
|
|
/// when there is no statistical significant difference between them.
|
|
|
|
double sigma() const
|
|
|
|
{
|
|
|
|
return mean() / sqrt(adjustedCount());
|
|
|
|
}
|
2020-04-05 12:01:33 +00:00
|
|
|
|
2020-05-26 11:15:44 +00:00
|
|
|
void run()
|
|
|
|
{
|
|
|
|
++running_count;
|
|
|
|
}
|
2020-04-05 12:01:33 +00:00
|
|
|
|
2020-05-26 11:15:44 +00:00
|
|
|
void complete(double seconds, double bytes)
|
|
|
|
{
|
|
|
|
--running_count;
|
|
|
|
++completed_count;
|
2020-04-05 12:01:33 +00:00
|
|
|
|
2020-05-26 11:15:44 +00:00
|
|
|
if (adjustedCount() > 0)
|
|
|
|
sum += seconds / bytes;
|
|
|
|
}
|
2020-04-05 12:01:33 +00:00
|
|
|
|
2020-05-26 11:15:44 +00:00
|
|
|
double sample(pcg64 & stat_rng) const
|
|
|
|
{
|
|
|
|
/// If there is a variant with not enough statistics, always choose it.
|
|
|
|
/// And in that case prefer variant with less number of invocations.
|
2020-04-05 12:01:33 +00:00
|
|
|
|
2020-05-26 15:56:46 +00:00
|
|
|
if (adjustedCount() < 2)
|
|
|
|
{
|
|
|
|
// TODO(dakovalkov): rewrite it.
|
|
|
|
int all_count = adjustedCount() + running_count;
|
|
|
|
if (all_count < 3)
|
|
|
|
return all_count - 2;
|
|
|
|
else
|
|
|
|
return adjustedCount() + running_count * 100;
|
|
|
|
}
|
2020-05-26 11:15:44 +00:00
|
|
|
return std::normal_distribution<>(mean(), sigma())(stat_rng);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
std::vector<Element> data;
|
|
|
|
std::mutex lock;
|
|
|
|
/// It's Ok that generator is not seeded.
|
|
|
|
pcg64 rng;
|
|
|
|
/// Cold invocations may be affected by additional memory latencies. Don't take first invocations into account.
|
|
|
|
static constexpr int NUM_INVOCATIONS_TO_THROW_OFF = 2;
|
|
|
|
};
|
2020-04-05 12:01:33 +00:00
|
|
|
|
2020-05-26 11:15:44 +00:00
|
|
|
/// Preferred overload: participates only when the expression `T::getImplementationTag()` is well-formed.
template <typename T, typename = decltype(T::getImplementationTag())>
std::true_type hasImplementationTagTest(const T &);

/// Fallback overload: accepts anything, loses to the overload above whenever that one is viable.
std::false_type hasImplementationTagTest(...);

/// True when `T::getImplementationTag()` is a valid expression, false otherwise.
template <typename T>
constexpr bool has_implementation_tag
    = std::is_same_v<decltype(hasImplementationTagTest(std::declval<T>())), std::true_type>;
|
2020-05-16 06:15:39 +00:00
|
|
|
|
2020-05-26 15:56:46 +00:00
|
|
|
/* Implementation tag is used to run specific implementation (for debug/testing purposes).
|
|
|
|
* It can be specified via static method ::getImplementationTag() in Function (optional).
|
|
|
|
*/
|
2020-05-26 11:15:44 +00:00
|
|
|
template <typename T>
|
|
|
|
String getImplementationTag(TargetArch arch)
|
2020-05-16 06:59:08 +00:00
|
|
|
{
|
2020-05-26 11:15:44 +00:00
|
|
|
if constexpr (has_implementation_tag<T>)
|
|
|
|
return ToString(arch) + "_" + T::getImplementationTag();
|
|
|
|
else
|
|
|
|
return ToString(arch);
|
2020-04-05 12:01:33 +00:00
|
|
|
}
|
2020-05-26 11:15:44 +00:00
|
|
|
}
|
2020-04-05 12:01:33 +00:00
|
|
|
|
2020-05-26 11:15:44 +00:00
|
|
|
/* Class which is used to store implementations for the function and to select the best one to run
|
2020-05-18 20:07:24 +00:00
|
|
|
* based on processor architecture and statistics from previous runs.
|
|
|
|
*
|
|
|
|
* FunctionInterface is typically IFunction or IExecutableFunctionImpl, but practically it can be
|
|
|
|
* any interface that contains "execute" method (IFunction is an exception and is supported as well).
|
|
|
|
*
|
|
|
|
* Example of usage:
|
|
|
|
*
|
|
|
|
* class MyDefaultImpl : public IFunction {...};
|
2020-05-26 15:56:46 +00:00
|
|
|
* DECLARE_AVX2_SPECIFIC_CODE(
|
2020-05-18 20:07:24 +00:00
|
|
|
* class MyAVX2Impl : public IFunction {...};
|
2020-05-26 15:56:46 +00:00
|
|
|
* )
|
2020-05-18 20:07:24 +00:00
|
|
|
*
|
|
|
|
* /// All methods but execute/executeImpl are usually not bottleneck, so just use them from
|
|
|
|
* /// default implementation.
|
|
|
|
* class MyFunction : public MyDefaultImpl
|
|
|
|
* {
|
|
|
|
* MyFunction(const Context & context) : selector(context) {
|
|
|
|
* /// Register all implementations in constructor.
|
|
|
|
* /// There could be as many implementation for every target as you want.
|
|
|
|
* selector.registerImplementation<TargetArch::Default, MyDefaultImpl>();
|
2020-05-26 15:56:46 +00:00
|
|
|
* #if USE_MULTITARGET_CODE
|
|
|
|
* selector.registerImplementation<TargetArch::AVX2, TargetSpecific::AVX2::MyAVX2Impl>();
|
|
|
|
* #endif
|
2020-05-18 20:07:24 +00:00
|
|
|
* }
|
|
|
|
*
|
|
|
|
* void executeImpl(...) override {
|
|
|
|
* selector.selectAndExecute(...);
|
|
|
|
* }
|
|
|
|
*
|
|
|
|
* static FunctionPtr create(const Context & context) {
|
|
|
|
* return std::make_shared<MyFunction>(context);
|
|
|
|
* }
|
|
|
|
* private:
|
|
|
|
* ImplementationSelector<IFunction> selector;
|
|
|
|
* };
|
|
|
|
*/
|
|
|
|
template <typename FunctionInterface>
|
|
|
|
class ImplementationSelector
|
2020-04-05 19:39:12 +00:00
|
|
|
{
|
|
|
|
public:
|
2020-05-18 20:07:24 +00:00
|
|
|
using ImplementationPtr = std::shared_ptr<FunctionInterface>;
|
2020-04-05 12:01:33 +00:00
|
|
|
|
2020-05-18 20:07:24 +00:00
|
|
|
ImplementationSelector(const Context & context_) : context(context_) {}
|
2020-04-05 12:01:33 +00:00
|
|
|
|
2020-05-18 20:07:24 +00:00
|
|
|
/* Select the best implementation based on previous runs.
|
|
|
|
* If FunctionInterface is IFunction, then "executeImpl" method of the implementation will be called
|
|
|
|
* and "execute" otherwise.
|
|
|
|
*/
|
|
|
|
void selectAndExecute(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count)
|
2020-04-05 12:01:33 +00:00
|
|
|
{
|
2020-05-18 20:07:24 +00:00
|
|
|
if (implementations.empty())
|
|
|
|
throw Exception("There are no available implementations for function " "TODO(dakovalkov): add name",
|
2020-05-16 06:59:08 +00:00
|
|
|
ErrorCodes::NO_SUITABLE_FUNCTION_IMPLEMENTATION);
|
|
|
|
|
2020-05-26 11:15:44 +00:00
|
|
|
/// Statistics shouldn't rely on small blocks.
|
|
|
|
bool considerable = (input_rows_count > 1000);
|
|
|
|
|
|
|
|
size_t id = statistics.select(considerable);
|
2020-04-05 19:39:12 +00:00
|
|
|
Stopwatch watch;
|
2020-05-16 06:59:08 +00:00
|
|
|
|
2020-05-18 20:07:24 +00:00
|
|
|
if constexpr (std::is_same_v<FunctionInterface, IFunction>)
|
|
|
|
implementations[id]->executeImpl(block, arguments, result, input_rows_count);
|
2020-05-16 06:59:08 +00:00
|
|
|
else
|
2020-05-18 20:07:24 +00:00
|
|
|
implementations[id]->execute(block, arguments, result, input_rows_count);
|
2020-05-20 16:44:01 +00:00
|
|
|
|
2020-04-05 19:39:12 +00:00
|
|
|
watch.stop();
|
2020-05-16 06:59:08 +00:00
|
|
|
|
2020-05-26 11:15:44 +00:00
|
|
|
if (considerable)
|
2020-05-16 06:59:08 +00:00
|
|
|
{
|
2020-05-26 11:15:44 +00:00
|
|
|
// TODO(dakovalkov): Calculate something more informative than rows count.
|
|
|
|
statistics.complete(id, watch.elapsedSeconds(), input_rows_count);
|
2020-04-05 19:39:12 +00:00
|
|
|
}
|
2020-04-05 12:01:33 +00:00
|
|
|
}
|
|
|
|
|
2020-05-18 20:07:24 +00:00
|
|
|
/* Register new implementation for function.
|
|
|
|
*
|
|
|
|
* Arch - required instruction set for running the implementation. It's guarantied that no one method would
|
|
|
|
* be called (even the constructor and static methods) if the processor doesn't support this instruction set.
|
|
|
|
*
|
|
|
|
* FunctionImpl - implementation, should be inherited from template argument FunctionInterface.
|
|
|
|
*
|
|
|
|
* All function arguments will be forwarded to the implementation constructor.
|
|
|
|
*/
|
|
|
|
template <TargetArch Arch, typename FunctionImpl, typename ...Args>
|
|
|
|
void registerImplementation(Args&&... args)
|
|
|
|
{
|
|
|
|
if (IsArchSupported(Arch))
|
|
|
|
{
|
2020-05-19 10:54:41 +00:00
|
|
|
// TODO(dakovalkov): make this option better.
|
|
|
|
const auto & choose_impl = context.getSettingsRef().function_implementation.value;
|
2020-05-26 11:15:44 +00:00
|
|
|
if (choose_impl.empty() || choose_impl == detail::getImplementationTag<FunctionImpl>(Arch))
|
2020-05-19 10:54:41 +00:00
|
|
|
{
|
|
|
|
implementations.emplace_back(std::make_shared<FunctionImpl>(std::forward<Args>(args)...));
|
|
|
|
statistics.emplace_back();
|
|
|
|
}
|
2020-05-18 20:07:24 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-04-02 13:48:14 +00:00
|
|
|
private:
|
2020-05-17 15:13:01 +00:00
|
|
|
const Context & context;
|
2020-05-18 20:07:24 +00:00
|
|
|
std::vector<ImplementationPtr> implementations;
|
2020-05-26 11:15:44 +00:00
|
|
|
detail::PerformanceStatistics statistics;
|
2020-04-13 09:25:53 +00:00
|
|
|
};
|
2020-04-05 12:01:33 +00:00
|
|
|
|
2020-05-16 06:59:08 +00:00
|
|
|
}
|