mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-25 00:52:02 +00:00
Bitonic sort on GPU (OpenCL) (#10232)
This commit is contained in:
parent
21ce8dbbc3
commit
fc7afaa639
@ -346,6 +346,7 @@ include (cmake/find/libgsasl.cmake)
|
|||||||
include (cmake/find/rdkafka.cmake)
|
include (cmake/find/rdkafka.cmake)
|
||||||
include (cmake/find/capnp.cmake)
|
include (cmake/find/capnp.cmake)
|
||||||
include (cmake/find/llvm.cmake)
|
include (cmake/find/llvm.cmake)
|
||||||
|
include (cmake/find/opencl.cmake)
|
||||||
include (cmake/find/h3.cmake)
|
include (cmake/find/h3.cmake)
|
||||||
include (cmake/find/libxml2.cmake)
|
include (cmake/find/libxml2.cmake)
|
||||||
include (cmake/find/brotli.cmake)
|
include (cmake/find/brotli.cmake)
|
||||||
@ -382,6 +383,15 @@ if (OS_LINUX AND NOT ENABLE_JEMALLOC)
|
|||||||
message (WARNING "Non default allocator is disabled. This is not recommended for production Linux builds.")
|
message (WARNING "Non default allocator is disabled. This is not recommended for production Linux builds.")
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
|
if (USE_OPENCL)
|
||||||
|
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DBITONIC_SORT_PREFERRED")
|
||||||
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DBITONIC_SORT_PREFERRED")
|
||||||
|
|
||||||
|
if (OS_DARWIN)
|
||||||
|
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -framework OpenCL")
|
||||||
|
endif ()
|
||||||
|
endif ()
|
||||||
|
|
||||||
include (cmake/print_flags.cmake)
|
include (cmake/print_flags.cmake)
|
||||||
|
|
||||||
if (TARGET global-group)
|
if (TARGET global-group)
|
||||||
|
17
cmake/find/opencl.cmake
Normal file
17
cmake/find/opencl.cmake
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
if(ENABLE_OPENCL)
|
||||||
|
|
||||||
|
# Intel OpenCl driver: sudo apt install intel-opencl-icd
|
||||||
|
# TODO It's possible to add it as submodules: https://github.com/intel/compute-runtime/releases
|
||||||
|
|
||||||
|
# OpenCL applications should link wiht ICD loader
|
||||||
|
# sudo apt install opencl-headers ocl-icd-libopencl1
|
||||||
|
# sudo ln -s /usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 /usr/lib/libOpenCL.so
|
||||||
|
|
||||||
|
find_package(OpenCL REQUIRED)
|
||||||
|
if(OpenCL_FOUND)
|
||||||
|
set(USE_OPENCL 1)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
endif()
|
||||||
|
|
||||||
|
message(STATUS "Using opencl=${USE_OPENCL}: ${OpenCL_INCLUDE_DIRS} : ${OpenCL_LIBRARIES}")
|
@ -61,6 +61,10 @@
|
|||||||
#include <Common/ThreadFuzzer.h>
|
#include <Common/ThreadFuzzer.h>
|
||||||
#include "MySQLHandlerFactory.h"
|
#include "MySQLHandlerFactory.h"
|
||||||
|
|
||||||
|
#ifdef BITONIC_SORT_PREFERRED
|
||||||
|
#include "Common/BitonicSort.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
#if !defined(ARCADIA_BUILD)
|
#if !defined(ARCADIA_BUILD)
|
||||||
# include "config_core.h"
|
# include "config_core.h"
|
||||||
# include "Common/config_version.h"
|
# include "Common/config_version.h"
|
||||||
@ -221,6 +225,10 @@ int Server::main(const std::vector<std::string> & /*args*/)
|
|||||||
registerDictionaries();
|
registerDictionaries();
|
||||||
registerDisks();
|
registerDisks();
|
||||||
|
|
||||||
|
#if defined (BITONIC_SORT_PREFERRED)
|
||||||
|
BitonicSort::getInstance().configure();
|
||||||
|
#endif
|
||||||
|
|
||||||
CurrentMetrics::set(CurrentMetrics::Revision, ClickHouseRevision::get());
|
CurrentMetrics::set(CurrentMetrics::Revision, ClickHouseRevision::get());
|
||||||
CurrentMetrics::set(CurrentMetrics::VersionInteger, ClickHouseRevision::getVersionInteger());
|
CurrentMetrics::set(CurrentMetrics::VersionInteger, ClickHouseRevision::getVersionInteger());
|
||||||
|
|
||||||
|
@ -346,6 +346,11 @@ if (USE_BROTLI)
|
|||||||
target_include_directories (clickhouse_common_io SYSTEM BEFORE PRIVATE ${BROTLI_INCLUDE_DIR})
|
target_include_directories (clickhouse_common_io SYSTEM BEFORE PRIVATE ${BROTLI_INCLUDE_DIR})
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
if (USE_OPENCL)
|
||||||
|
target_link_libraries (clickhouse_common_io PRIVATE ${OpenCL_LIBRARIES})
|
||||||
|
target_include_directories (clickhouse_common_io SYSTEM BEFORE PRIVATE ${OpenCL_INCLUDE_DIRS})
|
||||||
|
endif ()
|
||||||
|
|
||||||
dbms_target_include_directories (PUBLIC ${DBMS_INCLUDE_DIR})
|
dbms_target_include_directories (PUBLIC ${DBMS_INCLUDE_DIR})
|
||||||
target_include_directories (clickhouse_common_io PUBLIC ${DBMS_INCLUDE_DIR})
|
target_include_directories (clickhouse_common_io PUBLIC ${DBMS_INCLUDE_DIR})
|
||||||
|
|
||||||
|
@ -18,6 +18,15 @@
|
|||||||
#include <ext/bit_cast.h>
|
#include <ext/bit_cast.h>
|
||||||
#include <pdqsort.h>
|
#include <pdqsort.h>
|
||||||
|
|
||||||
|
#if !defined(ARCADIA_BUILD)
|
||||||
|
# include <Common/config.h>
|
||||||
|
# if USE_OPENCL
|
||||||
|
# include "Common/BitonicSort.h"
|
||||||
|
# endif
|
||||||
|
#else
|
||||||
|
#undef USE_OPENCL
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef __SSE2__
|
#ifdef __SSE2__
|
||||||
#include <emmintrin.h>
|
#include <emmintrin.h>
|
||||||
#endif
|
#endif
|
||||||
@ -135,6 +144,12 @@ void ColumnVector<T>::getPermutation(bool reverse, size_t limit, int nan_directi
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
#if USE_OPENCL
|
||||||
|
/// If bitonic sort if specified as preferred than `nan_direction_hint` equals specific value 42.
|
||||||
|
if (nan_direction_hint == 42 && BitonicSort::getInstance().sort(data, res, !reverse))
|
||||||
|
return;
|
||||||
|
#endif
|
||||||
|
|
||||||
/// A case for radix sort
|
/// A case for radix sort
|
||||||
if constexpr (is_arithmetic_v<T> && !std::is_same_v<T, UInt128>)
|
if constexpr (is_arithmetic_v<T> && !std::is_same_v<T, UInt128>)
|
||||||
{
|
{
|
||||||
|
270
src/Common/BitonicSort.h
Normal file
270
src/Common/BitonicSort.h
Normal file
@ -0,0 +1,270 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <string.h>
|
||||||
|
#if !defined(__APPLE__) && !defined(__FreeBSD__)
|
||||||
|
#include <malloc.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef __APPLE__
|
||||||
|
#include <OpenCL/opencl.h>
|
||||||
|
#else
|
||||||
|
#include <CL/cl.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <cmath>
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <cstdint>
|
||||||
|
#include <map>
|
||||||
|
#include <type_traits>
|
||||||
|
|
||||||
|
#include <ext/bit_cast.h>
|
||||||
|
#include <Core/Types.h>
|
||||||
|
#include <Core/Defines.h>
|
||||||
|
#include <Common/PODArray.h>
|
||||||
|
#include <Columns/ColumnsCommon.h>
|
||||||
|
|
||||||
|
#include "oclBasics.cpp"
|
||||||
|
#include "bitonicSortKernels.cl"
|
||||||
|
|
||||||
|
class BitonicSort
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
|
||||||
|
static BitonicSort & getInstance()
|
||||||
|
{
|
||||||
|
static BitonicSort instance = BitonicSort();
|
||||||
|
return instance;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Sorts given array in specified order. Returns `true` if given sequence was sorted, `false` otherwise.
|
||||||
|
template <typename T>
|
||||||
|
bool sort(const DB::PaddedPODArray<T> & data, DB::IColumn::Permutation & res, cl_uint sort_ascending)
|
||||||
|
{
|
||||||
|
size_t s = data.size();
|
||||||
|
|
||||||
|
/// Getting the nearest power of 2.
|
||||||
|
size_t power = 1;
|
||||||
|
|
||||||
|
if (s <= 8) power = 8;
|
||||||
|
else while (power < s) power <<= 1;
|
||||||
|
|
||||||
|
/// Allocates more space for additional stubs to be added if needed.
|
||||||
|
std::vector<T> pairs_content(power);
|
||||||
|
std::vector<UInt32> pairs_indices(power);
|
||||||
|
for (UInt32 i = 0; i < s; ++i)
|
||||||
|
{
|
||||||
|
pairs_content[i] = data[i];
|
||||||
|
pairs_indices[i] = i;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool result = sort(pairs_content.data(), pairs_indices.data(), s, power - s, sort_ascending);
|
||||||
|
|
||||||
|
if (!result) return false;
|
||||||
|
|
||||||
|
for (size_t i = 0, shift = 0; i < power; ++i)
|
||||||
|
{
|
||||||
|
if (pairs_indices[i] >= s)
|
||||||
|
{
|
||||||
|
++shift;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
res[i - shift] = pairs_indices[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Creating a configuration instance with making all OpenCl required variables
|
||||||
|
/// such as device, platform, context, queue, program and kernel.
|
||||||
|
void configure()
|
||||||
|
{
|
||||||
|
OCL::Settings settings = OCL::Settings(1, nullptr, 1, nullptr, 1, 0);
|
||||||
|
|
||||||
|
cl_platform_id platform = OCL::getPlatformID(settings);
|
||||||
|
cl_device_id device = OCL::getDeviceID(platform, settings);
|
||||||
|
cl_context gpu_context = OCL::makeContext(device, settings);
|
||||||
|
cl_command_queue command_queue = OCL::makeCommandQueue(device, gpu_context, settings);
|
||||||
|
|
||||||
|
cl_program program = OCL::makeProgram(bitonic_sort_kernels, gpu_context, device, settings);
|
||||||
|
|
||||||
|
/// Creating kernels for each specified data type.
|
||||||
|
cl_int error = 0;
|
||||||
|
|
||||||
|
kernels["char"] = std::shared_ptr<KernelType>(clCreateKernel(program, "bitonicSort_char", &error),
|
||||||
|
clReleaseKernel);
|
||||||
|
kernels["uchar"] = std::shared_ptr<KernelType>(clCreateKernel(program, "bitonicSort_uchar", &error),
|
||||||
|
clReleaseKernel);
|
||||||
|
kernels["short"] = std::shared_ptr<KernelType>(clCreateKernel(program, "bitonicSort_short", &error),
|
||||||
|
clReleaseKernel);
|
||||||
|
kernels["ushort"] = std::shared_ptr<KernelType>(clCreateKernel(program, "bitonicSort_ushort", &error),
|
||||||
|
clReleaseKernel);
|
||||||
|
kernels["int"] = std::shared_ptr<KernelType>(clCreateKernel(program, "bitonicSort_int", &error),
|
||||||
|
clReleaseKernel);
|
||||||
|
kernels["uint"] = std::shared_ptr<KernelType>(clCreateKernel(program, "bitonicSort_uint", &error),
|
||||||
|
clReleaseKernel);
|
||||||
|
kernels["long"] = std::shared_ptr<KernelType>(clCreateKernel(program, "bitonicSort_long", &error),
|
||||||
|
clReleaseKernel);
|
||||||
|
kernels["ulong"] = std::shared_ptr<KernelType>(clCreateKernel(program, "bitonicSort_ulong", &error),
|
||||||
|
clReleaseKernel);
|
||||||
|
OCL::checkError(error);
|
||||||
|
|
||||||
|
configuration = std::shared_ptr<OCL::Configuration>(new OCL::Configuration(device, gpu_context, command_queue, program));
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
/// Dictionary with kernels for each type from list: uchar, char, ushort, short, uint, int, ulong and long.
|
||||||
|
std::map<std::string, std::shared_ptr<KernelType>> kernels;
|
||||||
|
/// Current configuration with core OpenCL instances.
|
||||||
|
std::shared_ptr<OCL::Configuration> configuration = nullptr;
|
||||||
|
|
||||||
|
/// Returns `true` if given sequence was sorted, `false` otherwise.
|
||||||
|
template <typename T>
|
||||||
|
bool sort(T * p_input, cl_uint * indices, cl_int array_size, cl_int number_of_stubs, cl_uint sort_ascending)
|
||||||
|
{
|
||||||
|
if (typeid(T).name() == typeid(cl_char).name())
|
||||||
|
sort_char(reinterpret_cast<cl_char *>(p_input), indices, array_size, number_of_stubs, sort_ascending);
|
||||||
|
else if (typeid(T) == typeid(cl_uchar))
|
||||||
|
sort_uchar(reinterpret_cast<cl_uchar *>(p_input), indices, array_size, number_of_stubs, sort_ascending);
|
||||||
|
else if (typeid(T) == typeid(cl_short))
|
||||||
|
sort_short(reinterpret_cast<cl_short *>(p_input), indices, array_size, number_of_stubs, sort_ascending);
|
||||||
|
else if (typeid(T) == typeid(cl_ushort))
|
||||||
|
sort_ushort(reinterpret_cast<cl_ushort *>(p_input), indices, array_size, number_of_stubs, sort_ascending);
|
||||||
|
else if (typeid(T) == typeid(cl_int))
|
||||||
|
sort_int(reinterpret_cast<cl_int *>(p_input), indices, array_size, number_of_stubs, sort_ascending);
|
||||||
|
else if (typeid(T) == typeid(cl_uint))
|
||||||
|
sort_uint(reinterpret_cast<cl_uint *>(p_input), indices, array_size, number_of_stubs, sort_ascending);
|
||||||
|
else if (typeid(T) == typeid(cl_long))
|
||||||
|
sort_long(reinterpret_cast<cl_long *>(p_input), indices, array_size, number_of_stubs, sort_ascending);
|
||||||
|
else if (typeid(T) == typeid(cl_ulong))
|
||||||
|
sort_ulong(reinterpret_cast<cl_ulong *>(p_input), indices, array_size, number_of_stubs, sort_ascending);
|
||||||
|
else
|
||||||
|
return false;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Specific functions for each integer type.
|
||||||
|
void sort_char(cl_char * p_input, cl_uint * indices, cl_int array_size, cl_int number_of_stubs, cl_uint sort_ascending)
|
||||||
|
{
|
||||||
|
cl_char stubs_value = sort_ascending ? CHAR_MAX : CHAR_MIN;
|
||||||
|
fillWithStubs(number_of_stubs, stubs_value, p_input, indices, array_size);
|
||||||
|
sort(kernels["char"].get(), p_input, indices, array_size + number_of_stubs, sort_ascending);
|
||||||
|
}
|
||||||
|
|
||||||
|
void sort_uchar(cl_uchar * p_input, cl_uint * indices, cl_int array_size, cl_int number_of_stubs, cl_uint sort_ascending)
|
||||||
|
{
|
||||||
|
cl_uchar stubs_value = sort_ascending ? UCHAR_MAX : 0;
|
||||||
|
fillWithStubs(number_of_stubs, stubs_value, p_input, indices, array_size);
|
||||||
|
sort(kernels["uchar"].get(), p_input, indices, array_size + number_of_stubs, sort_ascending);
|
||||||
|
}
|
||||||
|
|
||||||
|
void sort_short(cl_short * p_input, cl_uint * indices, cl_int array_size, cl_int number_of_stubs, cl_uint sort_ascending)
|
||||||
|
{
|
||||||
|
cl_short stubs_value = sort_ascending ? SHRT_MAX : SHRT_MIN;
|
||||||
|
fillWithStubs(number_of_stubs, stubs_value, p_input, indices, array_size);
|
||||||
|
sort(kernels["short"].get(), p_input, indices, array_size + number_of_stubs, sort_ascending);
|
||||||
|
}
|
||||||
|
|
||||||
|
void sort_ushort(cl_ushort * p_input, cl_uint * indices, cl_int array_size, cl_int number_of_stubs, cl_uint sort_ascending)
|
||||||
|
{
|
||||||
|
cl_ushort stubs_value = sort_ascending ? USHRT_MAX : 0;
|
||||||
|
fillWithStubs(number_of_stubs, stubs_value, p_input, indices, array_size);
|
||||||
|
sort(kernels["ushort"].get(), p_input, indices, array_size + number_of_stubs, sort_ascending);
|
||||||
|
}
|
||||||
|
|
||||||
|
void sort_int(cl_int * p_input, cl_uint * indices, cl_int array_size, cl_int number_of_stubs, cl_uint sort_ascending)
|
||||||
|
{
|
||||||
|
cl_int stubs_value = sort_ascending ? INT_MAX : INT_MIN;
|
||||||
|
fillWithStubs(number_of_stubs, stubs_value, p_input, indices, array_size);
|
||||||
|
sort(kernels["int"].get(), p_input, indices, array_size + number_of_stubs, sort_ascending);
|
||||||
|
}
|
||||||
|
|
||||||
|
void sort_uint(cl_uint * p_input, cl_uint * indices, cl_int array_size, cl_int number_of_stubs, cl_uint sort_ascending)
|
||||||
|
{
|
||||||
|
cl_uint stubs_value = sort_ascending ? UINT_MAX : 0;
|
||||||
|
fillWithStubs(number_of_stubs, stubs_value, p_input, indices, array_size);
|
||||||
|
sort(kernels["uint"].get(), p_input, indices, array_size + number_of_stubs, sort_ascending);
|
||||||
|
}
|
||||||
|
|
||||||
|
void sort_long(cl_long * p_input, cl_uint * indices, cl_int array_size, cl_int number_of_stubs, cl_uint sort_ascending)
|
||||||
|
{
|
||||||
|
cl_long stubs_value = sort_ascending ? LONG_MAX : LONG_MIN;
|
||||||
|
fillWithStubs(number_of_stubs, stubs_value, p_input, indices, array_size);
|
||||||
|
sort(kernels["long"].get(), p_input, indices, array_size + number_of_stubs, sort_ascending);
|
||||||
|
}
|
||||||
|
|
||||||
|
void sort_ulong(cl_ulong * p_input, cl_uint * indices, cl_int array_size, cl_int number_of_stubs, cl_uint sort_ascending)
|
||||||
|
{
|
||||||
|
cl_ulong stubs_value = sort_ascending ? ULONG_MAX : 0;
|
||||||
|
fillWithStubs(number_of_stubs, stubs_value, p_input, indices, array_size);
|
||||||
|
sort(kernels["ulong"].get(), p_input, indices, array_size + number_of_stubs, sort_ascending);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Sorts p_input inplace with indices. Works only with arrays which size equals to power of two.
|
||||||
|
template <class T>
|
||||||
|
void sort(cl_kernel kernel, T * p_input, cl_uint * indices, cl_int array_size, cl_uint sort_ascending)
|
||||||
|
{
|
||||||
|
cl_int error = CL_SUCCESS;
|
||||||
|
cl_int num_stages = 0;
|
||||||
|
|
||||||
|
for (cl_int temp = array_size; temp > 2; temp >>= 1)
|
||||||
|
num_stages++;
|
||||||
|
|
||||||
|
/// Creating OpenCL buffers using input arrays memory.
|
||||||
|
cl_mem cl_input_buffer = OCL::createBuffer<T>(p_input, array_size, configuration.get()->context());
|
||||||
|
cl_mem cl_indices_buffer = OCL::createBuffer<cl_uint>(indices, array_size, configuration.get()->context());
|
||||||
|
|
||||||
|
configureKernel<cl_mem>(kernel, 0, static_cast<void *>(&cl_input_buffer));
|
||||||
|
configureKernel<cl_mem>(kernel, 1, static_cast<void *>(&cl_indices_buffer));
|
||||||
|
configureKernel<cl_uint>(kernel, 4, static_cast<void *>(&sort_ascending));
|
||||||
|
|
||||||
|
for (cl_int stage = 0; stage < num_stages; stage++)
|
||||||
|
{
|
||||||
|
configureKernel<cl_uint>(kernel, 2, static_cast<void *>(&stage));
|
||||||
|
|
||||||
|
for (cl_int pass_of_stage = stage; pass_of_stage >= 0; pass_of_stage--)
|
||||||
|
{
|
||||||
|
configureKernel<cl_uint>(kernel, 3, static_cast<void *>(&pass_of_stage));
|
||||||
|
|
||||||
|
/// Setting work-item dimensions.
|
||||||
|
size_t gsize = array_size / (2 * 4);
|
||||||
|
size_t global_work_size[1] = {pass_of_stage ? gsize : gsize << 1 }; // number of quad items in input array
|
||||||
|
|
||||||
|
/// Executing kernel.
|
||||||
|
error = clEnqueueNDRangeKernel(configuration.get()->commandQueue(), kernel, 1, nullptr,
|
||||||
|
global_work_size, nullptr, 0, nullptr, nullptr);
|
||||||
|
OCL::checkError(error);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Syncs all threads.
|
||||||
|
OCL::finishCommandQueue(configuration.get()->commandQueue());
|
||||||
|
|
||||||
|
OCL::releaseData(p_input, array_size, cl_input_buffer, configuration.get()->commandQueue());
|
||||||
|
OCL::releaseData(indices, array_size, cl_indices_buffer, configuration.get()->commandQueue());
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class T>
|
||||||
|
void configureKernel(cl_kernel kernel, int number_of_argument, void * source)
|
||||||
|
{
|
||||||
|
cl_int error = clSetKernelArg(kernel, number_of_argument, sizeof(T), source);
|
||||||
|
OCL::checkError(error);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Fills given sequences from `arraySize` index with `numberOfStubs` values.
|
||||||
|
template <class T>
|
||||||
|
void fillWithStubs(cl_int number_of_stubs, T value, T * p_input,
|
||||||
|
cl_uint * indices, cl_int array_size)
|
||||||
|
{
|
||||||
|
for (cl_int index = 0; index < number_of_stubs; ++index)
|
||||||
|
{
|
||||||
|
p_input[array_size + index] = value;
|
||||||
|
indices[array_size + index] = array_size + index;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
BitonicSort() {}
|
||||||
|
BitonicSort(BitonicSort const &);
|
||||||
|
void operator=(BitonicSort const &);
|
||||||
|
};
|
@ -493,6 +493,7 @@ namespace ErrorCodes
|
|||||||
extern const int NO_REMOTE_SHARD_AVAILABLE = 519;
|
extern const int NO_REMOTE_SHARD_AVAILABLE = 519;
|
||||||
extern const int CANNOT_DETACH_DICTIONARY_AS_TABLE = 520;
|
extern const int CANNOT_DETACH_DICTIONARY_AS_TABLE = 520;
|
||||||
extern const int ATOMIC_RENAME_FAIL = 521;
|
extern const int ATOMIC_RENAME_FAIL = 521;
|
||||||
|
extern const int OPENCL_ERROR = 522;
|
||||||
|
|
||||||
extern const int KEEPER_EXCEPTION = 999;
|
extern const int KEEPER_EXCEPTION = 999;
|
||||||
extern const int POCO_EXCEPTION = 1000;
|
extern const int POCO_EXCEPTION = 1000;
|
||||||
|
1250
src/Common/bitonicSortKernels.cl
Normal file
1250
src/Common/bitonicSortKernels.cl
Normal file
File diff suppressed because it is too large
Load Diff
@ -8,4 +8,5 @@
|
|||||||
#cmakedefine01 USE_AWS_S3
|
#cmakedefine01 USE_AWS_S3
|
||||||
#cmakedefine01 USE_BROTLI
|
#cmakedefine01 USE_BROTLI
|
||||||
#cmakedefine01 USE_UNWIND
|
#cmakedefine01 USE_UNWIND
|
||||||
|
#cmakedefine01 USE_OPENCL
|
||||||
#cmakedefine01 CLICKHOUSE_SPLIT_BINARY
|
#cmakedefine01 CLICKHOUSE_SPLIT_BINARY
|
||||||
|
363
src/Common/oclBasics.cpp
Normal file
363
src/Common/oclBasics.cpp
Normal file
@ -0,0 +1,363 @@
|
|||||||
|
#include <Common/config.h>
|
||||||
|
#if USE_OPENCL
|
||||||
|
|
||||||
|
#if !defined(__APPLE__) && !defined(__FreeBSD__)
|
||||||
|
#include <malloc.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef __APPLE__
|
||||||
|
#include <OpenCL/opencl.h>
|
||||||
|
#else
|
||||||
|
#include <CL/cl.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <Core/Types.h>
|
||||||
|
#include <Common/Exception.h>
|
||||||
|
|
||||||
|
#ifndef CL_VERSION_2_0
|
||||||
|
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
using KernelType = std::remove_reference<decltype(*cl_kernel())>::type;
|
||||||
|
|
||||||
|
|
||||||
|
namespace DB
|
||||||
|
{
|
||||||
|
namespace ErrorCodes
|
||||||
|
{
|
||||||
|
extern const int OPENCL_ERROR;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct OCL
|
||||||
|
{
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Structure which represents the most essential settings of common OpenCl entities.
|
||||||
|
*/
|
||||||
|
struct Settings
|
||||||
|
{
|
||||||
|
// Platform info
|
||||||
|
cl_uint number_of_platform_entries;
|
||||||
|
cl_uint * number_of_available_platforms;
|
||||||
|
|
||||||
|
// Devices info
|
||||||
|
cl_uint number_of_devices_entries;
|
||||||
|
cl_uint * number_of_available_devices;
|
||||||
|
|
||||||
|
// Context settings
|
||||||
|
cl_context_properties * context_properties;
|
||||||
|
|
||||||
|
void (* context_callback)(const char *, const void *, size_t, void *);
|
||||||
|
|
||||||
|
void * context_callback_data;
|
||||||
|
|
||||||
|
// Command queue settings
|
||||||
|
cl_command_queue_properties command_queue_properties;
|
||||||
|
|
||||||
|
// Build settings
|
||||||
|
cl_uint number_of_program_source_pointers;
|
||||||
|
|
||||||
|
void (* build_notification_routine)(cl_program, void *user_data);
|
||||||
|
|
||||||
|
void * build_callback_data;
|
||||||
|
char * build_options;
|
||||||
|
|
||||||
|
Settings(cl_uint number_of_platform_entries_,
|
||||||
|
cl_uint * number_of_available_platforms_,
|
||||||
|
cl_uint number_of_devices_entries_,
|
||||||
|
cl_uint * number_of_available_devices_,
|
||||||
|
cl_uint number_of_program_source_pointers_,
|
||||||
|
cl_command_queue_properties command_queue_properties_,
|
||||||
|
cl_context_properties * context_properties_ = nullptr,
|
||||||
|
void * context_data_callback_ = nullptr,
|
||||||
|
void (* context_callback_)(const char *, const void *, size_t, void *) = nullptr,
|
||||||
|
void (* build_notification_routine_)(cl_program, void * user_data) = nullptr,
|
||||||
|
void * build_callback_data_ = nullptr,
|
||||||
|
char * build_options_ = nullptr)
|
||||||
|
{
|
||||||
|
this->number_of_platform_entries = number_of_platform_entries_;
|
||||||
|
this->number_of_available_platforms = number_of_available_platforms_;
|
||||||
|
this->number_of_devices_entries = number_of_devices_entries_;
|
||||||
|
this->number_of_available_devices = number_of_available_devices_;
|
||||||
|
this->number_of_program_source_pointers = number_of_program_source_pointers_;
|
||||||
|
this->command_queue_properties = command_queue_properties_;
|
||||||
|
this->context_properties = context_properties_;
|
||||||
|
this->context_callback = context_callback_;
|
||||||
|
this->context_callback_data = context_data_callback_;
|
||||||
|
this->build_notification_routine = build_notification_routine_;
|
||||||
|
this->build_callback_data = build_callback_data_;
|
||||||
|
this->build_options = build_options_;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Configuration with already created OpenCl common entities.
|
||||||
|
*/
|
||||||
|
class Configuration
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
|
||||||
|
Configuration(cl_device_id device, cl_context gpu_context,
|
||||||
|
cl_command_queue command_queue, cl_program program)
|
||||||
|
{
|
||||||
|
this->device_ = device;
|
||||||
|
this->gpu_context_ = std::shared_ptr<ContextType>(gpu_context, clReleaseContext);
|
||||||
|
this->command_queue_ = std::shared_ptr<CommandQueueType>(command_queue, clReleaseCommandQueue);
|
||||||
|
this->program_ = std::shared_ptr<ProgramType>(program, clReleaseProgram);
|
||||||
|
}
|
||||||
|
|
||||||
|
cl_device_id device() { return device_; }
|
||||||
|
|
||||||
|
cl_context context() { return gpu_context_.get(); }
|
||||||
|
|
||||||
|
cl_command_queue commandQueue() { return command_queue_.get(); }
|
||||||
|
|
||||||
|
cl_program program() { return program_.get(); }
|
||||||
|
|
||||||
|
private:
|
||||||
|
|
||||||
|
using ProgramType = std::remove_reference<decltype(*cl_program())>::type;
|
||||||
|
using CommandQueueType = std::remove_reference<decltype(*cl_command_queue())>::type;
|
||||||
|
using ContextType = std::remove_reference<decltype(*cl_context())>::type;
|
||||||
|
|
||||||
|
cl_device_id device_;
|
||||||
|
|
||||||
|
std::shared_ptr<ContextType> gpu_context_;
|
||||||
|
std::shared_ptr<CommandQueueType> command_queue_;
|
||||||
|
std::shared_ptr<ProgramType> program_;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
static String opencl_error_to_str(cl_int error)
|
||||||
|
{
|
||||||
|
#define CASE_CL_CONSTANT(NAME) case NAME: return #NAME;
|
||||||
|
|
||||||
|
// Suppose that no combinations are possible.
|
||||||
|
switch (error)
|
||||||
|
{
|
||||||
|
CASE_CL_CONSTANT(CL_SUCCESS)
|
||||||
|
CASE_CL_CONSTANT(CL_DEVICE_NOT_FOUND)
|
||||||
|
CASE_CL_CONSTANT(CL_DEVICE_NOT_AVAILABLE)
|
||||||
|
CASE_CL_CONSTANT(CL_COMPILER_NOT_AVAILABLE)
|
||||||
|
CASE_CL_CONSTANT(CL_MEM_OBJECT_ALLOCATION_FAILURE)
|
||||||
|
CASE_CL_CONSTANT(CL_OUT_OF_RESOURCES)
|
||||||
|
CASE_CL_CONSTANT(CL_OUT_OF_HOST_MEMORY)
|
||||||
|
CASE_CL_CONSTANT(CL_PROFILING_INFO_NOT_AVAILABLE)
|
||||||
|
CASE_CL_CONSTANT(CL_MEM_COPY_OVERLAP)
|
||||||
|
CASE_CL_CONSTANT(CL_IMAGE_FORMAT_MISMATCH)
|
||||||
|
CASE_CL_CONSTANT(CL_IMAGE_FORMAT_NOT_SUPPORTED)
|
||||||
|
CASE_CL_CONSTANT(CL_BUILD_PROGRAM_FAILURE)
|
||||||
|
CASE_CL_CONSTANT(CL_MAP_FAILURE)
|
||||||
|
CASE_CL_CONSTANT(CL_MISALIGNED_SUB_BUFFER_OFFSET)
|
||||||
|
CASE_CL_CONSTANT(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST)
|
||||||
|
CASE_CL_CONSTANT(CL_COMPILE_PROGRAM_FAILURE)
|
||||||
|
CASE_CL_CONSTANT(CL_LINKER_NOT_AVAILABLE)
|
||||||
|
CASE_CL_CONSTANT(CL_LINK_PROGRAM_FAILURE)
|
||||||
|
CASE_CL_CONSTANT(CL_DEVICE_PARTITION_FAILED)
|
||||||
|
CASE_CL_CONSTANT(CL_KERNEL_ARG_INFO_NOT_AVAILABLE)
|
||||||
|
CASE_CL_CONSTANT(CL_INVALID_VALUE)
|
||||||
|
CASE_CL_CONSTANT(CL_INVALID_DEVICE_TYPE)
|
||||||
|
CASE_CL_CONSTANT(CL_INVALID_PLATFORM)
|
||||||
|
CASE_CL_CONSTANT(CL_INVALID_DEVICE)
|
||||||
|
CASE_CL_CONSTANT(CL_INVALID_CONTEXT)
|
||||||
|
CASE_CL_CONSTANT(CL_INVALID_QUEUE_PROPERTIES)
|
||||||
|
CASE_CL_CONSTANT(CL_INVALID_COMMAND_QUEUE)
|
||||||
|
CASE_CL_CONSTANT(CL_INVALID_HOST_PTR)
|
||||||
|
CASE_CL_CONSTANT(CL_INVALID_MEM_OBJECT)
|
||||||
|
CASE_CL_CONSTANT(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR)
|
||||||
|
CASE_CL_CONSTANT(CL_INVALID_IMAGE_SIZE)
|
||||||
|
CASE_CL_CONSTANT(CL_INVALID_SAMPLER)
|
||||||
|
CASE_CL_CONSTANT(CL_INVALID_BINARY)
|
||||||
|
CASE_CL_CONSTANT(CL_INVALID_BUILD_OPTIONS)
|
||||||
|
CASE_CL_CONSTANT(CL_INVALID_PROGRAM)
|
||||||
|
CASE_CL_CONSTANT(CL_INVALID_PROGRAM_EXECUTABLE)
|
||||||
|
CASE_CL_CONSTANT(CL_INVALID_KERNEL_NAME)
|
||||||
|
CASE_CL_CONSTANT(CL_INVALID_KERNEL_DEFINITION)
|
||||||
|
CASE_CL_CONSTANT(CL_INVALID_KERNEL)
|
||||||
|
CASE_CL_CONSTANT(CL_INVALID_ARG_INDEX)
|
||||||
|
CASE_CL_CONSTANT(CL_INVALID_ARG_VALUE)
|
||||||
|
CASE_CL_CONSTANT(CL_INVALID_ARG_SIZE)
|
||||||
|
CASE_CL_CONSTANT(CL_INVALID_KERNEL_ARGS)
|
||||||
|
CASE_CL_CONSTANT(CL_INVALID_WORK_DIMENSION)
|
||||||
|
CASE_CL_CONSTANT(CL_INVALID_WORK_GROUP_SIZE)
|
||||||
|
CASE_CL_CONSTANT(CL_INVALID_WORK_ITEM_SIZE)
|
||||||
|
CASE_CL_CONSTANT(CL_INVALID_GLOBAL_OFFSET)
|
||||||
|
CASE_CL_CONSTANT(CL_INVALID_EVENT_WAIT_LIST)
|
||||||
|
CASE_CL_CONSTANT(CL_INVALID_EVENT)
|
||||||
|
CASE_CL_CONSTANT(CL_INVALID_OPERATION)
|
||||||
|
CASE_CL_CONSTANT(CL_INVALID_GL_OBJECT)
|
||||||
|
CASE_CL_CONSTANT(CL_INVALID_BUFFER_SIZE)
|
||||||
|
CASE_CL_CONSTANT(CL_INVALID_MIP_LEVEL)
|
||||||
|
CASE_CL_CONSTANT(CL_INVALID_GLOBAL_WORK_SIZE)
|
||||||
|
CASE_CL_CONSTANT(CL_INVALID_PROPERTY)
|
||||||
|
CASE_CL_CONSTANT(CL_INVALID_IMAGE_DESCRIPTOR)
|
||||||
|
CASE_CL_CONSTANT(CL_INVALID_COMPILER_OPTIONS)
|
||||||
|
CASE_CL_CONSTANT(CL_INVALID_LINKER_OPTIONS)
|
||||||
|
CASE_CL_CONSTANT(CL_INVALID_DEVICE_PARTITION_COUNT)
|
||||||
|
default:
|
||||||
|
return "UNKNOWN ERROR CODE ";
|
||||||
|
}
|
||||||
|
|
||||||
|
#undef CASE_CL_CONSTANT
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static void checkError(cl_int error)
|
||||||
|
{
|
||||||
|
if (error != CL_SUCCESS)
|
||||||
|
throw DB::Exception("OpenCL error " + opencl_error_to_str(error), DB::ErrorCodes::OPENCL_ERROR);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/// Getting OpenCl main entities.
|
||||||
|
|
||||||
|
static cl_platform_id getPlatformID(const Settings & settings)
|
||||||
|
{
|
||||||
|
cl_platform_id platform;
|
||||||
|
cl_int error = clGetPlatformIDs(settings.number_of_platform_entries, &platform,
|
||||||
|
settings.number_of_available_platforms);
|
||||||
|
checkError(error);
|
||||||
|
|
||||||
|
return platform;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static cl_device_id getDeviceID(cl_platform_id & platform, const Settings & settings)
|
||||||
|
{
|
||||||
|
cl_device_id device;
|
||||||
|
cl_int error = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, settings.number_of_devices_entries,
|
||||||
|
&device, settings.number_of_available_devices);
|
||||||
|
OCL::checkError(error);
|
||||||
|
|
||||||
|
return device;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static cl_context makeContext(cl_device_id & device, const Settings & settings)
|
||||||
|
{
|
||||||
|
cl_int error;
|
||||||
|
cl_context gpu_context = clCreateContext(settings.context_properties, settings.number_of_devices_entries,
|
||||||
|
&device, settings.context_callback, settings.context_callback_data,
|
||||||
|
&error);
|
||||||
|
OCL::checkError(error);
|
||||||
|
|
||||||
|
return gpu_context;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static cl_command_queue makeCommandQueue(cl_device_id & device, cl_context & context, const Settings & settings [[maybe_unused]])
|
||||||
|
{
|
||||||
|
cl_int error;
|
||||||
|
#ifdef CL_USE_DEPRECATED_OPENCL_1_2_APIS
|
||||||
|
cl_command_queue command_queue = clCreateCommandQueue(context, device, settings.command_queue_properties, &error);
|
||||||
|
#else
|
||||||
|
cl_command_queue command_queue = clCreateCommandQueueWithProperties(context, device, nullptr, &error);
|
||||||
|
#endif
|
||||||
|
OCL::checkError(error);
|
||||||
|
|
||||||
|
return command_queue;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static cl_program makeProgram(const char * source_code, cl_context context,
|
||||||
|
cl_device_id device_id, const Settings & settings)
|
||||||
|
{
|
||||||
|
cl_int error = 0;
|
||||||
|
size_t source_size = strlen(source_code);
|
||||||
|
|
||||||
|
cl_program program = clCreateProgramWithSource(context, settings.number_of_program_source_pointers, &source_code, &source_size, &error);
|
||||||
|
checkError(error);
|
||||||
|
|
||||||
|
error = clBuildProgram(program, settings.number_of_devices_entries, &device_id, settings.build_options,
|
||||||
|
settings.build_notification_routine, settings.build_callback_data);
|
||||||
|
|
||||||
|
/// Combining additional logs output when program build failed.
|
||||||
|
if (error == CL_BUILD_PROGRAM_FAILURE)
|
||||||
|
{
|
||||||
|
size_t log_size;
|
||||||
|
error = clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, 0, nullptr, &log_size);
|
||||||
|
|
||||||
|
checkError(error);
|
||||||
|
|
||||||
|
std::vector<char> log(log_size);
|
||||||
|
clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, log_size, log.data(), nullptr);
|
||||||
|
|
||||||
|
checkError(error);
|
||||||
|
throw DB::Exception(log.data(), DB::ErrorCodes::OPENCL_ERROR);
|
||||||
|
}
|
||||||
|
|
||||||
|
checkError(error);
|
||||||
|
|
||||||
|
return program;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/// Configuring buffer for given input data
|
||||||
|
|
||||||
|
template<typename K>
|
||||||
|
static cl_mem createBuffer(K * p_input, cl_int array_size, cl_context context,
|
||||||
|
cl_int elements_size = sizeof(K))
|
||||||
|
{
|
||||||
|
cl_int error = CL_SUCCESS;
|
||||||
|
cl_mem cl_input_buffer =
|
||||||
|
clCreateBuffer
|
||||||
|
(
|
||||||
|
context,
|
||||||
|
CL_MEM_USE_HOST_PTR,
|
||||||
|
zeroCopySizeAlignment(elements_size * array_size),
|
||||||
|
p_input,
|
||||||
|
&error
|
||||||
|
);
|
||||||
|
checkError(error);
|
||||||
|
|
||||||
|
return cl_input_buffer;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static size_t zeroCopySizeAlignment(size_t required_size)
|
||||||
|
{
|
||||||
|
return required_size + (~required_size + 1) % 64;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/// Manipulating with common OpenCL variables.
|
||||||
|
|
||||||
|
static void finishCommandQueue(cl_command_queue command_queue)
|
||||||
|
{
|
||||||
|
// Blocks until all previously queued OpenCL commands in a queue are issued to the associated device.
|
||||||
|
cl_int error = clFinish(command_queue);
|
||||||
|
OCL::checkError(error);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template<class T>
|
||||||
|
static void releaseData(T * origin, cl_int array_size, cl_mem cl_buffer,
|
||||||
|
cl_command_queue command_queue, size_t offset = 0)
|
||||||
|
{
|
||||||
|
cl_int error = CL_SUCCESS;
|
||||||
|
|
||||||
|
void * tmp_ptr = nullptr;
|
||||||
|
|
||||||
|
// No events specified to be completed before enqueueing buffers,
|
||||||
|
// so `num_events_in_wait_list` passed with `0` value.
|
||||||
|
|
||||||
|
tmp_ptr = clEnqueueMapBuffer(command_queue, cl_buffer, true, CL_MAP_READ,
|
||||||
|
offset, sizeof(cl_int) * array_size, 0, nullptr, nullptr, &error);
|
||||||
|
OCL::checkError(error);
|
||||||
|
if (tmp_ptr != origin)
|
||||||
|
throw DB::Exception("clEnqueueMapBuffer failed to return original pointer", DB::ErrorCodes::OPENCL_ERROR);
|
||||||
|
|
||||||
|
error = clEnqueueUnmapMemObject(command_queue, cl_buffer, tmp_ptr, 0, nullptr, nullptr);
|
||||||
|
checkError(error);
|
||||||
|
|
||||||
|
error = clReleaseMemObject(cl_buffer);
|
||||||
|
checkError(error);
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
@ -35,6 +35,9 @@ target_link_libraries (compact_array PRIVATE clickhouse_common_io)
|
|||||||
add_executable (radix_sort radix_sort.cpp)
|
add_executable (radix_sort radix_sort.cpp)
|
||||||
target_link_libraries (radix_sort PRIVATE clickhouse_common_io)
|
target_link_libraries (radix_sort PRIVATE clickhouse_common_io)
|
||||||
|
|
||||||
|
add_executable (bitonic_sort bitonic_sort.cpp)
|
||||||
|
target_link_libraries (bitonic_sort PRIVATE clickhouse_common_io "-framework OpenCL")
|
||||||
|
|
||||||
add_executable (arena_with_free_lists arena_with_free_lists.cpp)
|
add_executable (arena_with_free_lists arena_with_free_lists.cpp)
|
||||||
target_link_libraries (arena_with_free_lists PRIVATE dbms)
|
target_link_libraries (arena_with_free_lists PRIVATE dbms)
|
||||||
|
|
||||||
|
248
src/Common/tests/bitonic_sort.cpp
Normal file
248
src/Common/tests/bitonic_sort.cpp
Normal file
@ -0,0 +1,248 @@
|
|||||||
|
#include <Common/config.h>
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
#if USE_OPENCL
|
||||||
|
|
||||||
|
#if !defined(__APPLE__) && !defined(__FreeBSD__)
|
||||||
|
#include <malloc.h>
|
||||||
|
#endif
|
||||||
|
#include <ext/bit_cast.h>
|
||||||
|
#include <Common/Stopwatch.h>
|
||||||
|
#include <IO/ReadHelpers.h>
|
||||||
|
#include <Core/Defines.h>
|
||||||
|
#include <climits>
|
||||||
|
#include <algorithm>
|
||||||
|
|
||||||
|
#include "Common/BitonicSort.h"
|
||||||
|
|
||||||
|
|
||||||
|
using Key = cl_ulong;
|
||||||
|
|
||||||
|
|
||||||
|
/// Generates vector of size 8 for testing.
|
||||||
|
/// Vector contains max possible value, min possible value and duplicate values.
|
||||||
|
template <class Type>
|
||||||
|
static void generateTest(std::vector<Type>& data, Type min_value, Type max_value)
|
||||||
|
{
|
||||||
|
int size = 10;
|
||||||
|
|
||||||
|
data.resize(size);
|
||||||
|
data[0] = 10;
|
||||||
|
data[1] = max_value;
|
||||||
|
data[2] = 10;
|
||||||
|
data[3] = 20;
|
||||||
|
data[4] = min_value;
|
||||||
|
data[5] = min_value + 1;
|
||||||
|
data[6] = max_value - 5;
|
||||||
|
data[7] = 1;
|
||||||
|
data[8] = 42;
|
||||||
|
data[9] = max_value - 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static void check(const std::vector<size_t> & indices, bool reverse = true)
|
||||||
|
{
|
||||||
|
std::vector<size_t> reference_indices{4, 5, 7, 0, 2, 3, 8, 6, 9, 1};
|
||||||
|
if (reverse) std::reverse(reference_indices.begin(), reference_indices.end());
|
||||||
|
|
||||||
|
bool success = true;
|
||||||
|
for (size_t index = 0; index < reference_indices.size(); ++index)
|
||||||
|
{
|
||||||
|
if (indices[index] != reference_indices[index])
|
||||||
|
{
|
||||||
|
success = false;
|
||||||
|
std::cerr << "Test failed. Reason: indices[" << index << "] = "
|
||||||
|
<< indices[index] << ", it must be equal to " << reference_indices[index] << "\n";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string order_description = reverse ? "descending" : "ascending";
|
||||||
|
std::cerr << "Sorted " << order_description << " sequence. Result: " << (success ? "Ok." : "Fail!") << "\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template <class Type>
|
||||||
|
static void sortBitonicSortWithPodArrays(const std::vector<Type>& data,
|
||||||
|
std::vector<size_t> & indices, bool ascending = true)
|
||||||
|
{
|
||||||
|
DB::PaddedPODArray<Type> pod_array_data = DB::PaddedPODArray<Type>(data.size());
|
||||||
|
DB::IColumn::Permutation pod_array_indices = DB::IColumn::Permutation(data.size());
|
||||||
|
|
||||||
|
for (size_t index = 0; index < data.size(); ++index)
|
||||||
|
{
|
||||||
|
*(pod_array_data.data() + index) = data[index];
|
||||||
|
*(pod_array_indices.data() + index) = index;
|
||||||
|
}
|
||||||
|
|
||||||
|
BitonicSort::getInstance().configure();
|
||||||
|
BitonicSort::getInstance().sort(pod_array_data, pod_array_indices, ascending);
|
||||||
|
|
||||||
|
for (size_t index = 0; index < data.size(); ++index)
|
||||||
|
indices[index] = pod_array_indices[index];
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template <class Type>
|
||||||
|
static void testBitonicSort(std::string test_name, Type min_value, Type max_value)
|
||||||
|
{
|
||||||
|
std::cerr << test_name << std::endl;
|
||||||
|
|
||||||
|
std::vector<Type> data;
|
||||||
|
generateTest<Type>(data, min_value, max_value);
|
||||||
|
|
||||||
|
std::vector<size_t> indices(data.size());
|
||||||
|
|
||||||
|
sortBitonicSortWithPodArrays(data, indices, true);
|
||||||
|
check(indices, false);
|
||||||
|
|
||||||
|
sortBitonicSortWithPodArrays(data, indices, false);
|
||||||
|
check(indices, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static void straightforwardTests()
|
||||||
|
{
|
||||||
|
testBitonicSort<cl_char>("Test 01: cl_char.", CHAR_MIN, CHAR_MAX);
|
||||||
|
testBitonicSort<cl_uchar>("Test 02: cl_uchar.", 0, UCHAR_MAX);
|
||||||
|
testBitonicSort<cl_short>("Test 03: cl_short.", SHRT_MIN, SHRT_MAX);
|
||||||
|
testBitonicSort<cl_ushort>("Test 04: cl_ushort.", 0, USHRT_MAX);
|
||||||
|
testBitonicSort<cl_int>("Test 05: cl_int.", INT_MIN, INT_MAX);
|
||||||
|
testBitonicSort<cl_uint >("Test 06: cl_uint.", 0, UINT_MAX);
|
||||||
|
testBitonicSort<cl_long >("Test 07: cl_long.", LONG_MIN, LONG_MAX);
|
||||||
|
testBitonicSort<cl_ulong >("Test 08: cl_ulong.", 0, ULONG_MAX);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static void NO_INLINE sort1(Key * data, size_t size)
|
||||||
|
{
|
||||||
|
std::sort(data, data + size);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static void NO_INLINE sort2(std::vector<Key> & data, std::vector<size_t> & indices)
|
||||||
|
{
|
||||||
|
BitonicSort::getInstance().configure();
|
||||||
|
|
||||||
|
sortBitonicSortWithPodArrays(data, indices);
|
||||||
|
|
||||||
|
std::vector<Key> result(data.size());
|
||||||
|
for (size_t index = 0; index < data.size(); ++index)
|
||||||
|
result[index] = data[indices[index]];
|
||||||
|
|
||||||
|
data = std::move(result);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int main(int argc, char ** argv)
|
||||||
|
{
|
||||||
|
straightforwardTests();
|
||||||
|
|
||||||
|
if (argc < 3)
|
||||||
|
{
|
||||||
|
std::cerr << "Not enough arguments were passed\n";
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t n = DB::parse<size_t>(argv[1]);
|
||||||
|
size_t method = DB::parse<size_t>(argv[2]);
|
||||||
|
|
||||||
|
std::vector<Key> data(n);
|
||||||
|
std::vector<size_t> indices(n);
|
||||||
|
|
||||||
|
{
|
||||||
|
Stopwatch watch;
|
||||||
|
|
||||||
|
for (auto & elem : data)
|
||||||
|
elem = static_cast<Key>(rand());
|
||||||
|
|
||||||
|
for (size_t i = 0; i < n; ++i)
|
||||||
|
indices[i] = i;
|
||||||
|
|
||||||
|
watch.stop();
|
||||||
|
double elapsed = watch.elapsedSeconds();
|
||||||
|
std::cerr
|
||||||
|
<< "Filled in " << elapsed
|
||||||
|
<< " (" << n / elapsed << " elem/sec., "
|
||||||
|
<< n * sizeof(Key) / elapsed / 1048576 << " MB/sec.)"
|
||||||
|
<< std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (n <= 100)
|
||||||
|
{
|
||||||
|
std::cerr << std::endl;
|
||||||
|
for (const auto & elem : data)
|
||||||
|
std::cerr << elem << ' ';
|
||||||
|
std::cerr << std::endl;
|
||||||
|
for (const auto & index : indices)
|
||||||
|
std::cerr << index << ' ';
|
||||||
|
std::cerr << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
Stopwatch watch;
|
||||||
|
|
||||||
|
if (method == 1) sort1(data.data(), n);
|
||||||
|
if (method == 2) sort2(data, indices);
|
||||||
|
|
||||||
|
watch.stop();
|
||||||
|
double elapsed = watch.elapsedSeconds();
|
||||||
|
std::cerr
|
||||||
|
<< "Sorted in " << elapsed
|
||||||
|
<< " (" << n / elapsed << " elem/sec., "
|
||||||
|
<< n * sizeof(Key) / elapsed / 1048576 << " MB/sec.)"
|
||||||
|
<< std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
Stopwatch watch;
|
||||||
|
|
||||||
|
size_t i = 1;
|
||||||
|
while (i < n)
|
||||||
|
{
|
||||||
|
if (!(data[i - 1] <= data[i]))
|
||||||
|
break;
|
||||||
|
++i;
|
||||||
|
}
|
||||||
|
|
||||||
|
watch.stop();
|
||||||
|
double elapsed = watch.elapsedSeconds();
|
||||||
|
std::cerr
|
||||||
|
<< "Checked in " << elapsed
|
||||||
|
<< " (" << n / elapsed << " elem/sec., "
|
||||||
|
<< n * sizeof(Key) / elapsed / 1048576 << " MB/sec.)"
|
||||||
|
<< std::endl
|
||||||
|
<< "Result: " << (i == n ? "Ok." : "Fail!") << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (n <= 1000)
|
||||||
|
{
|
||||||
|
std::cerr << std::endl;
|
||||||
|
|
||||||
|
std::cerr << data[0] << ' ';
|
||||||
|
for (size_t i = 1; i < n; ++i)
|
||||||
|
{
|
||||||
|
if (!(data[i - 1] <= data[i]))
|
||||||
|
std::cerr << "*** ";
|
||||||
|
std::cerr << data[i] << ' ';
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cerr << std::endl;
|
||||||
|
|
||||||
|
for (const auto & index : indices)
|
||||||
|
std::cerr << index << ' ';
|
||||||
|
std::cerr << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
std::cerr << "Openc CL disabled.";
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
@ -111,6 +111,8 @@ struct Settings : public SettingsCollection<Settings>
|
|||||||
M(SettingUInt64, parallel_replicas_count, 0, "", 0) \
|
M(SettingUInt64, parallel_replicas_count, 0, "", 0) \
|
||||||
M(SettingUInt64, parallel_replica_offset, 0, "", 0) \
|
M(SettingUInt64, parallel_replica_offset, 0, "", 0) \
|
||||||
\
|
\
|
||||||
|
M(SettingSpecialSort, special_sort, SpecialSort::NOT_SPECIFIED, "Specifies a sorting algorithm which will be using in ORDER BY query.", 0) \
|
||||||
|
\
|
||||||
M(SettingBool, skip_unavailable_shards, false, "If 1, ClickHouse silently skips unavailable shards and nodes unresolvable through DNS. Shard is marked as unavailable when none of the replicas can be reached.", 0) \
|
M(SettingBool, skip_unavailable_shards, false, "If 1, ClickHouse silently skips unavailable shards and nodes unresolvable through DNS. Shard is marked as unavailable when none of the replicas can be reached.", 0) \
|
||||||
\
|
\
|
||||||
M(SettingBool, distributed_group_by_no_merge, false, "Do not merge aggregation states from different servers for distributed query processing - in case it is for certain that there are different keys on different shards.", 0) \
|
M(SettingBool, distributed_group_by_no_merge, false, "Do not merge aggregation states from different servers for distributed query processing - in case it is for certain that there are different keys on different shards.", 0) \
|
||||||
|
@ -485,6 +485,12 @@ void SettingURI::deserialize(ReadBuffer & buf, SettingsBinaryFormat)
|
|||||||
IMPLEMENT_SETTING_ENUM(LoadBalancing, LOAD_BALANCING_LIST_OF_NAMES, ErrorCodes::UNKNOWN_LOAD_BALANCING)
|
IMPLEMENT_SETTING_ENUM(LoadBalancing, LOAD_BALANCING_LIST_OF_NAMES, ErrorCodes::UNKNOWN_LOAD_BALANCING)
|
||||||
|
|
||||||
|
|
||||||
|
#define SPECIAL_SORT_ALGORITHM_NAMES(M) \
|
||||||
|
M(NOT_SPECIFIED, "not_specified") \
|
||||||
|
M(OPENCL_BITONIC, "opencl_bitonic")
|
||||||
|
IMPLEMENT_SETTING_ENUM(SpecialSort, SPECIAL_SORT_ALGORITHM_NAMES, ErrorCodes::UNKNOWN_JOIN)
|
||||||
|
|
||||||
|
|
||||||
#define JOIN_STRICTNESS_LIST_OF_NAMES(M) \
|
#define JOIN_STRICTNESS_LIST_OF_NAMES(M) \
|
||||||
M(Unspecified, "") \
|
M(Unspecified, "") \
|
||||||
M(ALL, "ALL") \
|
M(ALL, "ALL") \
|
||||||
|
@ -251,6 +251,15 @@ enum class JoinAlgorithm
|
|||||||
};
|
};
|
||||||
using SettingJoinAlgorithm = SettingEnum<JoinAlgorithm>;
|
using SettingJoinAlgorithm = SettingEnum<JoinAlgorithm>;
|
||||||
|
|
||||||
|
|
||||||
|
enum class SpecialSort
|
||||||
|
{
|
||||||
|
NOT_SPECIFIED = 0,
|
||||||
|
OPENCL_BITONIC,
|
||||||
|
};
|
||||||
|
using SettingSpecialSort = SettingEnum<SpecialSort>;
|
||||||
|
|
||||||
|
|
||||||
/// Which rows should be included in TOTALS.
|
/// Which rows should be included in TOTALS.
|
||||||
enum class TotalsMode
|
enum class TotalsMode
|
||||||
{
|
{
|
||||||
|
@ -5,6 +5,7 @@
|
|||||||
#include <cstddef>
|
#include <cstddef>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <Core/Field.h>
|
#include <Core/Field.h>
|
||||||
|
#include <Core/SettingsCollection.h>
|
||||||
|
|
||||||
class Collator;
|
class Collator;
|
||||||
|
|
||||||
@ -31,21 +32,22 @@ struct SortColumnDescription
|
|||||||
std::shared_ptr<Collator> collator; /// Collator for locale-specific comparison of strings
|
std::shared_ptr<Collator> collator; /// Collator for locale-specific comparison of strings
|
||||||
bool with_fill;
|
bool with_fill;
|
||||||
FillColumnDescription fill_description;
|
FillColumnDescription fill_description;
|
||||||
|
SpecialSort special_sort;
|
||||||
|
|
||||||
|
|
||||||
SortColumnDescription(
|
SortColumnDescription(
|
||||||
size_t column_number_, int direction_, int nulls_direction_,
|
size_t column_number_, int direction_, int nulls_direction_,
|
||||||
const std::shared_ptr<Collator> & collator_ = nullptr, bool with_fill_ = false,
|
const std::shared_ptr<Collator> & collator_ = nullptr, SpecialSort special_sort_ = SpecialSort::NOT_SPECIFIED,
|
||||||
const FillColumnDescription & fill_description_ = {})
|
bool with_fill_ = false, const FillColumnDescription & fill_description_ = {})
|
||||||
: column_number(column_number_), direction(direction_), nulls_direction(nulls_direction_), collator(collator_)
|
: column_number(column_number_), direction(direction_), nulls_direction(nulls_direction_), collator(collator_)
|
||||||
, with_fill(with_fill_), fill_description(fill_description_) {}
|
, with_fill(with_fill_), fill_description(fill_description_), special_sort(special_sort_) {}
|
||||||
|
|
||||||
SortColumnDescription(
|
SortColumnDescription(
|
||||||
const std::string & column_name_, int direction_, int nulls_direction_,
|
const std::string & column_name_, int direction_, int nulls_direction_,
|
||||||
const std::shared_ptr<Collator> & collator_ = nullptr, bool with_fill_ = false,
|
const std::shared_ptr<Collator> & collator_ = nullptr, SpecialSort special_sort_ = SpecialSort::NOT_SPECIFIED,
|
||||||
const FillColumnDescription & fill_description_ = {})
|
bool with_fill_ = false, const FillColumnDescription & fill_description_ = {})
|
||||||
: column_name(column_name_), column_number(0), direction(direction_), nulls_direction(nulls_direction_)
|
: column_name(column_name_), column_number(0), direction(direction_), nulls_direction(nulls_direction_)
|
||||||
, collator(collator_), with_fill(with_fill_), fill_description(fill_description_) {}
|
, collator(collator_), with_fill(with_fill_), fill_description(fill_description_), special_sort(special_sort_) {}
|
||||||
|
|
||||||
bool operator == (const SortColumnDescription & other) const
|
bool operator == (const SortColumnDescription & other) const
|
||||||
{
|
{
|
||||||
|
@ -633,6 +633,7 @@ static SortDescription getSortDescription(const ASTSelectQuery & query, const Co
|
|||||||
{
|
{
|
||||||
SortDescription order_descr;
|
SortDescription order_descr;
|
||||||
order_descr.reserve(query.orderBy()->children.size());
|
order_descr.reserve(query.orderBy()->children.size());
|
||||||
|
SpecialSort special_sort = context.getSettings().special_sort.value;
|
||||||
for (const auto & elem : query.orderBy()->children)
|
for (const auto & elem : query.orderBy()->children)
|
||||||
{
|
{
|
||||||
String name = elem->children.front()->getColumnName();
|
String name = elem->children.front()->getColumnName();
|
||||||
@ -646,10 +647,10 @@ static SortDescription getSortDescription(const ASTSelectQuery & query, const Co
|
|||||||
{
|
{
|
||||||
FillColumnDescription fill_desc = getWithFillDescription(order_by_elem, context);
|
FillColumnDescription fill_desc = getWithFillDescription(order_by_elem, context);
|
||||||
order_descr.emplace_back(name, order_by_elem.direction,
|
order_descr.emplace_back(name, order_by_elem.direction,
|
||||||
order_by_elem.nulls_direction, collator, true, fill_desc);
|
order_by_elem.nulls_direction, collator, special_sort, true, fill_desc);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
order_descr.emplace_back(name, order_by_elem.direction, order_by_elem.nulls_direction, collator);
|
order_descr.emplace_back(name, order_by_elem.direction, order_by_elem.nulls_direction, collator, special_sort);
|
||||||
}
|
}
|
||||||
|
|
||||||
return order_descr;
|
return order_descr;
|
||||||
|
@ -13,6 +13,7 @@ namespace DB
|
|||||||
namespace ErrorCodes
|
namespace ErrorCodes
|
||||||
{
|
{
|
||||||
extern const int BAD_COLLATION;
|
extern const int BAD_COLLATION;
|
||||||
|
extern const int OPENCL_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool isCollationRequired(const SortColumnDescription & description)
|
static bool isCollationRequired(const SortColumnDescription & description)
|
||||||
@ -131,7 +132,22 @@ void sortBlock(Block & block, const SortDescription & description, UInt64 limit)
|
|||||||
|
|
||||||
}
|
}
|
||||||
else if (!isColumnConst(*column))
|
else if (!isColumnConst(*column))
|
||||||
column->getPermutation(reverse, limit, description[0].nulls_direction, perm);
|
{
|
||||||
|
int nan_direction_hint = description[0].nulls_direction;
|
||||||
|
|
||||||
|
/// If in Settings `special_sort` option has been set as `bitonic_sort`,
|
||||||
|
/// then via `nan_direction_hint` variable a flag which specifies bitonic sort as preferred
|
||||||
|
/// will be passed to `getPermutation` method with value 42.
|
||||||
|
if (description[0].special_sort == SpecialSort::OPENCL_BITONIC)
|
||||||
|
{
|
||||||
|
nan_direction_hint = 42;
|
||||||
|
#ifndef BITONIC_SORT_PREFERRED
|
||||||
|
throw DB::Exception("Bitonic sort specified as preferred, but OpenCL not available", DB::ErrorCodes::OPENCL_ERROR);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
column->getPermutation(reverse, limit, nan_direction_hint, perm);
|
||||||
|
}
|
||||||
else
|
else
|
||||||
/// we don't need to do anything with const column
|
/// we don't need to do anything with const column
|
||||||
is_column_const = true;
|
is_column_const = true;
|
||||||
|
@ -0,0 +1,42 @@
|
|||||||
|
82
|
||||||
|
80
|
||||||
|
78
|
||||||
|
76
|
||||||
|
74
|
||||||
|
72
|
||||||
|
70
|
||||||
|
68
|
||||||
|
66
|
||||||
|
64
|
||||||
|
62
|
||||||
|
60
|
||||||
|
58
|
||||||
|
56
|
||||||
|
54
|
||||||
|
52
|
||||||
|
50
|
||||||
|
48
|
||||||
|
46
|
||||||
|
44
|
||||||
|
42
|
||||||
|
40
|
||||||
|
38
|
||||||
|
36
|
||||||
|
34
|
||||||
|
32
|
||||||
|
30
|
||||||
|
28
|
||||||
|
26
|
||||||
|
24
|
||||||
|
22
|
||||||
|
20
|
||||||
|
18
|
||||||
|
16
|
||||||
|
14
|
||||||
|
12
|
||||||
|
10
|
||||||
|
8
|
||||||
|
6
|
||||||
|
4
|
||||||
|
2
|
||||||
|
0
|
@ -0,0 +1 @@
|
|||||||
|
select toUInt8(number * 2) as x from numbers(42) order by x desc settings special_sort = 'opencl_bitonic'
|
@ -0,0 +1,42 @@
|
|||||||
|
82
|
||||||
|
80
|
||||||
|
78
|
||||||
|
76
|
||||||
|
74
|
||||||
|
72
|
||||||
|
70
|
||||||
|
68
|
||||||
|
66
|
||||||
|
64
|
||||||
|
62
|
||||||
|
60
|
||||||
|
58
|
||||||
|
56
|
||||||
|
54
|
||||||
|
52
|
||||||
|
50
|
||||||
|
48
|
||||||
|
46
|
||||||
|
44
|
||||||
|
42
|
||||||
|
40
|
||||||
|
38
|
||||||
|
36
|
||||||
|
34
|
||||||
|
32
|
||||||
|
30
|
||||||
|
28
|
||||||
|
26
|
||||||
|
24
|
||||||
|
22
|
||||||
|
20
|
||||||
|
18
|
||||||
|
16
|
||||||
|
14
|
||||||
|
12
|
||||||
|
10
|
||||||
|
8
|
||||||
|
6
|
||||||
|
4
|
||||||
|
2
|
||||||
|
0
|
@ -0,0 +1 @@
|
|||||||
|
select toInt8(number * 2) as x from numbers(42) order by x desc settings special_sort = 'opencl_bitonic'
|
@ -0,0 +1,42 @@
|
|||||||
|
82
|
||||||
|
80
|
||||||
|
78
|
||||||
|
76
|
||||||
|
74
|
||||||
|
72
|
||||||
|
70
|
||||||
|
68
|
||||||
|
66
|
||||||
|
64
|
||||||
|
62
|
||||||
|
60
|
||||||
|
58
|
||||||
|
56
|
||||||
|
54
|
||||||
|
52
|
||||||
|
50
|
||||||
|
48
|
||||||
|
46
|
||||||
|
44
|
||||||
|
42
|
||||||
|
40
|
||||||
|
38
|
||||||
|
36
|
||||||
|
34
|
||||||
|
32
|
||||||
|
30
|
||||||
|
28
|
||||||
|
26
|
||||||
|
24
|
||||||
|
22
|
||||||
|
20
|
||||||
|
18
|
||||||
|
16
|
||||||
|
14
|
||||||
|
12
|
||||||
|
10
|
||||||
|
8
|
||||||
|
6
|
||||||
|
4
|
||||||
|
2
|
||||||
|
0
|
@ -0,0 +1 @@
|
|||||||
|
select toUInt16(number * 2) as x from numbers(42) order by x desc settings special_sort = 'opencl_bitonic'
|
@ -0,0 +1,42 @@
|
|||||||
|
82
|
||||||
|
80
|
||||||
|
78
|
||||||
|
76
|
||||||
|
74
|
||||||
|
72
|
||||||
|
70
|
||||||
|
68
|
||||||
|
66
|
||||||
|
64
|
||||||
|
62
|
||||||
|
60
|
||||||
|
58
|
||||||
|
56
|
||||||
|
54
|
||||||
|
52
|
||||||
|
50
|
||||||
|
48
|
||||||
|
46
|
||||||
|
44
|
||||||
|
42
|
||||||
|
40
|
||||||
|
38
|
||||||
|
36
|
||||||
|
34
|
||||||
|
32
|
||||||
|
30
|
||||||
|
28
|
||||||
|
26
|
||||||
|
24
|
||||||
|
22
|
||||||
|
20
|
||||||
|
18
|
||||||
|
16
|
||||||
|
14
|
||||||
|
12
|
||||||
|
10
|
||||||
|
8
|
||||||
|
6
|
||||||
|
4
|
||||||
|
2
|
||||||
|
0
|
@ -0,0 +1 @@
|
|||||||
|
select toInt16(number * 2) as x from numbers(42) order by x desc settings special_sort = 'opencl_bitonic'
|
@ -0,0 +1,42 @@
|
|||||||
|
82
|
||||||
|
80
|
||||||
|
78
|
||||||
|
76
|
||||||
|
74
|
||||||
|
72
|
||||||
|
70
|
||||||
|
68
|
||||||
|
66
|
||||||
|
64
|
||||||
|
62
|
||||||
|
60
|
||||||
|
58
|
||||||
|
56
|
||||||
|
54
|
||||||
|
52
|
||||||
|
50
|
||||||
|
48
|
||||||
|
46
|
||||||
|
44
|
||||||
|
42
|
||||||
|
40
|
||||||
|
38
|
||||||
|
36
|
||||||
|
34
|
||||||
|
32
|
||||||
|
30
|
||||||
|
28
|
||||||
|
26
|
||||||
|
24
|
||||||
|
22
|
||||||
|
20
|
||||||
|
18
|
||||||
|
16
|
||||||
|
14
|
||||||
|
12
|
||||||
|
10
|
||||||
|
8
|
||||||
|
6
|
||||||
|
4
|
||||||
|
2
|
||||||
|
0
|
@ -0,0 +1 @@
|
|||||||
|
select toUInt32(number * 2) as x from numbers(42) order by x desc settings special_sort = 'opencl_bitonic'
|
@ -0,0 +1,42 @@
|
|||||||
|
82
|
||||||
|
80
|
||||||
|
78
|
||||||
|
76
|
||||||
|
74
|
||||||
|
72
|
||||||
|
70
|
||||||
|
68
|
||||||
|
66
|
||||||
|
64
|
||||||
|
62
|
||||||
|
60
|
||||||
|
58
|
||||||
|
56
|
||||||
|
54
|
||||||
|
52
|
||||||
|
50
|
||||||
|
48
|
||||||
|
46
|
||||||
|
44
|
||||||
|
42
|
||||||
|
40
|
||||||
|
38
|
||||||
|
36
|
||||||
|
34
|
||||||
|
32
|
||||||
|
30
|
||||||
|
28
|
||||||
|
26
|
||||||
|
24
|
||||||
|
22
|
||||||
|
20
|
||||||
|
18
|
||||||
|
16
|
||||||
|
14
|
||||||
|
12
|
||||||
|
10
|
||||||
|
8
|
||||||
|
6
|
||||||
|
4
|
||||||
|
2
|
||||||
|
0
|
@ -0,0 +1 @@
|
|||||||
|
select toInt32(number * 2) as x from numbers(42) order by x desc settings special_sort = 'opencl_bitonic'
|
@ -0,0 +1,42 @@
|
|||||||
|
82
|
||||||
|
80
|
||||||
|
78
|
||||||
|
76
|
||||||
|
74
|
||||||
|
72
|
||||||
|
70
|
||||||
|
68
|
||||||
|
66
|
||||||
|
64
|
||||||
|
62
|
||||||
|
60
|
||||||
|
58
|
||||||
|
56
|
||||||
|
54
|
||||||
|
52
|
||||||
|
50
|
||||||
|
48
|
||||||
|
46
|
||||||
|
44
|
||||||
|
42
|
||||||
|
40
|
||||||
|
38
|
||||||
|
36
|
||||||
|
34
|
||||||
|
32
|
||||||
|
30
|
||||||
|
28
|
||||||
|
26
|
||||||
|
24
|
||||||
|
22
|
||||||
|
20
|
||||||
|
18
|
||||||
|
16
|
||||||
|
14
|
||||||
|
12
|
||||||
|
10
|
||||||
|
8
|
||||||
|
6
|
||||||
|
4
|
||||||
|
2
|
||||||
|
0
|
@ -0,0 +1 @@
|
|||||||
|
select toUInt64(number * 2) as x from numbers(42) order by x desc settings special_sort = 'opencl_bitonic'
|
@ -0,0 +1,42 @@
|
|||||||
|
82
|
||||||
|
80
|
||||||
|
78
|
||||||
|
76
|
||||||
|
74
|
||||||
|
72
|
||||||
|
70
|
||||||
|
68
|
||||||
|
66
|
||||||
|
64
|
||||||
|
62
|
||||||
|
60
|
||||||
|
58
|
||||||
|
56
|
||||||
|
54
|
||||||
|
52
|
||||||
|
50
|
||||||
|
48
|
||||||
|
46
|
||||||
|
44
|
||||||
|
42
|
||||||
|
40
|
||||||
|
38
|
||||||
|
36
|
||||||
|
34
|
||||||
|
32
|
||||||
|
30
|
||||||
|
28
|
||||||
|
26
|
||||||
|
24
|
||||||
|
22
|
||||||
|
20
|
||||||
|
18
|
||||||
|
16
|
||||||
|
14
|
||||||
|
12
|
||||||
|
10
|
||||||
|
8
|
||||||
|
6
|
||||||
|
4
|
||||||
|
2
|
||||||
|
0
|
@ -0,0 +1 @@
|
|||||||
|
select toInt64(number * 2) as x from numbers(42) order by x desc settings special_sort = 'opencl_bitonic'
|
Loading…
Reference in New Issue
Block a user