Bitonic sort on GPU (OpenCL) (#10232)

This commit is contained in:
Ri 2020-05-15 03:01:14 +03:00 committed by GitHub
parent 21ce8dbbc3
commit fc7afaa639
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
34 changed files with 2580 additions and 9 deletions

View File

@ -346,6 +346,7 @@ include (cmake/find/libgsasl.cmake)
include (cmake/find/rdkafka.cmake) include (cmake/find/rdkafka.cmake)
include (cmake/find/capnp.cmake) include (cmake/find/capnp.cmake)
include (cmake/find/llvm.cmake) include (cmake/find/llvm.cmake)
include (cmake/find/opencl.cmake)
include (cmake/find/h3.cmake) include (cmake/find/h3.cmake)
include (cmake/find/libxml2.cmake) include (cmake/find/libxml2.cmake)
include (cmake/find/brotli.cmake) include (cmake/find/brotli.cmake)
@ -382,6 +383,15 @@ if (OS_LINUX AND NOT ENABLE_JEMALLOC)
message (WARNING "Non default allocator is disabled. This is not recommended for production Linux builds.") message (WARNING "Non default allocator is disabled. This is not recommended for production Linux builds.")
endif () endif ()
if (USE_OPENCL)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DBITONIC_SORT_PREFERRED")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DBITONIC_SORT_PREFERRED")
if (OS_DARWIN)
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -framework OpenCL")
endif ()
endif ()
include (cmake/print_flags.cmake) include (cmake/print_flags.cmake)
if (TARGET global-group) if (TARGET global-group)

17
cmake/find/opencl.cmake Normal file
View File

@ -0,0 +1,17 @@
if(ENABLE_OPENCL)
# Intel OpenCl driver: sudo apt install intel-opencl-icd
# TODO It's possible to add it as submodules: https://github.com/intel/compute-runtime/releases
# OpenCL applications should link wiht ICD loader
# sudo apt install opencl-headers ocl-icd-libopencl1
# sudo ln -s /usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 /usr/lib/libOpenCL.so
find_package(OpenCL REQUIRED)
if(OpenCL_FOUND)
set(USE_OPENCL 1)
endif()
endif()
message(STATUS "Using opencl=${USE_OPENCL}: ${OpenCL_INCLUDE_DIRS} : ${OpenCL_LIBRARIES}")

View File

@ -61,6 +61,10 @@
#include <Common/ThreadFuzzer.h> #include <Common/ThreadFuzzer.h>
#include "MySQLHandlerFactory.h" #include "MySQLHandlerFactory.h"
#ifdef BITONIC_SORT_PREFERRED
#include "Common/BitonicSort.h"
#endif
#if !defined(ARCADIA_BUILD) #if !defined(ARCADIA_BUILD)
# include "config_core.h" # include "config_core.h"
# include "Common/config_version.h" # include "Common/config_version.h"
@ -221,6 +225,10 @@ int Server::main(const std::vector<std::string> & /*args*/)
registerDictionaries(); registerDictionaries();
registerDisks(); registerDisks();
#if defined (BITONIC_SORT_PREFERRED)
BitonicSort::getInstance().configure();
#endif
CurrentMetrics::set(CurrentMetrics::Revision, ClickHouseRevision::get()); CurrentMetrics::set(CurrentMetrics::Revision, ClickHouseRevision::get());
CurrentMetrics::set(CurrentMetrics::VersionInteger, ClickHouseRevision::getVersionInteger()); CurrentMetrics::set(CurrentMetrics::VersionInteger, ClickHouseRevision::getVersionInteger());

View File

@ -346,6 +346,11 @@ if (USE_BROTLI)
target_include_directories (clickhouse_common_io SYSTEM BEFORE PRIVATE ${BROTLI_INCLUDE_DIR}) target_include_directories (clickhouse_common_io SYSTEM BEFORE PRIVATE ${BROTLI_INCLUDE_DIR})
endif() endif()
if (USE_OPENCL)
target_link_libraries (clickhouse_common_io PRIVATE ${OpenCL_LIBRARIES})
target_include_directories (clickhouse_common_io SYSTEM BEFORE PRIVATE ${OpenCL_INCLUDE_DIRS})
endif ()
dbms_target_include_directories (PUBLIC ${DBMS_INCLUDE_DIR}) dbms_target_include_directories (PUBLIC ${DBMS_INCLUDE_DIR})
target_include_directories (clickhouse_common_io PUBLIC ${DBMS_INCLUDE_DIR}) target_include_directories (clickhouse_common_io PUBLIC ${DBMS_INCLUDE_DIR})

View File

@ -18,6 +18,15 @@
#include <ext/bit_cast.h> #include <ext/bit_cast.h>
#include <pdqsort.h> #include <pdqsort.h>
#if !defined(ARCADIA_BUILD)
# include <Common/config.h>
# if USE_OPENCL
# include "Common/BitonicSort.h"
# endif
#else
#undef USE_OPENCL
#endif
#ifdef __SSE2__ #ifdef __SSE2__
#include <emmintrin.h> #include <emmintrin.h>
#endif #endif
@ -135,6 +144,12 @@ void ColumnVector<T>::getPermutation(bool reverse, size_t limit, int nan_directi
} }
else else
{ {
#if USE_OPENCL
/// If bitonic sort if specified as preferred than `nan_direction_hint` equals specific value 42.
if (nan_direction_hint == 42 && BitonicSort::getInstance().sort(data, res, !reverse))
return;
#endif
/// A case for radix sort /// A case for radix sort
if constexpr (is_arithmetic_v<T> && !std::is_same_v<T, UInt128>) if constexpr (is_arithmetic_v<T> && !std::is_same_v<T, UInt128>)
{ {

270
src/Common/BitonicSort.h Normal file
View File

@ -0,0 +1,270 @@
#pragma once
#include <string.h>
#if !defined(__APPLE__) && !defined(__FreeBSD__)
#include <malloc.h>
#endif
#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/cl.h>
#endif
#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <cstdint>
#include <map>
#include <type_traits>
#include <ext/bit_cast.h>
#include <Core/Types.h>
#include <Core/Defines.h>
#include <Common/PODArray.h>
#include <Columns/ColumnsCommon.h>
#include "oclBasics.cpp"
#include "bitonicSortKernels.cl"
class BitonicSort
{
public:
static BitonicSort & getInstance()
{
static BitonicSort instance = BitonicSort();
return instance;
}
/// Sorts given array in specified order. Returns `true` if given sequence was sorted, `false` otherwise.
template <typename T>
bool sort(const DB::PaddedPODArray<T> & data, DB::IColumn::Permutation & res, cl_uint sort_ascending)
{
size_t s = data.size();
/// Getting the nearest power of 2.
size_t power = 1;
if (s <= 8) power = 8;
else while (power < s) power <<= 1;
/// Allocates more space for additional stubs to be added if needed.
std::vector<T> pairs_content(power);
std::vector<UInt32> pairs_indices(power);
for (UInt32 i = 0; i < s; ++i)
{
pairs_content[i] = data[i];
pairs_indices[i] = i;
}
bool result = sort(pairs_content.data(), pairs_indices.data(), s, power - s, sort_ascending);
if (!result) return false;
for (size_t i = 0, shift = 0; i < power; ++i)
{
if (pairs_indices[i] >= s)
{
++shift;
continue;
}
res[i - shift] = pairs_indices[i];
}
return true;
}
/// Creating a configuration instance with making all OpenCl required variables
/// such as device, platform, context, queue, program and kernel.
void configure()
{
OCL::Settings settings = OCL::Settings(1, nullptr, 1, nullptr, 1, 0);
cl_platform_id platform = OCL::getPlatformID(settings);
cl_device_id device = OCL::getDeviceID(platform, settings);
cl_context gpu_context = OCL::makeContext(device, settings);
cl_command_queue command_queue = OCL::makeCommandQueue(device, gpu_context, settings);
cl_program program = OCL::makeProgram(bitonic_sort_kernels, gpu_context, device, settings);
/// Creating kernels for each specified data type.
cl_int error = 0;
kernels["char"] = std::shared_ptr<KernelType>(clCreateKernel(program, "bitonicSort_char", &error),
clReleaseKernel);
kernels["uchar"] = std::shared_ptr<KernelType>(clCreateKernel(program, "bitonicSort_uchar", &error),
clReleaseKernel);
kernels["short"] = std::shared_ptr<KernelType>(clCreateKernel(program, "bitonicSort_short", &error),
clReleaseKernel);
kernels["ushort"] = std::shared_ptr<KernelType>(clCreateKernel(program, "bitonicSort_ushort", &error),
clReleaseKernel);
kernels["int"] = std::shared_ptr<KernelType>(clCreateKernel(program, "bitonicSort_int", &error),
clReleaseKernel);
kernels["uint"] = std::shared_ptr<KernelType>(clCreateKernel(program, "bitonicSort_uint", &error),
clReleaseKernel);
kernels["long"] = std::shared_ptr<KernelType>(clCreateKernel(program, "bitonicSort_long", &error),
clReleaseKernel);
kernels["ulong"] = std::shared_ptr<KernelType>(clCreateKernel(program, "bitonicSort_ulong", &error),
clReleaseKernel);
OCL::checkError(error);
configuration = std::shared_ptr<OCL::Configuration>(new OCL::Configuration(device, gpu_context, command_queue, program));
}
private:
/// Dictionary with kernels for each type from list: uchar, char, ushort, short, uint, int, ulong and long.
std::map<std::string, std::shared_ptr<KernelType>> kernels;
/// Current configuration with core OpenCL instances.
std::shared_ptr<OCL::Configuration> configuration = nullptr;
/// Returns `true` if given sequence was sorted, `false` otherwise.
template <typename T>
bool sort(T * p_input, cl_uint * indices, cl_int array_size, cl_int number_of_stubs, cl_uint sort_ascending)
{
if (typeid(T).name() == typeid(cl_char).name())
sort_char(reinterpret_cast<cl_char *>(p_input), indices, array_size, number_of_stubs, sort_ascending);
else if (typeid(T) == typeid(cl_uchar))
sort_uchar(reinterpret_cast<cl_uchar *>(p_input), indices, array_size, number_of_stubs, sort_ascending);
else if (typeid(T) == typeid(cl_short))
sort_short(reinterpret_cast<cl_short *>(p_input), indices, array_size, number_of_stubs, sort_ascending);
else if (typeid(T) == typeid(cl_ushort))
sort_ushort(reinterpret_cast<cl_ushort *>(p_input), indices, array_size, number_of_stubs, sort_ascending);
else if (typeid(T) == typeid(cl_int))
sort_int(reinterpret_cast<cl_int *>(p_input), indices, array_size, number_of_stubs, sort_ascending);
else if (typeid(T) == typeid(cl_uint))
sort_uint(reinterpret_cast<cl_uint *>(p_input), indices, array_size, number_of_stubs, sort_ascending);
else if (typeid(T) == typeid(cl_long))
sort_long(reinterpret_cast<cl_long *>(p_input), indices, array_size, number_of_stubs, sort_ascending);
else if (typeid(T) == typeid(cl_ulong))
sort_ulong(reinterpret_cast<cl_ulong *>(p_input), indices, array_size, number_of_stubs, sort_ascending);
else
return false;
return true;
}
/// Specific functions for each integer type.
void sort_char(cl_char * p_input, cl_uint * indices, cl_int array_size, cl_int number_of_stubs, cl_uint sort_ascending)
{
cl_char stubs_value = sort_ascending ? CHAR_MAX : CHAR_MIN;
fillWithStubs(number_of_stubs, stubs_value, p_input, indices, array_size);
sort(kernels["char"].get(), p_input, indices, array_size + number_of_stubs, sort_ascending);
}
void sort_uchar(cl_uchar * p_input, cl_uint * indices, cl_int array_size, cl_int number_of_stubs, cl_uint sort_ascending)
{
cl_uchar stubs_value = sort_ascending ? UCHAR_MAX : 0;
fillWithStubs(number_of_stubs, stubs_value, p_input, indices, array_size);
sort(kernels["uchar"].get(), p_input, indices, array_size + number_of_stubs, sort_ascending);
}
void sort_short(cl_short * p_input, cl_uint * indices, cl_int array_size, cl_int number_of_stubs, cl_uint sort_ascending)
{
cl_short stubs_value = sort_ascending ? SHRT_MAX : SHRT_MIN;
fillWithStubs(number_of_stubs, stubs_value, p_input, indices, array_size);
sort(kernels["short"].get(), p_input, indices, array_size + number_of_stubs, sort_ascending);
}
void sort_ushort(cl_ushort * p_input, cl_uint * indices, cl_int array_size, cl_int number_of_stubs, cl_uint sort_ascending)
{
cl_ushort stubs_value = sort_ascending ? USHRT_MAX : 0;
fillWithStubs(number_of_stubs, stubs_value, p_input, indices, array_size);
sort(kernels["ushort"].get(), p_input, indices, array_size + number_of_stubs, sort_ascending);
}
void sort_int(cl_int * p_input, cl_uint * indices, cl_int array_size, cl_int number_of_stubs, cl_uint sort_ascending)
{
cl_int stubs_value = sort_ascending ? INT_MAX : INT_MIN;
fillWithStubs(number_of_stubs, stubs_value, p_input, indices, array_size);
sort(kernels["int"].get(), p_input, indices, array_size + number_of_stubs, sort_ascending);
}
void sort_uint(cl_uint * p_input, cl_uint * indices, cl_int array_size, cl_int number_of_stubs, cl_uint sort_ascending)
{
cl_uint stubs_value = sort_ascending ? UINT_MAX : 0;
fillWithStubs(number_of_stubs, stubs_value, p_input, indices, array_size);
sort(kernels["uint"].get(), p_input, indices, array_size + number_of_stubs, sort_ascending);
}
void sort_long(cl_long * p_input, cl_uint * indices, cl_int array_size, cl_int number_of_stubs, cl_uint sort_ascending)
{
cl_long stubs_value = sort_ascending ? LONG_MAX : LONG_MIN;
fillWithStubs(number_of_stubs, stubs_value, p_input, indices, array_size);
sort(kernels["long"].get(), p_input, indices, array_size + number_of_stubs, sort_ascending);
}
void sort_ulong(cl_ulong * p_input, cl_uint * indices, cl_int array_size, cl_int number_of_stubs, cl_uint sort_ascending)
{
cl_ulong stubs_value = sort_ascending ? ULONG_MAX : 0;
fillWithStubs(number_of_stubs, stubs_value, p_input, indices, array_size);
sort(kernels["ulong"].get(), p_input, indices, array_size + number_of_stubs, sort_ascending);
}
/// Sorts p_input inplace with indices. Works only with arrays which size equals to power of two.
template <class T>
void sort(cl_kernel kernel, T * p_input, cl_uint * indices, cl_int array_size, cl_uint sort_ascending)
{
cl_int error = CL_SUCCESS;
cl_int num_stages = 0;
for (cl_int temp = array_size; temp > 2; temp >>= 1)
num_stages++;
/// Creating OpenCL buffers using input arrays memory.
cl_mem cl_input_buffer = OCL::createBuffer<T>(p_input, array_size, configuration.get()->context());
cl_mem cl_indices_buffer = OCL::createBuffer<cl_uint>(indices, array_size, configuration.get()->context());
configureKernel<cl_mem>(kernel, 0, static_cast<void *>(&cl_input_buffer));
configureKernel<cl_mem>(kernel, 1, static_cast<void *>(&cl_indices_buffer));
configureKernel<cl_uint>(kernel, 4, static_cast<void *>(&sort_ascending));
for (cl_int stage = 0; stage < num_stages; stage++)
{
configureKernel<cl_uint>(kernel, 2, static_cast<void *>(&stage));
for (cl_int pass_of_stage = stage; pass_of_stage >= 0; pass_of_stage--)
{
configureKernel<cl_uint>(kernel, 3, static_cast<void *>(&pass_of_stage));
/// Setting work-item dimensions.
size_t gsize = array_size / (2 * 4);
size_t global_work_size[1] = {pass_of_stage ? gsize : gsize << 1 }; // number of quad items in input array
/// Executing kernel.
error = clEnqueueNDRangeKernel(configuration.get()->commandQueue(), kernel, 1, nullptr,
global_work_size, nullptr, 0, nullptr, nullptr);
OCL::checkError(error);
}
}
/// Syncs all threads.
OCL::finishCommandQueue(configuration.get()->commandQueue());
OCL::releaseData(p_input, array_size, cl_input_buffer, configuration.get()->commandQueue());
OCL::releaseData(indices, array_size, cl_indices_buffer, configuration.get()->commandQueue());
}
template <class T>
void configureKernel(cl_kernel kernel, int number_of_argument, void * source)
{
cl_int error = clSetKernelArg(kernel, number_of_argument, sizeof(T), source);
OCL::checkError(error);
}
/// Fills given sequences from `arraySize` index with `numberOfStubs` values.
template <class T>
void fillWithStubs(cl_int number_of_stubs, T value, T * p_input,
cl_uint * indices, cl_int array_size)
{
for (cl_int index = 0; index < number_of_stubs; ++index)
{
p_input[array_size + index] = value;
indices[array_size + index] = array_size + index;
}
}
BitonicSort() {}
BitonicSort(BitonicSort const &);
void operator=(BitonicSort const &);
};

View File

@ -493,6 +493,7 @@ namespace ErrorCodes
extern const int NO_REMOTE_SHARD_AVAILABLE = 519; extern const int NO_REMOTE_SHARD_AVAILABLE = 519;
extern const int CANNOT_DETACH_DICTIONARY_AS_TABLE = 520; extern const int CANNOT_DETACH_DICTIONARY_AS_TABLE = 520;
extern const int ATOMIC_RENAME_FAIL = 521; extern const int ATOMIC_RENAME_FAIL = 521;
extern const int OPENCL_ERROR = 522;
extern const int KEEPER_EXCEPTION = 999; extern const int KEEPER_EXCEPTION = 999;
extern const int POCO_EXCEPTION = 1000; extern const int POCO_EXCEPTION = 1000;

File diff suppressed because it is too large Load Diff

View File

@ -8,4 +8,5 @@
#cmakedefine01 USE_AWS_S3 #cmakedefine01 USE_AWS_S3
#cmakedefine01 USE_BROTLI #cmakedefine01 USE_BROTLI
#cmakedefine01 USE_UNWIND #cmakedefine01 USE_UNWIND
#cmakedefine01 USE_OPENCL
#cmakedefine01 CLICKHOUSE_SPLIT_BINARY #cmakedefine01 CLICKHOUSE_SPLIT_BINARY

363
src/Common/oclBasics.cpp Normal file
View File

@ -0,0 +1,363 @@
#include <Common/config.h>
#if USE_OPENCL
#if !defined(__APPLE__) && !defined(__FreeBSD__)
#include <malloc.h>
#endif
#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/cl.h>
#endif
#include <algorithm>
#include <Core/Types.h>
#include <Common/Exception.h>
#ifndef CL_VERSION_2_0
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
#endif
using KernelType = std::remove_reference<decltype(*cl_kernel())>::type;
namespace DB
{
namespace ErrorCodes
{
extern const int OPENCL_ERROR;
}
}
struct OCL
{
/**
* Structure which represents the most essential settings of common OpenCl entities.
*/
struct Settings
{
// Platform info
cl_uint number_of_platform_entries;
cl_uint * number_of_available_platforms;
// Devices info
cl_uint number_of_devices_entries;
cl_uint * number_of_available_devices;
// Context settings
cl_context_properties * context_properties;
void (* context_callback)(const char *, const void *, size_t, void *);
void * context_callback_data;
// Command queue settings
cl_command_queue_properties command_queue_properties;
// Build settings
cl_uint number_of_program_source_pointers;
void (* build_notification_routine)(cl_program, void *user_data);
void * build_callback_data;
char * build_options;
Settings(cl_uint number_of_platform_entries_,
cl_uint * number_of_available_platforms_,
cl_uint number_of_devices_entries_,
cl_uint * number_of_available_devices_,
cl_uint number_of_program_source_pointers_,
cl_command_queue_properties command_queue_properties_,
cl_context_properties * context_properties_ = nullptr,
void * context_data_callback_ = nullptr,
void (* context_callback_)(const char *, const void *, size_t, void *) = nullptr,
void (* build_notification_routine_)(cl_program, void * user_data) = nullptr,
void * build_callback_data_ = nullptr,
char * build_options_ = nullptr)
{
this->number_of_platform_entries = number_of_platform_entries_;
this->number_of_available_platforms = number_of_available_platforms_;
this->number_of_devices_entries = number_of_devices_entries_;
this->number_of_available_devices = number_of_available_devices_;
this->number_of_program_source_pointers = number_of_program_source_pointers_;
this->command_queue_properties = command_queue_properties_;
this->context_properties = context_properties_;
this->context_callback = context_callback_;
this->context_callback_data = context_data_callback_;
this->build_notification_routine = build_notification_routine_;
this->build_callback_data = build_callback_data_;
this->build_options = build_options_;
}
};
/**
* Configuration with already created OpenCl common entities.
*/
class Configuration
{
public:
Configuration(cl_device_id device, cl_context gpu_context,
cl_command_queue command_queue, cl_program program)
{
this->device_ = device;
this->gpu_context_ = std::shared_ptr<ContextType>(gpu_context, clReleaseContext);
this->command_queue_ = std::shared_ptr<CommandQueueType>(command_queue, clReleaseCommandQueue);
this->program_ = std::shared_ptr<ProgramType>(program, clReleaseProgram);
}
cl_device_id device() { return device_; }
cl_context context() { return gpu_context_.get(); }
cl_command_queue commandQueue() { return command_queue_.get(); }
cl_program program() { return program_.get(); }
private:
using ProgramType = std::remove_reference<decltype(*cl_program())>::type;
using CommandQueueType = std::remove_reference<decltype(*cl_command_queue())>::type;
using ContextType = std::remove_reference<decltype(*cl_context())>::type;
cl_device_id device_;
std::shared_ptr<ContextType> gpu_context_;
std::shared_ptr<CommandQueueType> command_queue_;
std::shared_ptr<ProgramType> program_;
};
static String opencl_error_to_str(cl_int error)
{
#define CASE_CL_CONSTANT(NAME) case NAME: return #NAME;
// Suppose that no combinations are possible.
switch (error)
{
CASE_CL_CONSTANT(CL_SUCCESS)
CASE_CL_CONSTANT(CL_DEVICE_NOT_FOUND)
CASE_CL_CONSTANT(CL_DEVICE_NOT_AVAILABLE)
CASE_CL_CONSTANT(CL_COMPILER_NOT_AVAILABLE)
CASE_CL_CONSTANT(CL_MEM_OBJECT_ALLOCATION_FAILURE)
CASE_CL_CONSTANT(CL_OUT_OF_RESOURCES)
CASE_CL_CONSTANT(CL_OUT_OF_HOST_MEMORY)
CASE_CL_CONSTANT(CL_PROFILING_INFO_NOT_AVAILABLE)
CASE_CL_CONSTANT(CL_MEM_COPY_OVERLAP)
CASE_CL_CONSTANT(CL_IMAGE_FORMAT_MISMATCH)
CASE_CL_CONSTANT(CL_IMAGE_FORMAT_NOT_SUPPORTED)
CASE_CL_CONSTANT(CL_BUILD_PROGRAM_FAILURE)
CASE_CL_CONSTANT(CL_MAP_FAILURE)
CASE_CL_CONSTANT(CL_MISALIGNED_SUB_BUFFER_OFFSET)
CASE_CL_CONSTANT(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST)
CASE_CL_CONSTANT(CL_COMPILE_PROGRAM_FAILURE)
CASE_CL_CONSTANT(CL_LINKER_NOT_AVAILABLE)
CASE_CL_CONSTANT(CL_LINK_PROGRAM_FAILURE)
CASE_CL_CONSTANT(CL_DEVICE_PARTITION_FAILED)
CASE_CL_CONSTANT(CL_KERNEL_ARG_INFO_NOT_AVAILABLE)
CASE_CL_CONSTANT(CL_INVALID_VALUE)
CASE_CL_CONSTANT(CL_INVALID_DEVICE_TYPE)
CASE_CL_CONSTANT(CL_INVALID_PLATFORM)
CASE_CL_CONSTANT(CL_INVALID_DEVICE)
CASE_CL_CONSTANT(CL_INVALID_CONTEXT)
CASE_CL_CONSTANT(CL_INVALID_QUEUE_PROPERTIES)
CASE_CL_CONSTANT(CL_INVALID_COMMAND_QUEUE)
CASE_CL_CONSTANT(CL_INVALID_HOST_PTR)
CASE_CL_CONSTANT(CL_INVALID_MEM_OBJECT)
CASE_CL_CONSTANT(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR)
CASE_CL_CONSTANT(CL_INVALID_IMAGE_SIZE)
CASE_CL_CONSTANT(CL_INVALID_SAMPLER)
CASE_CL_CONSTANT(CL_INVALID_BINARY)
CASE_CL_CONSTANT(CL_INVALID_BUILD_OPTIONS)
CASE_CL_CONSTANT(CL_INVALID_PROGRAM)
CASE_CL_CONSTANT(CL_INVALID_PROGRAM_EXECUTABLE)
CASE_CL_CONSTANT(CL_INVALID_KERNEL_NAME)
CASE_CL_CONSTANT(CL_INVALID_KERNEL_DEFINITION)
CASE_CL_CONSTANT(CL_INVALID_KERNEL)
CASE_CL_CONSTANT(CL_INVALID_ARG_INDEX)
CASE_CL_CONSTANT(CL_INVALID_ARG_VALUE)
CASE_CL_CONSTANT(CL_INVALID_ARG_SIZE)
CASE_CL_CONSTANT(CL_INVALID_KERNEL_ARGS)
CASE_CL_CONSTANT(CL_INVALID_WORK_DIMENSION)
CASE_CL_CONSTANT(CL_INVALID_WORK_GROUP_SIZE)
CASE_CL_CONSTANT(CL_INVALID_WORK_ITEM_SIZE)
CASE_CL_CONSTANT(CL_INVALID_GLOBAL_OFFSET)
CASE_CL_CONSTANT(CL_INVALID_EVENT_WAIT_LIST)
CASE_CL_CONSTANT(CL_INVALID_EVENT)
CASE_CL_CONSTANT(CL_INVALID_OPERATION)
CASE_CL_CONSTANT(CL_INVALID_GL_OBJECT)
CASE_CL_CONSTANT(CL_INVALID_BUFFER_SIZE)
CASE_CL_CONSTANT(CL_INVALID_MIP_LEVEL)
CASE_CL_CONSTANT(CL_INVALID_GLOBAL_WORK_SIZE)
CASE_CL_CONSTANT(CL_INVALID_PROPERTY)
CASE_CL_CONSTANT(CL_INVALID_IMAGE_DESCRIPTOR)
CASE_CL_CONSTANT(CL_INVALID_COMPILER_OPTIONS)
CASE_CL_CONSTANT(CL_INVALID_LINKER_OPTIONS)
CASE_CL_CONSTANT(CL_INVALID_DEVICE_PARTITION_COUNT)
default:
return "UNKNOWN ERROR CODE ";
}
#undef CASE_CL_CONSTANT
}
static void checkError(cl_int error)
{
if (error != CL_SUCCESS)
throw DB::Exception("OpenCL error " + opencl_error_to_str(error), DB::ErrorCodes::OPENCL_ERROR);
}
/// Getting OpenCl main entities.
static cl_platform_id getPlatformID(const Settings & settings)
{
cl_platform_id platform;
cl_int error = clGetPlatformIDs(settings.number_of_platform_entries, &platform,
settings.number_of_available_platforms);
checkError(error);
return platform;
}
static cl_device_id getDeviceID(cl_platform_id & platform, const Settings & settings)
{
cl_device_id device;
cl_int error = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, settings.number_of_devices_entries,
&device, settings.number_of_available_devices);
OCL::checkError(error);
return device;
}
static cl_context makeContext(cl_device_id & device, const Settings & settings)
{
cl_int error;
cl_context gpu_context = clCreateContext(settings.context_properties, settings.number_of_devices_entries,
&device, settings.context_callback, settings.context_callback_data,
&error);
OCL::checkError(error);
return gpu_context;
}
static cl_command_queue makeCommandQueue(cl_device_id & device, cl_context & context, const Settings & settings [[maybe_unused]])
{
cl_int error;
#ifdef CL_USE_DEPRECATED_OPENCL_1_2_APIS
cl_command_queue command_queue = clCreateCommandQueue(context, device, settings.command_queue_properties, &error);
#else
cl_command_queue command_queue = clCreateCommandQueueWithProperties(context, device, nullptr, &error);
#endif
OCL::checkError(error);
return command_queue;
}
static cl_program makeProgram(const char * source_code, cl_context context,
cl_device_id device_id, const Settings & settings)
{
cl_int error = 0;
size_t source_size = strlen(source_code);
cl_program program = clCreateProgramWithSource(context, settings.number_of_program_source_pointers, &source_code, &source_size, &error);
checkError(error);
error = clBuildProgram(program, settings.number_of_devices_entries, &device_id, settings.build_options,
settings.build_notification_routine, settings.build_callback_data);
/// Combining additional logs output when program build failed.
if (error == CL_BUILD_PROGRAM_FAILURE)
{
size_t log_size;
error = clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, 0, nullptr, &log_size);
checkError(error);
std::vector<char> log(log_size);
clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, log_size, log.data(), nullptr);
checkError(error);
throw DB::Exception(log.data(), DB::ErrorCodes::OPENCL_ERROR);
}
checkError(error);
return program;
}
/// Configuring buffer for given input data
template<typename K>
static cl_mem createBuffer(K * p_input, cl_int array_size, cl_context context,
cl_int elements_size = sizeof(K))
{
cl_int error = CL_SUCCESS;
cl_mem cl_input_buffer =
clCreateBuffer
(
context,
CL_MEM_USE_HOST_PTR,
zeroCopySizeAlignment(elements_size * array_size),
p_input,
&error
);
checkError(error);
return cl_input_buffer;
}
static size_t zeroCopySizeAlignment(size_t required_size)
{
return required_size + (~required_size + 1) % 64;
}
/// Manipulating with common OpenCL variables.
static void finishCommandQueue(cl_command_queue command_queue)
{
// Blocks until all previously queued OpenCL commands in a queue are issued to the associated device.
cl_int error = clFinish(command_queue);
OCL::checkError(error);
}
template<class T>
static void releaseData(T * origin, cl_int array_size, cl_mem cl_buffer,
cl_command_queue command_queue, size_t offset = 0)
{
cl_int error = CL_SUCCESS;
void * tmp_ptr = nullptr;
// No events specified to be completed before enqueueing buffers,
// so `num_events_in_wait_list` passed with `0` value.
tmp_ptr = clEnqueueMapBuffer(command_queue, cl_buffer, true, CL_MAP_READ,
offset, sizeof(cl_int) * array_size, 0, nullptr, nullptr, &error);
OCL::checkError(error);
if (tmp_ptr != origin)
throw DB::Exception("clEnqueueMapBuffer failed to return original pointer", DB::ErrorCodes::OPENCL_ERROR);
error = clEnqueueUnmapMemObject(command_queue, cl_buffer, tmp_ptr, 0, nullptr, nullptr);
checkError(error);
error = clReleaseMemObject(cl_buffer);
checkError(error);
}
};
#endif

View File

@ -35,6 +35,9 @@ target_link_libraries (compact_array PRIVATE clickhouse_common_io)
add_executable (radix_sort radix_sort.cpp) add_executable (radix_sort radix_sort.cpp)
target_link_libraries (radix_sort PRIVATE clickhouse_common_io) target_link_libraries (radix_sort PRIVATE clickhouse_common_io)
add_executable (bitonic_sort bitonic_sort.cpp)
target_link_libraries (bitonic_sort PRIVATE clickhouse_common_io "-framework OpenCL")
add_executable (arena_with_free_lists arena_with_free_lists.cpp) add_executable (arena_with_free_lists arena_with_free_lists.cpp)
target_link_libraries (arena_with_free_lists PRIVATE dbms) target_link_libraries (arena_with_free_lists PRIVATE dbms)

View File

@ -0,0 +1,248 @@
#include <Common/config.h>
#include <iostream>
#if USE_OPENCL
#if !defined(__APPLE__) && !defined(__FreeBSD__)
#include <malloc.h>
#endif
#include <ext/bit_cast.h>
#include <Common/Stopwatch.h>
#include <IO/ReadHelpers.h>
#include <Core/Defines.h>
#include <climits>
#include <algorithm>
#include "Common/BitonicSort.h"
using Key = cl_ulong;
/// Generates vector of size 8 for testing.
/// Vector contains max possible value, min possible value and duplicate values.
template <class Type>
static void generateTest(std::vector<Type>& data, Type min_value, Type max_value)
{
int size = 10;
data.resize(size);
data[0] = 10;
data[1] = max_value;
data[2] = 10;
data[3] = 20;
data[4] = min_value;
data[5] = min_value + 1;
data[6] = max_value - 5;
data[7] = 1;
data[8] = 42;
data[9] = max_value - 1;
}
static void check(const std::vector<size_t> & indices, bool reverse = true)
{
std::vector<size_t> reference_indices{4, 5, 7, 0, 2, 3, 8, 6, 9, 1};
if (reverse) std::reverse(reference_indices.begin(), reference_indices.end());
bool success = true;
for (size_t index = 0; index < reference_indices.size(); ++index)
{
if (indices[index] != reference_indices[index])
{
success = false;
std::cerr << "Test failed. Reason: indices[" << index << "] = "
<< indices[index] << ", it must be equal to " << reference_indices[index] << "\n";
}
}
std::string order_description = reverse ? "descending" : "ascending";
std::cerr << "Sorted " << order_description << " sequence. Result: " << (success ? "Ok." : "Fail!") << "\n";
}
template <class Type>
static void sortBitonicSortWithPodArrays(const std::vector<Type>& data,
std::vector<size_t> & indices, bool ascending = true)
{
DB::PaddedPODArray<Type> pod_array_data = DB::PaddedPODArray<Type>(data.size());
DB::IColumn::Permutation pod_array_indices = DB::IColumn::Permutation(data.size());
for (size_t index = 0; index < data.size(); ++index)
{
*(pod_array_data.data() + index) = data[index];
*(pod_array_indices.data() + index) = index;
}
BitonicSort::getInstance().configure();
BitonicSort::getInstance().sort(pod_array_data, pod_array_indices, ascending);
for (size_t index = 0; index < data.size(); ++index)
indices[index] = pod_array_indices[index];
}
template <class Type>
static void testBitonicSort(std::string test_name, Type min_value, Type max_value)
{
std::cerr << test_name << std::endl;
std::vector<Type> data;
generateTest<Type>(data, min_value, max_value);
std::vector<size_t> indices(data.size());
sortBitonicSortWithPodArrays(data, indices, true);
check(indices, false);
sortBitonicSortWithPodArrays(data, indices, false);
check(indices, true);
}
static void straightforwardTests()
{
testBitonicSort<cl_char>("Test 01: cl_char.", CHAR_MIN, CHAR_MAX);
testBitonicSort<cl_uchar>("Test 02: cl_uchar.", 0, UCHAR_MAX);
testBitonicSort<cl_short>("Test 03: cl_short.", SHRT_MIN, SHRT_MAX);
testBitonicSort<cl_ushort>("Test 04: cl_ushort.", 0, USHRT_MAX);
testBitonicSort<cl_int>("Test 05: cl_int.", INT_MIN, INT_MAX);
testBitonicSort<cl_uint >("Test 06: cl_uint.", 0, UINT_MAX);
testBitonicSort<cl_long >("Test 07: cl_long.", LONG_MIN, LONG_MAX);
testBitonicSort<cl_ulong >("Test 08: cl_ulong.", 0, ULONG_MAX);
}
static void NO_INLINE sort1(Key * data, size_t size)
{
std::sort(data, data + size);
}
static void NO_INLINE sort2(std::vector<Key> & data, std::vector<size_t> & indices)
{
BitonicSort::getInstance().configure();
sortBitonicSortWithPodArrays(data, indices);
std::vector<Key> result(data.size());
for (size_t index = 0; index < data.size(); ++index)
result[index] = data[indices[index]];
data = std::move(result);
}
int main(int argc, char ** argv)
{
straightforwardTests();
if (argc < 3)
{
std::cerr << "Not enough arguments were passed\n";
return 1;
}
size_t n = DB::parse<size_t>(argv[1]);
size_t method = DB::parse<size_t>(argv[2]);
std::vector<Key> data(n);
std::vector<size_t> indices(n);
{
Stopwatch watch;
for (auto & elem : data)
elem = static_cast<Key>(rand());
for (size_t i = 0; i < n; ++i)
indices[i] = i;
watch.stop();
double elapsed = watch.elapsedSeconds();
std::cerr
<< "Filled in " << elapsed
<< " (" << n / elapsed << " elem/sec., "
<< n * sizeof(Key) / elapsed / 1048576 << " MB/sec.)"
<< std::endl;
}
if (n <= 100)
{
std::cerr << std::endl;
for (const auto & elem : data)
std::cerr << elem << ' ';
std::cerr << std::endl;
for (const auto & index : indices)
std::cerr << index << ' ';
std::cerr << std::endl;
}
{
Stopwatch watch;
if (method == 1) sort1(data.data(), n);
if (method == 2) sort2(data, indices);
watch.stop();
double elapsed = watch.elapsedSeconds();
std::cerr
<< "Sorted in " << elapsed
<< " (" << n / elapsed << " elem/sec., "
<< n * sizeof(Key) / elapsed / 1048576 << " MB/sec.)"
<< std::endl;
}
{
Stopwatch watch;
size_t i = 1;
while (i < n)
{
if (!(data[i - 1] <= data[i]))
break;
++i;
}
watch.stop();
double elapsed = watch.elapsedSeconds();
std::cerr
<< "Checked in " << elapsed
<< " (" << n / elapsed << " elem/sec., "
<< n * sizeof(Key) / elapsed / 1048576 << " MB/sec.)"
<< std::endl
<< "Result: " << (i == n ? "Ok." : "Fail!") << std::endl;
}
if (n <= 1000)
{
std::cerr << std::endl;
std::cerr << data[0] << ' ';
for (size_t i = 1; i < n; ++i)
{
if (!(data[i - 1] <= data[i]))
std::cerr << "*** ";
std::cerr << data[i] << ' ';
}
std::cerr << std::endl;
for (const auto & index : indices)
std::cerr << index << ' ';
std::cerr << std::endl;
}
return 0;
}
#else
int main()
{
std::cerr << "Openc CL disabled.";
return 0;
}
#endif

View File

@ -111,6 +111,8 @@ struct Settings : public SettingsCollection<Settings>
M(SettingUInt64, parallel_replicas_count, 0, "", 0) \ M(SettingUInt64, parallel_replicas_count, 0, "", 0) \
M(SettingUInt64, parallel_replica_offset, 0, "", 0) \ M(SettingUInt64, parallel_replica_offset, 0, "", 0) \
\ \
M(SettingSpecialSort, special_sort, SpecialSort::NOT_SPECIFIED, "Specifies a sorting algorithm which will be using in ORDER BY query.", 0) \
\
M(SettingBool, skip_unavailable_shards, false, "If 1, ClickHouse silently skips unavailable shards and nodes unresolvable through DNS. Shard is marked as unavailable when none of the replicas can be reached.", 0) \ M(SettingBool, skip_unavailable_shards, false, "If 1, ClickHouse silently skips unavailable shards and nodes unresolvable through DNS. Shard is marked as unavailable when none of the replicas can be reached.", 0) \
\ \
M(SettingBool, distributed_group_by_no_merge, false, "Do not merge aggregation states from different servers for distributed query processing - in case it is for certain that there are different keys on different shards.", 0) \ M(SettingBool, distributed_group_by_no_merge, false, "Do not merge aggregation states from different servers for distributed query processing - in case it is for certain that there are different keys on different shards.", 0) \

View File

@ -485,6 +485,12 @@ void SettingURI::deserialize(ReadBuffer & buf, SettingsBinaryFormat)
IMPLEMENT_SETTING_ENUM(LoadBalancing, LOAD_BALANCING_LIST_OF_NAMES, ErrorCodes::UNKNOWN_LOAD_BALANCING) IMPLEMENT_SETTING_ENUM(LoadBalancing, LOAD_BALANCING_LIST_OF_NAMES, ErrorCodes::UNKNOWN_LOAD_BALANCING)
#define SPECIAL_SORT_ALGORITHM_NAMES(M) \
M(NOT_SPECIFIED, "not_specified") \
M(OPENCL_BITONIC, "opencl_bitonic")
IMPLEMENT_SETTING_ENUM(SpecialSort, SPECIAL_SORT_ALGORITHM_NAMES, ErrorCodes::UNKNOWN_JOIN)
#define JOIN_STRICTNESS_LIST_OF_NAMES(M) \ #define JOIN_STRICTNESS_LIST_OF_NAMES(M) \
M(Unspecified, "") \ M(Unspecified, "") \
M(ALL, "ALL") \ M(ALL, "ALL") \

View File

@ -251,6 +251,15 @@ enum class JoinAlgorithm
}; };
using SettingJoinAlgorithm = SettingEnum<JoinAlgorithm>; using SettingJoinAlgorithm = SettingEnum<JoinAlgorithm>;
enum class SpecialSort
{
NOT_SPECIFIED = 0,
OPENCL_BITONIC,
};
using SettingSpecialSort = SettingEnum<SpecialSort>;
/// Which rows should be included in TOTALS. /// Which rows should be included in TOTALS.
enum class TotalsMode enum class TotalsMode
{ {

View File

@ -5,6 +5,7 @@
#include <cstddef> #include <cstddef>
#include <string> #include <string>
#include <Core/Field.h> #include <Core/Field.h>
#include <Core/SettingsCollection.h>
class Collator; class Collator;
@ -31,21 +32,22 @@ struct SortColumnDescription
std::shared_ptr<Collator> collator; /// Collator for locale-specific comparison of strings std::shared_ptr<Collator> collator; /// Collator for locale-specific comparison of strings
bool with_fill; bool with_fill;
FillColumnDescription fill_description; FillColumnDescription fill_description;
SpecialSort special_sort;
SortColumnDescription( SortColumnDescription(
size_t column_number_, int direction_, int nulls_direction_, size_t column_number_, int direction_, int nulls_direction_,
const std::shared_ptr<Collator> & collator_ = nullptr, bool with_fill_ = false, const std::shared_ptr<Collator> & collator_ = nullptr, SpecialSort special_sort_ = SpecialSort::NOT_SPECIFIED,
const FillColumnDescription & fill_description_ = {}) bool with_fill_ = false, const FillColumnDescription & fill_description_ = {})
: column_number(column_number_), direction(direction_), nulls_direction(nulls_direction_), collator(collator_) : column_number(column_number_), direction(direction_), nulls_direction(nulls_direction_), collator(collator_)
, with_fill(with_fill_), fill_description(fill_description_) {} , with_fill(with_fill_), fill_description(fill_description_), special_sort(special_sort_) {}
SortColumnDescription( SortColumnDescription(
const std::string & column_name_, int direction_, int nulls_direction_, const std::string & column_name_, int direction_, int nulls_direction_,
const std::shared_ptr<Collator> & collator_ = nullptr, bool with_fill_ = false, const std::shared_ptr<Collator> & collator_ = nullptr, SpecialSort special_sort_ = SpecialSort::NOT_SPECIFIED,
const FillColumnDescription & fill_description_ = {}) bool with_fill_ = false, const FillColumnDescription & fill_description_ = {})
: column_name(column_name_), column_number(0), direction(direction_), nulls_direction(nulls_direction_) : column_name(column_name_), column_number(0), direction(direction_), nulls_direction(nulls_direction_)
, collator(collator_), with_fill(with_fill_), fill_description(fill_description_) {} , collator(collator_), with_fill(with_fill_), fill_description(fill_description_), special_sort(special_sort_) {}
bool operator == (const SortColumnDescription & other) const bool operator == (const SortColumnDescription & other) const
{ {

View File

@ -633,6 +633,7 @@ static SortDescription getSortDescription(const ASTSelectQuery & query, const Co
{ {
SortDescription order_descr; SortDescription order_descr;
order_descr.reserve(query.orderBy()->children.size()); order_descr.reserve(query.orderBy()->children.size());
SpecialSort special_sort = context.getSettings().special_sort.value;
for (const auto & elem : query.orderBy()->children) for (const auto & elem : query.orderBy()->children)
{ {
String name = elem->children.front()->getColumnName(); String name = elem->children.front()->getColumnName();
@ -646,10 +647,10 @@ static SortDescription getSortDescription(const ASTSelectQuery & query, const Co
{ {
FillColumnDescription fill_desc = getWithFillDescription(order_by_elem, context); FillColumnDescription fill_desc = getWithFillDescription(order_by_elem, context);
order_descr.emplace_back(name, order_by_elem.direction, order_descr.emplace_back(name, order_by_elem.direction,
order_by_elem.nulls_direction, collator, true, fill_desc); order_by_elem.nulls_direction, collator, special_sort, true, fill_desc);
} }
else else
order_descr.emplace_back(name, order_by_elem.direction, order_by_elem.nulls_direction, collator); order_descr.emplace_back(name, order_by_elem.direction, order_by_elem.nulls_direction, collator, special_sort);
} }
return order_descr; return order_descr;

View File

@ -13,6 +13,7 @@ namespace DB
namespace ErrorCodes namespace ErrorCodes
{ {
extern const int BAD_COLLATION; extern const int BAD_COLLATION;
extern const int OPENCL_ERROR;
} }
static bool isCollationRequired(const SortColumnDescription & description) static bool isCollationRequired(const SortColumnDescription & description)
@ -131,7 +132,22 @@ void sortBlock(Block & block, const SortDescription & description, UInt64 limit)
} }
else if (!isColumnConst(*column)) else if (!isColumnConst(*column))
column->getPermutation(reverse, limit, description[0].nulls_direction, perm); {
int nan_direction_hint = description[0].nulls_direction;
/// If in Settings `special_sort` option has been set as `bitonic_sort`,
/// then via `nan_direction_hint` variable a flag which specifies bitonic sort as preferred
/// will be passed to `getPermutation` method with value 42.
if (description[0].special_sort == SpecialSort::OPENCL_BITONIC)
{
nan_direction_hint = 42;
#ifndef BITONIC_SORT_PREFERRED
throw DB::Exception("Bitonic sort specified as preferred, but OpenCL not available", DB::ErrorCodes::OPENCL_ERROR);
#endif
}
column->getPermutation(reverse, limit, nan_direction_hint, perm);
}
else else
/// we don't need to do anything with const column /// we don't need to do anything with const column
is_column_const = true; is_column_const = true;

View File

@ -0,0 +1,42 @@
82
80
78
76
74
72
70
68
66
64
62
60
58
56
54
52
50
48
46
44
42
40
38
36
34
32
30
28
26
24
22
20
18
16
14
12
10
8
6
4
2
0

View File

@ -0,0 +1 @@
select toUInt8(number * 2) as x from numbers(42) order by x desc settings special_sort = 'opencl_bitonic'

View File

@ -0,0 +1,42 @@
82
80
78
76
74
72
70
68
66
64
62
60
58
56
54
52
50
48
46
44
42
40
38
36
34
32
30
28
26
24
22
20
18
16
14
12
10
8
6
4
2
0

View File

@ -0,0 +1 @@
select toInt8(number * 2) as x from numbers(42) order by x desc settings special_sort = 'opencl_bitonic'

View File

@ -0,0 +1,42 @@
82
80
78
76
74
72
70
68
66
64
62
60
58
56
54
52
50
48
46
44
42
40
38
36
34
32
30
28
26
24
22
20
18
16
14
12
10
8
6
4
2
0

View File

@ -0,0 +1 @@
select toUInt16(number * 2) as x from numbers(42) order by x desc settings special_sort = 'opencl_bitonic'

View File

@ -0,0 +1,42 @@
82
80
78
76
74
72
70
68
66
64
62
60
58
56
54
52
50
48
46
44
42
40
38
36
34
32
30
28
26
24
22
20
18
16
14
12
10
8
6
4
2
0

View File

@ -0,0 +1 @@
select toInt16(number * 2) as x from numbers(42) order by x desc settings special_sort = 'opencl_bitonic'

View File

@ -0,0 +1,42 @@
82
80
78
76
74
72
70
68
66
64
62
60
58
56
54
52
50
48
46
44
42
40
38
36
34
32
30
28
26
24
22
20
18
16
14
12
10
8
6
4
2
0

View File

@ -0,0 +1 @@
select toUInt32(number * 2) as x from numbers(42) order by x desc settings special_sort = 'opencl_bitonic'

View File

@ -0,0 +1,42 @@
82
80
78
76
74
72
70
68
66
64
62
60
58
56
54
52
50
48
46
44
42
40
38
36
34
32
30
28
26
24
22
20
18
16
14
12
10
8
6
4
2
0

View File

@ -0,0 +1 @@
select toInt32(number * 2) as x from numbers(42) order by x desc settings special_sort = 'opencl_bitonic'

View File

@ -0,0 +1,42 @@
82
80
78
76
74
72
70
68
66
64
62
60
58
56
54
52
50
48
46
44
42
40
38
36
34
32
30
28
26
24
22
20
18
16
14
12
10
8
6
4
2
0

View File

@ -0,0 +1 @@
select toUInt64(number * 2) as x from numbers(42) order by x desc settings special_sort = 'opencl_bitonic'

View File

@ -0,0 +1,42 @@
82
80
78
76
74
72
70
68
66
64
62
60
58
56
54
52
50
48
46
44
42
40
38
36
34
32
30
28
26
24
22
20
18
16
14
12
10
8
6
4
2
0

View File

@ -0,0 +1 @@
select toInt64(number * 2) as x from numbers(42) order by x desc settings special_sort = 'opencl_bitonic'