Remove OpenCL

This commit is contained in:
Alexey Milovidov 2020-09-19 17:20:00 +03:00
parent 8e2231036b
commit 059646743b
17 changed files with 7 additions and 870 deletions

View File

@ -404,7 +404,6 @@ include (cmake/find/amqpcpp.cmake)
include (cmake/find/capnp.cmake)
include (cmake/find/llvm.cmake)
include (cmake/find/termcap.cmake) # for external static llvm
include (cmake/find/opencl.cmake)
include (cmake/find/h3.cmake)
include (cmake/find/libxml2.cmake)
include (cmake/find/brotli.cmake)
@ -450,13 +449,6 @@ include (cmake/find/mysqlclient.cmake)
# When testing for memory leaks with Valgrind, don't link tcmalloc or jemalloc.
if (USE_OPENCL)
if (OS_DARWIN)
set(OPENCL_LINKER_FLAGS "-framework OpenCL")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OPENCL_LINKER_FLAGS}")
endif ()
endif ()
include (cmake/print_flags.cmake)
if (TARGET global-group)

View File

@ -1,25 +0,0 @@
# TODO: enable by default
if(0)
option(ENABLE_OPENCL "Enable OpenCL support" ${ENABLE_LIBRARIES})
endif()
if(NOT ENABLE_OPENCL)
return()
endif()
# Intel OpenCl driver: sudo apt install intel-opencl-icd
# @sa https://github.com/intel/compute-runtime/releases
# OpenCL applications should link with ICD loader
# sudo apt install opencl-headers ocl-icd-libopencl1
# sudo ln -s /usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 /usr/lib/libOpenCL.so
# TODO: add https://github.com/OCL-dev/ocl-icd as submodule instead
find_package(OpenCL)
if(OpenCL_FOUND)
set(USE_OPENCL 1)
else()
message (${RECONFIGURE_MESSAGE_LEVEL} "Can't enable OpenCL support")
endif()
message(STATUS "Using opencl=${USE_OPENCL}: ${OpenCL_INCLUDE_DIRS} : ${OpenCL_LIBRARIES}")

View File

@ -378,11 +378,6 @@ if (USE_BROTLI)
target_include_directories (clickhouse_common_io SYSTEM BEFORE PRIVATE ${BROTLI_INCLUDE_DIR})
endif()
if (USE_OPENCL)
target_link_libraries (clickhouse_common_io PRIVATE ${OpenCL_LIBRARIES})
target_include_directories (clickhouse_common_io SYSTEM BEFORE PRIVATE ${OpenCL_INCLUDE_DIRS})
endif ()
if (USE_CASSANDRA)
dbms_target_link_libraries(PUBLIC ${CASSANDRA_LIBRARY})
dbms_target_include_directories (SYSTEM BEFORE PUBLIC ${CASS_INCLUDE_DIR})

View File

@ -19,15 +19,6 @@
#include <pdqsort.h>
#if !defined(ARCADIA_BUILD)
# include <Common/config.h>
# if USE_OPENCL
# include "Common/BitonicSort.h" // Y_IGNORE
# endif
#else
#undef USE_OPENCL
#endif
#ifdef __SSE2__
#include <emmintrin.h>
#endif
@ -39,7 +30,6 @@ namespace ErrorCodes
{
extern const int PARAMETER_OUT_OF_BOUND;
extern const int SIZES_OF_COLUMNS_DOESNT_MATCH;
extern const int OPENCL_ERROR;
extern const int LOGICAL_ERROR;
}
@ -147,29 +137,6 @@ namespace
};
}
template <typename T>
void ColumnVector<T>::getSpecialPermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res,
IColumn::SpecialSort special_sort) const
{
if (special_sort == IColumn::SpecialSort::OPENCL_BITONIC)
{
#if !defined(ARCADIA_BUILD)
#if USE_OPENCL
if (!limit || limit >= data.size())
{
res.resize(data.size());
if (data.empty() || BitonicSort::getInstance().sort(data, res, !reverse))
return;
}
#else
throw DB::Exception("'special_sort = bitonic' specified but OpenCL not available", DB::ErrorCodes::OPENCL_ERROR);
#endif
#endif
}
getPermutation(reverse, limit, nan_direction_hint, res);
}
template <typename T>
void ColumnVector<T>::getPermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res) const

View File

@ -218,8 +218,6 @@ public:
}
void getPermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res) const override;
void getSpecialPermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res,
IColumn::SpecialSort) const override;
void updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges& equal_range) const override;

View File

@ -267,17 +267,6 @@ public:
*/
virtual void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const = 0;
enum class SpecialSort
{
NONE = 0,
OPENCL_BITONIC,
};
virtual void getSpecialPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, SpecialSort) const
{
getPermutation(reverse, limit, nan_direction_hint, res);
}
/*in updatePermutation we pass the current permutation and the intervals at which it should be sorted
* Then for each interval separately (except for the last one, if there is a limit)
* We sort it based on data about the current column, and find all the intervals within this

View File

@ -1,221 +0,0 @@
#pragma once
#include <string.h>
#if !defined(__APPLE__) && !defined(__FreeBSD__)
#include <malloc.h>
#endif
#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/cl.h>
#endif
#include <ext/bit_cast.h>
#include <common/types.h>
#include <Core/Defines.h>
#include <Common/PODArray.h>
#include <Columns/ColumnsCommon.h>
#include "oclBasics.h"
#include "bitonicSortKernels.cl"
class BitonicSort
{
public:
using KernelType = OCL::KernelType;
enum Types
{
KernelInt8 = 0,
KernelUInt8,
KernelInt16,
KernelUInt16,
KernelInt32,
KernelUInt32,
KernelInt64,
KernelUInt64,
KernelMax
};
static BitonicSort & getInstance()
{
static BitonicSort instance = BitonicSort();
return instance;
}
/// Sorts given array in specified order. Returns `true` if given sequence was sorted, `false` otherwise.
template <typename T>
bool sort(const DB::PaddedPODArray<T> & data, DB::IColumn::Permutation & res, cl_uint sort_ascending [[maybe_unused]]) const
{
if constexpr (
std::is_same_v<T, Int8> ||
std::is_same_v<T, UInt8> ||
std::is_same_v<T, Int16> ||
std::is_same_v<T, UInt16> ||
std::is_same_v<T, Int32> ||
std::is_same_v<T, UInt32> ||
std::is_same_v<T, Int64> ||
std::is_same_v<T, UInt64>)
{
size_t data_size = data.size();
/// Getting the nearest power of 2.
size_t power = 8;
while (power < data_size)
power <<= 1;
/// Allocates more space for additional stubs to be added if needed.
std::vector<T> pairs_content(power);
std::vector<UInt32> pairs_indices(power);
memcpy(&pairs_content[0], &data[0], sizeof(T) * data_size);
for (UInt32 i = 0; i < data_size; ++i)
pairs_indices[i] = i;
fillWithStubs(pairs_content.data(), pairs_indices.data(), data_size, power - data_size, sort_ascending);
sort(pairs_content.data(), pairs_indices.data(), power, sort_ascending);
for (size_t i = 0, shift = 0; i < power; ++i)
{
if (pairs_indices[i] >= data_size)
{
++shift;
continue;
}
res[i - shift] = pairs_indices[i];
}
return true;
}
return false;
}
/// Creating a configuration instance with making all OpenCl required variables
/// such as device, platform, context, queue, program and kernel.
void configure()
{
OCL::Settings settings = OCL::Settings(1, nullptr, 1, nullptr, 1, 0);
cl_platform_id platform = OCL::getPlatformID(settings);
cl_device_id device = OCL::getDeviceID(platform, settings);
cl_context gpu_context = OCL::makeContext(device, settings);
cl_command_queue command_queue = OCL::makeCommandQueue<2>(device, gpu_context, settings);
cl_program program = OCL::makeProgram(bitonic_sort_kernels, gpu_context, device, settings);
/// Creating kernels for each specified data type.
cl_int error = 0;
kernels.resize(KernelMax);
kernels[KernelInt8] = std::shared_ptr<KernelType>(clCreateKernel(program, "bitonicSort_char", &error), clReleaseKernel);
OCL::checkError(error);
kernels[KernelUInt8] = std::shared_ptr<KernelType>(clCreateKernel(program, "bitonicSort_uchar", &error), clReleaseKernel);
OCL::checkError(error);
kernels[KernelInt16] = std::shared_ptr<KernelType>(clCreateKernel(program, "bitonicSort_short", &error), clReleaseKernel);
OCL::checkError(error);
kernels[KernelUInt16] = std::shared_ptr<KernelType>(clCreateKernel(program, "bitonicSort_ushort", &error), clReleaseKernel);
OCL::checkError(error);
kernels[KernelInt32] = std::shared_ptr<KernelType>(clCreateKernel(program, "bitonicSort_int", &error), clReleaseKernel);
OCL::checkError(error);
kernels[KernelUInt32] = std::shared_ptr<KernelType>(clCreateKernel(program, "bitonicSort_uint", &error), clReleaseKernel);
OCL::checkError(error);
kernels[KernelInt64] = std::shared_ptr<KernelType>(clCreateKernel(program, "bitonicSort_long", &error), clReleaseKernel);
OCL::checkError(error);
kernels[KernelUInt64] = std::shared_ptr<KernelType>(clCreateKernel(program, "bitonicSort_ulong", &error), clReleaseKernel);
OCL::checkError(error);
configuration = std::shared_ptr<OCL::Configuration>(new OCL::Configuration(device, gpu_context, command_queue, program));
}
private:
/// Dictionary with kernels for each type from list: uchar, char, ushort, short, uint, int, ulong and long.
std::vector<std::shared_ptr<KernelType>> kernels;
/// Current configuration with core OpenCL instances.
std::shared_ptr<OCL::Configuration> configuration = nullptr;
cl_kernel getKernel(Int8) const { return kernels[KernelInt8].get(); }
cl_kernel getKernel(UInt8) const { return kernels[KernelUInt8].get(); }
cl_kernel getKernel(Int16) const { return kernels[KernelInt16].get(); }
cl_kernel getKernel(UInt16) const { return kernels[KernelUInt16].get(); }
cl_kernel getKernel(Int32) const { return kernels[KernelInt32].get(); }
cl_kernel getKernel(UInt32) const { return kernels[KernelUInt32].get(); }
cl_kernel getKernel(Int64) const { return kernels[KernelInt64].get(); }
cl_kernel getKernel(UInt64) const { return kernels[KernelUInt64].get(); }
/// Sorts p_input inplace with indices. Works only with arrays which size equals to power of two.
template <class T>
void sort(T * p_input, cl_uint * indices, cl_int array_size, cl_uint sort_ascending) const
{
cl_kernel kernel = getKernel(T(0));
cl_int error = CL_SUCCESS;
cl_int num_stages = 0;
for (cl_int temp = array_size; temp > 2; temp >>= 1)
num_stages++;
/// Creating OpenCL buffers using input arrays memory.
cl_mem cl_input_buffer = OCL::createBuffer<T>(p_input, array_size, configuration.get()->context());
cl_mem cl_indices_buffer = OCL::createBuffer<cl_uint>(indices, array_size, configuration.get()->context());
configureKernel<cl_mem>(kernel, 0, static_cast<void *>(&cl_input_buffer));
configureKernel<cl_mem>(kernel, 1, static_cast<void *>(&cl_indices_buffer));
configureKernel<cl_uint>(kernel, 4, static_cast<void *>(&sort_ascending));
for (cl_int stage = 0; stage < num_stages; stage++)
{
configureKernel<cl_uint>(kernel, 2, static_cast<void *>(&stage));
for (cl_int pass_of_stage = stage; pass_of_stage >= 0; pass_of_stage--)
{
configureKernel<cl_uint>(kernel, 3, static_cast<void *>(&pass_of_stage));
/// Setting work-item dimensions.
size_t gsize = array_size / (2 * 4);
size_t global_work_size[1] = {pass_of_stage ? gsize : gsize << 1 }; // number of quad items in input array
/// Executing kernel.
error = clEnqueueNDRangeKernel(configuration.get()->commandQueue(), kernel, 1, nullptr,
global_work_size, nullptr, 0, nullptr, nullptr);
OCL::checkError(error);
}
}
/// Syncs all threads.
OCL::finishCommandQueue(configuration.get()->commandQueue());
OCL::releaseData(p_input, array_size, cl_input_buffer, configuration.get()->commandQueue());
OCL::releaseData(indices, array_size, cl_indices_buffer, configuration.get()->commandQueue());
}
template <class T>
void configureKernel(cl_kernel kernel, int number_of_argument, void * source) const
{
cl_int error = clSetKernelArg(kernel, number_of_argument, sizeof(T), source);
OCL::checkError(error);
}
/// Fills given sequences from `arraySize` index with `numberOfStubs` values.
template <class T>
void fillWithStubs(T * p_input, cl_uint * indices, cl_int array_size, cl_int number_of_stubs, cl_uint sort_ascending) const
{
T value = sort_ascending ? std::numeric_limits<T>::max() : std::numeric_limits<T>::min();
for (cl_int index = 0; index < number_of_stubs; ++index)
{
p_input[array_size + index] = value;
indices[array_size + index] = array_size + index;
}
}
BitonicSort() = default;
BitonicSort(BitonicSort const &) = delete;
void operator = (BitonicSort const &) = delete;
};

View File

@ -486,7 +486,6 @@ namespace ErrorCodes
extern const int NO_REMOTE_SHARD_AVAILABLE = 519;
extern const int CANNOT_DETACH_DICTIONARY_AS_TABLE = 520;
extern const int ATOMIC_RENAME_FAIL = 521;
extern const int OPENCL_ERROR = 522;
extern const int UNKNOWN_ROW_POLICY = 523;
extern const int ALTER_OF_COLUMN_IS_FORBIDDEN = 524;
extern const int INCORRECT_DISK_INDEX = 525;

View File

@ -1,354 +0,0 @@
#pragma once
#include <Common/config.h>
#if USE_OPENCL
#if !defined(__APPLE__) && !defined(__FreeBSD__)
#include <malloc.h>
#endif
#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/cl.h>
#endif
#include <algorithm>
#include <common/types.h>
#include <Common/Exception.h>
namespace DB
{
namespace ErrorCodes
{
extern const int OPENCL_ERROR;
}
}
struct OCL
{
using KernelType = std::remove_reference<decltype(*cl_kernel())>::type;
/**
* Structure which represents the most essential settings of common OpenCl entities.
*/
struct Settings
{
// Platform info
cl_uint number_of_platform_entries;
cl_uint * number_of_available_platforms;
// Devices info
cl_uint number_of_devices_entries;
cl_uint * number_of_available_devices;
// Context settings
cl_context_properties * context_properties;
void (* context_callback)(const char *, const void *, size_t, void *);
void * context_callback_data;
// Command queue settings
cl_command_queue_properties command_queue_properties;
// Build settings
cl_uint number_of_program_source_pointers;
void (* build_notification_routine)(cl_program, void *user_data);
void * build_callback_data;
char * build_options;
Settings(cl_uint number_of_platform_entries_,
cl_uint * number_of_available_platforms_,
cl_uint number_of_devices_entries_,
cl_uint * number_of_available_devices_,
cl_uint number_of_program_source_pointers_,
cl_command_queue_properties command_queue_properties_,
cl_context_properties * context_properties_ = nullptr,
void * context_data_callback_ = nullptr,
void (* context_callback_)(const char *, const void *, size_t, void *) = nullptr,
void (* build_notification_routine_)(cl_program, void * user_data) = nullptr,
void * build_callback_data_ = nullptr,
char * build_options_ = nullptr)
{
this->number_of_platform_entries = number_of_platform_entries_;
this->number_of_available_platforms = number_of_available_platforms_;
this->number_of_devices_entries = number_of_devices_entries_;
this->number_of_available_devices = number_of_available_devices_;
this->number_of_program_source_pointers = number_of_program_source_pointers_;
this->command_queue_properties = command_queue_properties_;
this->context_properties = context_properties_;
this->context_callback = context_callback_;
this->context_callback_data = context_data_callback_;
this->build_notification_routine = build_notification_routine_;
this->build_callback_data = build_callback_data_;
this->build_options = build_options_;
}
};
/**
* Configuration with already created OpenCl common entities.
*/
class Configuration
{
public:
Configuration(cl_device_id device, cl_context gpu_context,
cl_command_queue command_queue, cl_program program)
{
this->device_ = device;
this->gpu_context_ = std::shared_ptr<ContextType>(gpu_context, clReleaseContext);
this->command_queue_ = std::shared_ptr<CommandQueueType>(command_queue, clReleaseCommandQueue);
this->program_ = std::shared_ptr<ProgramType>(program, clReleaseProgram);
}
cl_device_id device() { return device_; }
cl_context context() { return gpu_context_.get(); }
cl_command_queue commandQueue() { return command_queue_.get(); }
cl_program program() { return program_.get(); }
private:
using ProgramType = std::remove_reference<decltype(*cl_program())>::type;
using CommandQueueType = std::remove_reference<decltype(*cl_command_queue())>::type;
using ContextType = std::remove_reference<decltype(*cl_context())>::type;
cl_device_id device_;
std::shared_ptr<ContextType> gpu_context_;
std::shared_ptr<CommandQueueType> command_queue_;
std::shared_ptr<ProgramType> program_;
};
static String opencl_error_to_str(cl_int error)
{
#define CASE_CL_CONSTANT(NAME) case NAME: return #NAME;
// Suppose that no combinations are possible.
switch (error)
{
CASE_CL_CONSTANT(CL_SUCCESS)
CASE_CL_CONSTANT(CL_DEVICE_NOT_FOUND)
CASE_CL_CONSTANT(CL_DEVICE_NOT_AVAILABLE)
CASE_CL_CONSTANT(CL_COMPILER_NOT_AVAILABLE)
CASE_CL_CONSTANT(CL_MEM_OBJECT_ALLOCATION_FAILURE)
CASE_CL_CONSTANT(CL_OUT_OF_RESOURCES)
CASE_CL_CONSTANT(CL_OUT_OF_HOST_MEMORY)
CASE_CL_CONSTANT(CL_PROFILING_INFO_NOT_AVAILABLE)
CASE_CL_CONSTANT(CL_MEM_COPY_OVERLAP)
CASE_CL_CONSTANT(CL_IMAGE_FORMAT_MISMATCH)
CASE_CL_CONSTANT(CL_IMAGE_FORMAT_NOT_SUPPORTED)
CASE_CL_CONSTANT(CL_BUILD_PROGRAM_FAILURE)
CASE_CL_CONSTANT(CL_MAP_FAILURE)
CASE_CL_CONSTANT(CL_MISALIGNED_SUB_BUFFER_OFFSET)
CASE_CL_CONSTANT(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST)
CASE_CL_CONSTANT(CL_COMPILE_PROGRAM_FAILURE)
CASE_CL_CONSTANT(CL_LINKER_NOT_AVAILABLE)
CASE_CL_CONSTANT(CL_LINK_PROGRAM_FAILURE)
CASE_CL_CONSTANT(CL_DEVICE_PARTITION_FAILED)
CASE_CL_CONSTANT(CL_KERNEL_ARG_INFO_NOT_AVAILABLE)
CASE_CL_CONSTANT(CL_INVALID_VALUE)
CASE_CL_CONSTANT(CL_INVALID_DEVICE_TYPE)
CASE_CL_CONSTANT(CL_INVALID_PLATFORM)
CASE_CL_CONSTANT(CL_INVALID_DEVICE)
CASE_CL_CONSTANT(CL_INVALID_CONTEXT)
CASE_CL_CONSTANT(CL_INVALID_QUEUE_PROPERTIES)
CASE_CL_CONSTANT(CL_INVALID_COMMAND_QUEUE)
CASE_CL_CONSTANT(CL_INVALID_HOST_PTR)
CASE_CL_CONSTANT(CL_INVALID_MEM_OBJECT)
CASE_CL_CONSTANT(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR)
CASE_CL_CONSTANT(CL_INVALID_IMAGE_SIZE)
CASE_CL_CONSTANT(CL_INVALID_SAMPLER)
CASE_CL_CONSTANT(CL_INVALID_BINARY)
CASE_CL_CONSTANT(CL_INVALID_BUILD_OPTIONS)
CASE_CL_CONSTANT(CL_INVALID_PROGRAM)
CASE_CL_CONSTANT(CL_INVALID_PROGRAM_EXECUTABLE)
CASE_CL_CONSTANT(CL_INVALID_KERNEL_NAME)
CASE_CL_CONSTANT(CL_INVALID_KERNEL_DEFINITION)
CASE_CL_CONSTANT(CL_INVALID_KERNEL)
CASE_CL_CONSTANT(CL_INVALID_ARG_INDEX)
CASE_CL_CONSTANT(CL_INVALID_ARG_VALUE)
CASE_CL_CONSTANT(CL_INVALID_ARG_SIZE)
CASE_CL_CONSTANT(CL_INVALID_KERNEL_ARGS)
CASE_CL_CONSTANT(CL_INVALID_WORK_DIMENSION)
CASE_CL_CONSTANT(CL_INVALID_WORK_GROUP_SIZE)
CASE_CL_CONSTANT(CL_INVALID_WORK_ITEM_SIZE)
CASE_CL_CONSTANT(CL_INVALID_GLOBAL_OFFSET)
CASE_CL_CONSTANT(CL_INVALID_EVENT_WAIT_LIST)
CASE_CL_CONSTANT(CL_INVALID_EVENT)
CASE_CL_CONSTANT(CL_INVALID_OPERATION)
CASE_CL_CONSTANT(CL_INVALID_GL_OBJECT)
CASE_CL_CONSTANT(CL_INVALID_BUFFER_SIZE)
CASE_CL_CONSTANT(CL_INVALID_MIP_LEVEL)
CASE_CL_CONSTANT(CL_INVALID_GLOBAL_WORK_SIZE)
CASE_CL_CONSTANT(CL_INVALID_PROPERTY)
CASE_CL_CONSTANT(CL_INVALID_IMAGE_DESCRIPTOR)
CASE_CL_CONSTANT(CL_INVALID_COMPILER_OPTIONS)
CASE_CL_CONSTANT(CL_INVALID_LINKER_OPTIONS)
CASE_CL_CONSTANT(CL_INVALID_DEVICE_PARTITION_COUNT)
default:
return "UNKNOWN ERROR CODE ";
}
#undef CASE_CL_CONSTANT
}
static void checkError(cl_int error)
{
if (error != CL_SUCCESS)
throw DB::Exception("OpenCL error: " + opencl_error_to_str(error), DB::ErrorCodes::OPENCL_ERROR);
}
/// Getting OpenCl main entities.
static cl_platform_id getPlatformID(const Settings & settings)
{
cl_platform_id platform;
cl_int error = clGetPlatformIDs(settings.number_of_platform_entries, &platform,
settings.number_of_available_platforms);
checkError(error);
return platform;
}
static cl_device_id getDeviceID(cl_platform_id & platform, const Settings & settings)
{
cl_device_id device;
cl_int error = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, settings.number_of_devices_entries,
&device, settings.number_of_available_devices);
OCL::checkError(error);
return device;
}
static cl_context makeContext(cl_device_id & device, const Settings & settings)
{
cl_int error;
cl_context gpu_context = clCreateContext(settings.context_properties, settings.number_of_devices_entries,
&device, settings.context_callback, settings.context_callback_data,
&error);
OCL::checkError(error);
return gpu_context;
}
template <int version>
static cl_command_queue makeCommandQueue(cl_device_id & device, cl_context & context, const Settings & settings [[maybe_unused]])
{
cl_int error;
cl_command_queue command_queue;
if constexpr (version == 1)
{
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
command_queue = clCreateCommandQueue(context, device, settings.command_queue_properties, &error);
#pragma GCC diagnostic pop
}
else
{
#ifdef CL_VERSION_2_0
command_queue = clCreateCommandQueueWithProperties(context, device, nullptr, &error);
#else
throw DB::Exception("Binary is built with OpenCL version < 2.0", DB::ErrorCodes::OPENCL_ERROR);
#endif
}
OCL::checkError(error);
return command_queue;
}
static cl_program makeProgram(const char * source_code, cl_context context,
cl_device_id device_id, const Settings & settings)
{
cl_int error = 0;
size_t source_size = strlen(source_code);
cl_program program = clCreateProgramWithSource(context, settings.number_of_program_source_pointers,
&source_code, &source_size, &error);
checkError(error);
error = clBuildProgram(program, settings.number_of_devices_entries, &device_id, settings.build_options,
settings.build_notification_routine, settings.build_callback_data);
/// Combining additional logs output when program build failed.
if (error == CL_BUILD_PROGRAM_FAILURE)
{
size_t log_size;
error = clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, 0, nullptr, &log_size);
checkError(error);
std::vector<char> log(log_size);
clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, log_size, log.data(), nullptr);
checkError(error);
throw DB::Exception(log.data(), DB::ErrorCodes::OPENCL_ERROR);
}
checkError(error);
return program;
}
/// Configuring buffer for given input data
template<typename K>
static cl_mem createBuffer(K * p_input, cl_int array_size, cl_context context, cl_int elements_size = sizeof(K))
{
cl_int error = CL_SUCCESS;
cl_mem cl_input_buffer = clCreateBuffer(
context,
CL_MEM_USE_HOST_PTR,
zeroCopySizeAlignment(elements_size * array_size),
p_input,
&error);
checkError(error);
return cl_input_buffer;
}
static size_t zeroCopySizeAlignment(size_t required_size)
{
return required_size + (~required_size + 1) % 64;
}
/// Manipulating with common OpenCL variables.
static void finishCommandQueue(cl_command_queue command_queue)
{
// Blocks until all previously queued OpenCL commands in a queue are issued to the associated device.
cl_int error = clFinish(command_queue);
OCL::checkError(error);
}
template<class T>
static void releaseData(T * origin, cl_int array_size, cl_mem cl_buffer, cl_command_queue command_queue, size_t offset = 0)
{
cl_int error = CL_SUCCESS;
void * tmp_ptr = nullptr;
// No events specified to be completed before enqueueing buffers,
// so `num_events_in_wait_list` passed with `0` value.
tmp_ptr = clEnqueueMapBuffer(command_queue, cl_buffer, true, CL_MAP_READ,
offset, sizeof(cl_int) * array_size, 0, nullptr, nullptr, &error);
OCL::checkError(error);
if (tmp_ptr != origin)
throw DB::Exception("clEnqueueMapBuffer failed to return original pointer", DB::ErrorCodes::OPENCL_ERROR);
error = clEnqueueUnmapMemObject(command_queue, cl_buffer, tmp_ptr, 0, nullptr, nullptr);
checkError(error);
error = clReleaseMemObject(cl_buffer);
checkError(error);
}
};
#endif

View File

@ -35,11 +35,6 @@ add_executable (radix_sort radix_sort.cpp)
target_link_libraries (radix_sort PRIVATE clickhouse_common_io)
target_include_directories(radix_sort SYSTEM PRIVATE ${PDQSORT_INCLUDE_DIR})
if (USE_OPENCL)
add_executable (bitonic_sort bitonic_sort.cpp)
target_link_libraries (bitonic_sort PRIVATE clickhouse_common_io ${OPENCL_LINKER_FLAGS} ${OpenCL_LIBRARIES})
endif ()
add_executable (arena_with_free_lists arena_with_free_lists.cpp)
target_link_libraries (arena_with_free_lists PRIVATE dbms)

View File

@ -1,174 +0,0 @@
#include <Common/config.h>
#include <iostream>
#if !defined(__APPLE__) && !defined(__FreeBSD__)
#include <malloc.h>
#endif
#include <ext/bit_cast.h>
#include <Common/Stopwatch.h>
#include <IO/ReadHelpers.h>
#include <Core/Defines.h>
#include <climits>
#include <algorithm>
#include "Common/BitonicSort.h"
/// Generates vector of size 8 for testing.
/// Vector contains max possible value, min possible value and duplicate values.
template <class Type>
static void generateTest(std::vector<Type> & data, Type min_value, Type max_value)
{
int size = 10;
data.resize(size);
data[0] = 10;
data[1] = max_value;
data[2] = 10;
data[3] = 20;
data[4] = min_value;
data[5] = min_value + 1;
data[6] = max_value - 5;
data[7] = 1;
data[8] = 42;
data[9] = max_value - 1;
}
static void check(const std::vector<size_t> & indices, bool reverse = true)
{
std::vector<size_t> reference_indices{4, 5, 7, 0, 2, 3, 8, 6, 9, 1};
if (reverse) std::reverse(reference_indices.begin(), reference_indices.end());
bool success = true;
for (size_t index = 0; index < reference_indices.size(); ++index)
{
if (indices[index] != reference_indices[index])
{
success = false;
std::cerr << "Test failed. Reason: indices[" << index << "] = "
<< indices[index] << ", it must be equal to " << reference_indices[index] << "\n";
}
}
std::string order_description = reverse ? "descending" : "ascending";
std::cerr << "Sorted " << order_description << " sequence. Result: " << (success ? "Ok." : "Fail!") << "\n";
}
template <class Type>
static void sortBitonicSortWithPodArrays(const std::vector<Type> & data, std::vector<size_t> & indices, bool ascending = true)
{
DB::PaddedPODArray<Type> pod_array_data = DB::PaddedPODArray<Type>(data.size());
DB::IColumn::Permutation pod_array_indices = DB::IColumn::Permutation(data.size());
for (size_t index = 0; index < data.size(); ++index)
{
*(pod_array_data.data() + index) = data[index];
*(pod_array_indices.data() + index) = index;
}
BitonicSort::getInstance().sort(pod_array_data, pod_array_indices, ascending);
for (size_t index = 0; index < data.size(); ++index)
indices[index] = pod_array_indices[index];
}
template <class Type>
static void testBitonicSort(const std::string & test_name, Type min_value, Type max_value)
{
std::cerr << test_name << std::endl;
std::vector<Type> data;
generateTest<Type>(data, min_value, max_value);
std::vector<size_t> indices(data.size());
sortBitonicSortWithPodArrays(data, indices, true);
check(indices, false);
sortBitonicSortWithPodArrays(data, indices, false);
check(indices, true);
}
static void straightforwardTests()
{
testBitonicSort<DB::Int8>("Test 01: Int8.", CHAR_MIN, CHAR_MAX);
testBitonicSort<DB::UInt8>("Test 02: UInt8.", 0, UCHAR_MAX);
testBitonicSort<DB::Int16>("Test 03: Int16.", SHRT_MIN, SHRT_MAX);
testBitonicSort<DB::UInt16>("Test 04: UInt16.", 0, USHRT_MAX);
testBitonicSort<DB::Int32>("Test 05: Int32.", INT_MIN, INT_MAX);
testBitonicSort<DB::UInt32>("Test 06: UInt32.", 0, UINT_MAX);
testBitonicSort<DB::Int64>("Test 07: Int64.", LONG_MIN, LONG_MAX);
testBitonicSort<DB::UInt64>("Test 08: UInt64.", 0, ULONG_MAX);
}
template <typename T>
static void bitonicSort(std::vector<T> & data)
{
size_t size = data.size();
std::vector<size_t> indices(size);
for (size_t i = 0; i < size; ++i)
indices[i] = i;
sortBitonicSortWithPodArrays(data, indices);
std::vector<T> result(size);
for (size_t i = 0; i < size; ++i)
result[i] = data[indices[i]];
data = std::move(result);
}
template <typename T>
static bool checkSort(const std::vector<T> & data, size_t size)
{
std::vector<T> copy1(data.begin(), data.begin() + size);
std::vector<T> copy2(data.begin(), data.begin() + size);
std::sort(copy1.data(), copy1.data() + size);
bitonicSort<T>(copy2);
for (size_t i = 0; i < size; ++i)
if (copy1[i] != copy2[i])
return false;
return true;
}
int main()
{
BitonicSort::getInstance().configure();
straightforwardTests();
size_t size = 1100;
std::vector<int> data(size);
for (size_t i = 0; i < size; ++i)
data[i] = rand();
for (size_t i = 0; i < 128; ++i)
{
if (!checkSort<int>(data, i))
{
std::cerr << "fail at length " << i << std::endl;
return 1;
}
}
for (size_t i = 128; i < size; i += 7)
{
if (!checkSort<int>(data, i))
{
std::cerr << "fail at length " << i << std::endl;
return 1;
}
}
return 0;
}

View File

@ -105,8 +105,6 @@ class IColumn;
M(UInt64, parallel_replicas_count, 0, "", 0) \
M(UInt64, parallel_replica_offset, 0, "", 0) \
\
M(SpecialSort, special_sort, SpecialSort::NOT_SPECIFIED, "Specifies a sorting algorithm which will be using in ORDER BY query.", 0) \
\
M(Bool, skip_unavailable_shards, false, "If 1, ClickHouse silently skips unavailable shards and nodes unresolvable through DNS. Shard is marked as unavailable when none of the replicas can be reached.", 0) \
\
M(UInt64, parallel_distributed_insert_select, 0, "Process distributed INSERT SELECT query in the same cluster on local tables on every shard, if 1 SELECT is executed on each shard, if 2 SELECT and INSERT is executed on each shard", 0) \

View File

@ -23,11 +23,6 @@ IMPLEMENT_SETTING_ENUM(LoadBalancing, ErrorCodes::UNKNOWN_LOAD_BALANCING,
{"round_robin", LoadBalancing::ROUND_ROBIN}})
IMPLEMENT_SETTING_ENUM(SpecialSort, ErrorCodes::UNKNOWN_JOIN,
{{"not_specified", SpecialSort::NOT_SPECIFIED},
{"opencl_bitonic", SpecialSort::OPENCL_BITONIC}})
IMPLEMENT_SETTING_ENUM(JoinStrictness, ErrorCodes::UNKNOWN_JOIN,
{{"", JoinStrictness::Unspecified},
{"ALL", JoinStrictness::ALL},

View File

@ -47,15 +47,6 @@ enum class JoinAlgorithm
DECLARE_SETTING_ENUM(JoinAlgorithm)
enum class SpecialSort
{
NOT_SPECIFIED = 0,
OPENCL_BITONIC,
};
DECLARE_SETTING_ENUM(SpecialSort)
/// Which rows should be included in TOTALS.
enum class TotalsMode
{

View File

@ -32,22 +32,20 @@ struct SortColumnDescription
std::shared_ptr<Collator> collator; /// Collator for locale-specific comparison of strings
bool with_fill;
FillColumnDescription fill_description;
SpecialSort special_sort;
SortColumnDescription(
size_t column_number_, int direction_, int nulls_direction_,
const std::shared_ptr<Collator> & collator_ = nullptr, SpecialSort special_sort_ = SpecialSort::NOT_SPECIFIED,
const std::shared_ptr<Collator> & collator_ = nullptr,
bool with_fill_ = false, const FillColumnDescription & fill_description_ = {})
: column_number(column_number_), direction(direction_), nulls_direction(nulls_direction_), collator(collator_)
, with_fill(with_fill_), fill_description(fill_description_), special_sort(special_sort_) {}
, with_fill(with_fill_), fill_description(fill_description_) {}
SortColumnDescription(
const std::string & column_name_, int direction_, int nulls_direction_,
const std::shared_ptr<Collator> & collator_ = nullptr, SpecialSort special_sort_ = SpecialSort::NOT_SPECIFIED,
const std::shared_ptr<Collator> & collator_ = nullptr,
bool with_fill_ = false, const FillColumnDescription & fill_description_ = {})
: column_name(column_name_), column_number(0), direction(direction_), nulls_direction(nulls_direction_)
, collator(collator_), with_fill(with_fill_), fill_description(fill_description_), special_sort(special_sort_) {}
, collator(collator_), with_fill(with_fill_), fill_description(fill_description_) {}
bool operator == (const SortColumnDescription & other) const
{

View File

@ -621,7 +621,6 @@ static SortDescription getSortDescription(const ASTSelectQuery & query, const Co
{
SortDescription order_descr;
order_descr.reserve(query.orderBy()->children.size());
SpecialSort special_sort = context.getSettings().special_sort.value;
for (const auto & elem : query.orderBy()->children)
{
String name = elem->children.front()->getColumnName();
@ -635,10 +634,10 @@ static SortDescription getSortDescription(const ASTSelectQuery & query, const Co
{
FillColumnDescription fill_desc = getWithFillDescription(order_by_elem, context);
order_descr.emplace_back(name, order_by_elem.direction,
order_by_elem.nulls_direction, collator, special_sort, true, fill_desc);
order_by_elem.nulls_direction, collator, true, fill_desc);
}
else
order_descr.emplace_back(name, order_by_elem.direction, order_by_elem.nulls_direction, collator, special_sort);
order_descr.emplace_back(name, order_by_elem.direction, order_by_elem.nulls_direction, collator);
}
return order_descr;

View File

@ -132,12 +132,7 @@ void sortBlock(Block & block, const SortDescription & description, UInt64 limit)
else if (!isColumnConst(*column))
{
int nan_direction_hint = description[0].nulls_direction;
auto special_sort = description[0].special_sort;
if (special_sort == SpecialSort::OPENCL_BITONIC)
column->getSpecialPermutation(reverse, limit, nan_direction_hint, perm, IColumn::SpecialSort::OPENCL_BITONIC);
else
column->getPermutation(reverse, limit, nan_direction_hint, perm);
column->getPermutation(reverse, limit, nan_direction_hint, perm);
}
else
/// we don't need to do anything with const column