diff --git a/CMakeLists.txt b/CMakeLists.txt index cf97b2c40ff..a879ad9b076 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -404,7 +404,6 @@ include (cmake/find/amqpcpp.cmake) include (cmake/find/capnp.cmake) include (cmake/find/llvm.cmake) include (cmake/find/termcap.cmake) # for external static llvm -include (cmake/find/opencl.cmake) include (cmake/find/h3.cmake) include (cmake/find/libxml2.cmake) include (cmake/find/brotli.cmake) @@ -450,13 +449,6 @@ include (cmake/find/mysqlclient.cmake) # When testing for memory leaks with Valgrind, don't link tcmalloc or jemalloc. -if (USE_OPENCL) - if (OS_DARWIN) - set(OPENCL_LINKER_FLAGS "-framework OpenCL") - set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OPENCL_LINKER_FLAGS}") - endif () -endif () - include (cmake/print_flags.cmake) if (TARGET global-group) diff --git a/cmake/find/opencl.cmake b/cmake/find/opencl.cmake deleted file mode 100644 index 2b0cc7c5dd4..00000000000 --- a/cmake/find/opencl.cmake +++ /dev/null @@ -1,25 +0,0 @@ -# TODO: enable by default -if(0) - option(ENABLE_OPENCL "Enable OpenCL support" ${ENABLE_LIBRARIES}) -endif() - -if(NOT ENABLE_OPENCL) - return() -endif() - -# Intel OpenCl driver: sudo apt install intel-opencl-icd -# @sa https://github.com/intel/compute-runtime/releases - -# OpenCL applications should link with ICD loader -# sudo apt install opencl-headers ocl-icd-libopencl1 -# sudo ln -s /usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 /usr/lib/libOpenCL.so -# TODO: add https://github.com/OCL-dev/ocl-icd as submodule instead - -find_package(OpenCL) -if(OpenCL_FOUND) - set(USE_OPENCL 1) -else() - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't enable OpenCL support") -endif() - -message(STATUS "Using opencl=${USE_OPENCL}: ${OpenCL_INCLUDE_DIRS} : ${OpenCL_LIBRARIES}") diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b6e8c395b26..b058ab749b6 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -378,11 +378,6 @@ if (USE_BROTLI) target_include_directories (clickhouse_common_io SYSTEM BEFORE PRIVATE ${BROTLI_INCLUDE_DIR}) endif() -if (USE_OPENCL) - target_link_libraries (clickhouse_common_io PRIVATE ${OpenCL_LIBRARIES}) - target_include_directories (clickhouse_common_io SYSTEM BEFORE PRIVATE ${OpenCL_INCLUDE_DIRS}) -endif () - if (USE_CASSANDRA) dbms_target_link_libraries(PUBLIC ${CASSANDRA_LIBRARY}) dbms_target_include_directories (SYSTEM BEFORE PUBLIC ${CASS_INCLUDE_DIR}) diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp index 829dc7d7aaa..733a1510f93 100644 --- a/src/Columns/ColumnVector.cpp +++ b/src/Columns/ColumnVector.cpp @@ -19,15 +19,6 @@ #include -#if !defined(ARCADIA_BUILD) -# include -# if USE_OPENCL -# include "Common/BitonicSort.h" // Y_IGNORE -# endif -#else -#undef USE_OPENCL -#endif - #ifdef __SSE2__ #include #endif @@ -39,7 +30,6 @@ namespace ErrorCodes { extern const int PARAMETER_OUT_OF_BOUND; extern const int SIZES_OF_COLUMNS_DOESNT_MATCH; - extern const int OPENCL_ERROR; extern const int LOGICAL_ERROR; } @@ -147,29 +137,6 @@ namespace }; } -template -void ColumnVector::getSpecialPermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, - IColumn::SpecialSort special_sort) const -{ - if (special_sort == IColumn::SpecialSort::OPENCL_BITONIC) - { -#if !defined(ARCADIA_BUILD) -#if USE_OPENCL - if (!limit || limit >= data.size()) - { - res.resize(data.size()); - - if (data.empty() || BitonicSort::getInstance().sort(data, res, !reverse)) - return; - } -#else - throw DB::Exception("'special_sort = bitonic' specified but OpenCL not available", DB::ErrorCodes::OPENCL_ERROR); -#endif -#endif - } - - getPermutation(reverse, limit, nan_direction_hint, res); -} template void ColumnVector::getPermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res) const diff --git a/src/Columns/ColumnVector.h b/src/Columns/ColumnVector.h index 55ab67d6214..c6600ca7e31 100644 --- a/src/Columns/ColumnVector.h +++ b/src/Columns/ColumnVector.h @@ -218,8 +218,6 @@ public: } void getPermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res) const override; - void getSpecialPermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, - IColumn::SpecialSort) const override; void updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges& equal_range) const override; diff --git a/src/Columns/IColumn.h b/src/Columns/IColumn.h index 40ff0649f4f..14e6a9d7eed 100644 --- a/src/Columns/IColumn.h +++ b/src/Columns/IColumn.h @@ -267,17 +267,6 @@ public: */ virtual void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const = 0; - enum class SpecialSort - { - NONE = 0, - OPENCL_BITONIC, - }; - - virtual void getSpecialPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, SpecialSort) const - { - getPermutation(reverse, limit, nan_direction_hint, res); - } - /*in updatePermutation we pass the current permutation and the intervals at which it should be sorted * Then for each interval separately (except for the last one, if there is a limit) * We sort it based on data about the current column, and find all the intervals within this diff --git a/src/Common/BitonicSort.h b/src/Common/BitonicSort.h deleted file mode 100644 index 8140687c040..00000000000 --- a/src/Common/BitonicSort.h +++ /dev/null @@ -1,221 +0,0 @@ -#pragma once - -#include -#if !defined(__APPLE__) && !defined(__FreeBSD__) -#include -#endif - -#ifdef __APPLE__ -#include -#else -#include -#endif - -#include -#include -#include -#include -#include - -#include "oclBasics.h" -#include "bitonicSortKernels.cl" - -class BitonicSort -{ -public: - using KernelType = OCL::KernelType; - - enum Types - { - KernelInt8 = 0, - KernelUInt8, - KernelInt16, - KernelUInt16, - KernelInt32, - KernelUInt32, - KernelInt64, - KernelUInt64, - KernelMax - }; - - static BitonicSort & getInstance() - { - static BitonicSort instance = BitonicSort(); - return instance; - } - - /// Sorts given array in specified order. Returns `true` if given sequence was sorted, `false` otherwise. - template - bool sort(const DB::PaddedPODArray & data, DB::IColumn::Permutation & res, cl_uint sort_ascending [[maybe_unused]]) const - { - if constexpr ( - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v) - { - size_t data_size = data.size(); - - /// Getting the nearest power of 2. - size_t power = 8; - while (power < data_size) - power <<= 1; - - /// Allocates more space for additional stubs to be added if needed. - std::vector pairs_content(power); - std::vector pairs_indices(power); - - memcpy(&pairs_content[0], &data[0], sizeof(T) * data_size); - for (UInt32 i = 0; i < data_size; ++i) - pairs_indices[i] = i; - - fillWithStubs(pairs_content.data(), pairs_indices.data(), data_size, power - data_size, sort_ascending); - sort(pairs_content.data(), pairs_indices.data(), power, sort_ascending); - - for (size_t i = 0, shift = 0; i < power; ++i) - { - if (pairs_indices[i] >= data_size) - { - ++shift; - continue; - } - res[i - shift] = pairs_indices[i]; - } - - return true; - } - - return false; - } - - /// Creating a configuration instance with making all OpenCl required variables - /// such as device, platform, context, queue, program and kernel. - void configure() - { - OCL::Settings settings = OCL::Settings(1, nullptr, 1, nullptr, 1, 0); - - cl_platform_id platform = OCL::getPlatformID(settings); - cl_device_id device = OCL::getDeviceID(platform, settings); - cl_context gpu_context = OCL::makeContext(device, settings); - cl_command_queue command_queue = OCL::makeCommandQueue<2>(device, gpu_context, settings); - - cl_program program = OCL::makeProgram(bitonic_sort_kernels, gpu_context, device, settings); - - /// Creating kernels for each specified data type. - cl_int error = 0; - kernels.resize(KernelMax); - - kernels[KernelInt8] = std::shared_ptr(clCreateKernel(program, "bitonicSort_char", &error), clReleaseKernel); - OCL::checkError(error); - - kernels[KernelUInt8] = std::shared_ptr(clCreateKernel(program, "bitonicSort_uchar", &error), clReleaseKernel); - OCL::checkError(error); - - kernels[KernelInt16] = std::shared_ptr(clCreateKernel(program, "bitonicSort_short", &error), clReleaseKernel); - OCL::checkError(error); - - kernels[KernelUInt16] = std::shared_ptr(clCreateKernel(program, "bitonicSort_ushort", &error), clReleaseKernel); - OCL::checkError(error); - - kernels[KernelInt32] = std::shared_ptr(clCreateKernel(program, "bitonicSort_int", &error), clReleaseKernel); - OCL::checkError(error); - - kernels[KernelUInt32] = std::shared_ptr(clCreateKernel(program, "bitonicSort_uint", &error), clReleaseKernel); - OCL::checkError(error); - - kernels[KernelInt64] = std::shared_ptr(clCreateKernel(program, "bitonicSort_long", &error), clReleaseKernel); - OCL::checkError(error); - - kernels[KernelUInt64] = std::shared_ptr(clCreateKernel(program, "bitonicSort_ulong", &error), clReleaseKernel); - OCL::checkError(error); - - configuration = std::shared_ptr(new OCL::Configuration(device, gpu_context, command_queue, program)); - } - -private: - /// Dictionary with kernels for each type from list: uchar, char, ushort, short, uint, int, ulong and long. - std::vector> kernels; - /// Current configuration with core OpenCL instances. - std::shared_ptr configuration = nullptr; - - cl_kernel getKernel(Int8) const { return kernels[KernelInt8].get(); } - cl_kernel getKernel(UInt8) const { return kernels[KernelUInt8].get(); } - cl_kernel getKernel(Int16) const { return kernels[KernelInt16].get(); } - cl_kernel getKernel(UInt16) const { return kernels[KernelUInt16].get(); } - cl_kernel getKernel(Int32) const { return kernels[KernelInt32].get(); } - cl_kernel getKernel(UInt32) const { return kernels[KernelUInt32].get(); } - cl_kernel getKernel(Int64) const { return kernels[KernelInt64].get(); } - cl_kernel getKernel(UInt64) const { return kernels[KernelUInt64].get(); } - - /// Sorts p_input inplace with indices. Works only with arrays which size equals to power of two. - template - void sort(T * p_input, cl_uint * indices, cl_int array_size, cl_uint sort_ascending) const - { - cl_kernel kernel = getKernel(T(0)); - cl_int error = CL_SUCCESS; - cl_int num_stages = 0; - - for (cl_int temp = array_size; temp > 2; temp >>= 1) - num_stages++; - - /// Creating OpenCL buffers using input arrays memory. - cl_mem cl_input_buffer = OCL::createBuffer(p_input, array_size, configuration.get()->context()); - cl_mem cl_indices_buffer = OCL::createBuffer(indices, array_size, configuration.get()->context()); - - configureKernel(kernel, 0, static_cast(&cl_input_buffer)); - configureKernel(kernel, 1, static_cast(&cl_indices_buffer)); - configureKernel(kernel, 4, static_cast(&sort_ascending)); - - for (cl_int stage = 0; stage < num_stages; stage++) - { - configureKernel(kernel, 2, static_cast(&stage)); - - for (cl_int pass_of_stage = stage; pass_of_stage >= 0; pass_of_stage--) - { - configureKernel(kernel, 3, static_cast(&pass_of_stage)); - - /// Setting work-item dimensions. - size_t gsize = array_size / (2 * 4); - size_t global_work_size[1] = {pass_of_stage ? gsize : gsize << 1 }; // number of quad items in input array - - /// Executing kernel. - error = clEnqueueNDRangeKernel(configuration.get()->commandQueue(), kernel, 1, nullptr, - global_work_size, nullptr, 0, nullptr, nullptr); - OCL::checkError(error); - } - } - - /// Syncs all threads. - OCL::finishCommandQueue(configuration.get()->commandQueue()); - - OCL::releaseData(p_input, array_size, cl_input_buffer, configuration.get()->commandQueue()); - OCL::releaseData(indices, array_size, cl_indices_buffer, configuration.get()->commandQueue()); - } - - template - void configureKernel(cl_kernel kernel, int number_of_argument, void * source) const - { - cl_int error = clSetKernelArg(kernel, number_of_argument, sizeof(T), source); - OCL::checkError(error); - } - - /// Fills given sequences from `arraySize` index with `numberOfStubs` values. - template - void fillWithStubs(T * p_input, cl_uint * indices, cl_int array_size, cl_int number_of_stubs, cl_uint sort_ascending) const - { - T value = sort_ascending ? std::numeric_limits::max() : std::numeric_limits::min(); - for (cl_int index = 0; index < number_of_stubs; ++index) - { - p_input[array_size + index] = value; - indices[array_size + index] = array_size + index; - } - } - - BitonicSort() = default; - BitonicSort(BitonicSort const &) = delete; - void operator = (BitonicSort const &) = delete; -}; diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index 85da23fb303..bf475bc9b21 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -486,7 +486,6 @@ namespace ErrorCodes extern const int NO_REMOTE_SHARD_AVAILABLE = 519; extern const int CANNOT_DETACH_DICTIONARY_AS_TABLE = 520; extern const int ATOMIC_RENAME_FAIL = 521; - extern const int OPENCL_ERROR = 522; extern const int UNKNOWN_ROW_POLICY = 523; extern const int ALTER_OF_COLUMN_IS_FORBIDDEN = 524; extern const int INCORRECT_DISK_INDEX = 525; diff --git a/src/Common/oclBasics.h b/src/Common/oclBasics.h deleted file mode 100644 index a3e7636af1b..00000000000 --- a/src/Common/oclBasics.h +++ /dev/null @@ -1,354 +0,0 @@ -#pragma once - -#include -#if USE_OPENCL - -#if !defined(__APPLE__) && !defined(__FreeBSD__) -#include -#endif - -#ifdef __APPLE__ -#include -#else -#include -#endif - -#include -#include -#include - - -namespace DB -{ -namespace ErrorCodes -{ - extern const int OPENCL_ERROR; -} -} - -struct OCL -{ - using KernelType = std::remove_reference::type; - - /** - * Structure which represents the most essential settings of common OpenCl entities. - */ - struct Settings - { - // Platform info - cl_uint number_of_platform_entries; - cl_uint * number_of_available_platforms; - - // Devices info - cl_uint number_of_devices_entries; - cl_uint * number_of_available_devices; - - // Context settings - cl_context_properties * context_properties; - - void (* context_callback)(const char *, const void *, size_t, void *); - - void * context_callback_data; - - // Command queue settings - cl_command_queue_properties command_queue_properties; - - // Build settings - cl_uint number_of_program_source_pointers; - - void (* build_notification_routine)(cl_program, void *user_data); - - void * build_callback_data; - char * build_options; - - Settings(cl_uint number_of_platform_entries_, - cl_uint * number_of_available_platforms_, - cl_uint number_of_devices_entries_, - cl_uint * number_of_available_devices_, - cl_uint number_of_program_source_pointers_, - cl_command_queue_properties command_queue_properties_, - cl_context_properties * context_properties_ = nullptr, - void * context_data_callback_ = nullptr, - void (* context_callback_)(const char *, const void *, size_t, void *) = nullptr, - void (* build_notification_routine_)(cl_program, void * user_data) = nullptr, - void * build_callback_data_ = nullptr, - char * build_options_ = nullptr) - { - this->number_of_platform_entries = number_of_platform_entries_; - this->number_of_available_platforms = number_of_available_platforms_; - this->number_of_devices_entries = number_of_devices_entries_; - this->number_of_available_devices = number_of_available_devices_; - this->number_of_program_source_pointers = number_of_program_source_pointers_; - this->command_queue_properties = command_queue_properties_; - this->context_properties = context_properties_; - this->context_callback = context_callback_; - this->context_callback_data = context_data_callback_; - this->build_notification_routine = build_notification_routine_; - this->build_callback_data = build_callback_data_; - this->build_options = build_options_; - } - }; - - - /** - * Configuration with already created OpenCl common entities. - */ - class Configuration - { - public: - - Configuration(cl_device_id device, cl_context gpu_context, - cl_command_queue command_queue, cl_program program) - { - this->device_ = device; - this->gpu_context_ = std::shared_ptr(gpu_context, clReleaseContext); - this->command_queue_ = std::shared_ptr(command_queue, clReleaseCommandQueue); - this->program_ = std::shared_ptr(program, clReleaseProgram); - } - - cl_device_id device() { return device_; } - - cl_context context() { return gpu_context_.get(); } - - cl_command_queue commandQueue() { return command_queue_.get(); } - - cl_program program() { return program_.get(); } - - private: - - using ProgramType = std::remove_reference::type; - using CommandQueueType = std::remove_reference::type; - using ContextType = std::remove_reference::type; - - cl_device_id device_; - - std::shared_ptr gpu_context_; - std::shared_ptr command_queue_; - std::shared_ptr program_; - }; - - - static String opencl_error_to_str(cl_int error) - { -#define CASE_CL_CONSTANT(NAME) case NAME: return #NAME; - - // Suppose that no combinations are possible. - switch (error) - { - CASE_CL_CONSTANT(CL_SUCCESS) - CASE_CL_CONSTANT(CL_DEVICE_NOT_FOUND) - CASE_CL_CONSTANT(CL_DEVICE_NOT_AVAILABLE) - CASE_CL_CONSTANT(CL_COMPILER_NOT_AVAILABLE) - CASE_CL_CONSTANT(CL_MEM_OBJECT_ALLOCATION_FAILURE) - CASE_CL_CONSTANT(CL_OUT_OF_RESOURCES) - CASE_CL_CONSTANT(CL_OUT_OF_HOST_MEMORY) - CASE_CL_CONSTANT(CL_PROFILING_INFO_NOT_AVAILABLE) - CASE_CL_CONSTANT(CL_MEM_COPY_OVERLAP) - CASE_CL_CONSTANT(CL_IMAGE_FORMAT_MISMATCH) - CASE_CL_CONSTANT(CL_IMAGE_FORMAT_NOT_SUPPORTED) - CASE_CL_CONSTANT(CL_BUILD_PROGRAM_FAILURE) - CASE_CL_CONSTANT(CL_MAP_FAILURE) - CASE_CL_CONSTANT(CL_MISALIGNED_SUB_BUFFER_OFFSET) - CASE_CL_CONSTANT(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST) - CASE_CL_CONSTANT(CL_COMPILE_PROGRAM_FAILURE) - CASE_CL_CONSTANT(CL_LINKER_NOT_AVAILABLE) - CASE_CL_CONSTANT(CL_LINK_PROGRAM_FAILURE) - CASE_CL_CONSTANT(CL_DEVICE_PARTITION_FAILED) - CASE_CL_CONSTANT(CL_KERNEL_ARG_INFO_NOT_AVAILABLE) - CASE_CL_CONSTANT(CL_INVALID_VALUE) - CASE_CL_CONSTANT(CL_INVALID_DEVICE_TYPE) - CASE_CL_CONSTANT(CL_INVALID_PLATFORM) - CASE_CL_CONSTANT(CL_INVALID_DEVICE) - CASE_CL_CONSTANT(CL_INVALID_CONTEXT) - CASE_CL_CONSTANT(CL_INVALID_QUEUE_PROPERTIES) - CASE_CL_CONSTANT(CL_INVALID_COMMAND_QUEUE) - CASE_CL_CONSTANT(CL_INVALID_HOST_PTR) - CASE_CL_CONSTANT(CL_INVALID_MEM_OBJECT) - CASE_CL_CONSTANT(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR) - CASE_CL_CONSTANT(CL_INVALID_IMAGE_SIZE) - CASE_CL_CONSTANT(CL_INVALID_SAMPLER) - CASE_CL_CONSTANT(CL_INVALID_BINARY) - CASE_CL_CONSTANT(CL_INVALID_BUILD_OPTIONS) - CASE_CL_CONSTANT(CL_INVALID_PROGRAM) - CASE_CL_CONSTANT(CL_INVALID_PROGRAM_EXECUTABLE) - CASE_CL_CONSTANT(CL_INVALID_KERNEL_NAME) - CASE_CL_CONSTANT(CL_INVALID_KERNEL_DEFINITION) - CASE_CL_CONSTANT(CL_INVALID_KERNEL) - CASE_CL_CONSTANT(CL_INVALID_ARG_INDEX) - CASE_CL_CONSTANT(CL_INVALID_ARG_VALUE) - CASE_CL_CONSTANT(CL_INVALID_ARG_SIZE) - CASE_CL_CONSTANT(CL_INVALID_KERNEL_ARGS) - CASE_CL_CONSTANT(CL_INVALID_WORK_DIMENSION) - CASE_CL_CONSTANT(CL_INVALID_WORK_GROUP_SIZE) - CASE_CL_CONSTANT(CL_INVALID_WORK_ITEM_SIZE) - CASE_CL_CONSTANT(CL_INVALID_GLOBAL_OFFSET) - CASE_CL_CONSTANT(CL_INVALID_EVENT_WAIT_LIST) - CASE_CL_CONSTANT(CL_INVALID_EVENT) - CASE_CL_CONSTANT(CL_INVALID_OPERATION) - CASE_CL_CONSTANT(CL_INVALID_GL_OBJECT) - CASE_CL_CONSTANT(CL_INVALID_BUFFER_SIZE) - CASE_CL_CONSTANT(CL_INVALID_MIP_LEVEL) - CASE_CL_CONSTANT(CL_INVALID_GLOBAL_WORK_SIZE) - CASE_CL_CONSTANT(CL_INVALID_PROPERTY) - CASE_CL_CONSTANT(CL_INVALID_IMAGE_DESCRIPTOR) - CASE_CL_CONSTANT(CL_INVALID_COMPILER_OPTIONS) - CASE_CL_CONSTANT(CL_INVALID_LINKER_OPTIONS) - CASE_CL_CONSTANT(CL_INVALID_DEVICE_PARTITION_COUNT) - default: - return "UNKNOWN ERROR CODE "; - } - -#undef CASE_CL_CONSTANT - } - - - static void checkError(cl_int error) - { - if (error != CL_SUCCESS) - throw DB::Exception("OpenCL error: " + opencl_error_to_str(error), DB::ErrorCodes::OPENCL_ERROR); - } - - - /// Getting OpenCl main entities. - - static cl_platform_id getPlatformID(const Settings & settings) - { - cl_platform_id platform; - cl_int error = clGetPlatformIDs(settings.number_of_platform_entries, &platform, - settings.number_of_available_platforms); - checkError(error); - return platform; - } - - static cl_device_id getDeviceID(cl_platform_id & platform, const Settings & settings) - { - cl_device_id device; - cl_int error = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, settings.number_of_devices_entries, - &device, settings.number_of_available_devices); - OCL::checkError(error); - return device; - } - - static cl_context makeContext(cl_device_id & device, const Settings & settings) - { - cl_int error; - cl_context gpu_context = clCreateContext(settings.context_properties, settings.number_of_devices_entries, - &device, settings.context_callback, settings.context_callback_data, - &error); - OCL::checkError(error); - return gpu_context; - } - - template - static cl_command_queue makeCommandQueue(cl_device_id & device, cl_context & context, const Settings & settings [[maybe_unused]]) - { - cl_int error; - cl_command_queue command_queue; - - if constexpr (version == 1) - { -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" - command_queue = clCreateCommandQueue(context, device, settings.command_queue_properties, &error); -#pragma GCC diagnostic pop - } - else - { -#ifdef CL_VERSION_2_0 - command_queue = clCreateCommandQueueWithProperties(context, device, nullptr, &error); -#else - throw DB::Exception("Binary is built with OpenCL version < 2.0", DB::ErrorCodes::OPENCL_ERROR); -#endif - } - - OCL::checkError(error); - return command_queue; - } - - static cl_program makeProgram(const char * source_code, cl_context context, - cl_device_id device_id, const Settings & settings) - { - cl_int error = 0; - size_t source_size = strlen(source_code); - - cl_program program = clCreateProgramWithSource(context, settings.number_of_program_source_pointers, - &source_code, &source_size, &error); - checkError(error); - - error = clBuildProgram(program, settings.number_of_devices_entries, &device_id, settings.build_options, - settings.build_notification_routine, settings.build_callback_data); - - /// Combining additional logs output when program build failed. - if (error == CL_BUILD_PROGRAM_FAILURE) - { - size_t log_size; - error = clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, 0, nullptr, &log_size); - - checkError(error); - - std::vector log(log_size); - clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, log_size, log.data(), nullptr); - - checkError(error); - throw DB::Exception(log.data(), DB::ErrorCodes::OPENCL_ERROR); - } - - checkError(error); - return program; - } - - /// Configuring buffer for given input data - - template - static cl_mem createBuffer(K * p_input, cl_int array_size, cl_context context, cl_int elements_size = sizeof(K)) - { - cl_int error = CL_SUCCESS; - cl_mem cl_input_buffer = clCreateBuffer( - context, - CL_MEM_USE_HOST_PTR, - zeroCopySizeAlignment(elements_size * array_size), - p_input, - &error); - checkError(error); - return cl_input_buffer; - } - - static size_t zeroCopySizeAlignment(size_t required_size) - { - return required_size + (~required_size + 1) % 64; - } - - /// Manipulating with common OpenCL variables. - - static void finishCommandQueue(cl_command_queue command_queue) - { - // Blocks until all previously queued OpenCL commands in a queue are issued to the associated device. - cl_int error = clFinish(command_queue); - OCL::checkError(error); - } - - template - static void releaseData(T * origin, cl_int array_size, cl_mem cl_buffer, cl_command_queue command_queue, size_t offset = 0) - { - cl_int error = CL_SUCCESS; - - void * tmp_ptr = nullptr; - - // No events specified to be completed before enqueueing buffers, - // so `num_events_in_wait_list` passed with `0` value. - - tmp_ptr = clEnqueueMapBuffer(command_queue, cl_buffer, true, CL_MAP_READ, - offset, sizeof(cl_int) * array_size, 0, nullptr, nullptr, &error); - OCL::checkError(error); - if (tmp_ptr != origin) - throw DB::Exception("clEnqueueMapBuffer failed to return original pointer", DB::ErrorCodes::OPENCL_ERROR); - - error = clEnqueueUnmapMemObject(command_queue, cl_buffer, tmp_ptr, 0, nullptr, nullptr); - checkError(error); - - error = clReleaseMemObject(cl_buffer); - checkError(error); - } -}; - -#endif diff --git a/src/Common/tests/CMakeLists.txt b/src/Common/tests/CMakeLists.txt index 8de9424e044..6a39c2f8553 100644 --- a/src/Common/tests/CMakeLists.txt +++ b/src/Common/tests/CMakeLists.txt @@ -35,11 +35,6 @@ add_executable (radix_sort radix_sort.cpp) target_link_libraries (radix_sort PRIVATE clickhouse_common_io) target_include_directories(radix_sort SYSTEM PRIVATE ${PDQSORT_INCLUDE_DIR}) -if (USE_OPENCL) - add_executable (bitonic_sort bitonic_sort.cpp) - target_link_libraries (bitonic_sort PRIVATE clickhouse_common_io ${OPENCL_LINKER_FLAGS} ${OpenCL_LIBRARIES}) -endif () - add_executable (arena_with_free_lists arena_with_free_lists.cpp) target_link_libraries (arena_with_free_lists PRIVATE dbms) diff --git a/src/Common/tests/bitonic_sort.cpp b/src/Common/tests/bitonic_sort.cpp deleted file mode 100644 index 2545662c8cb..00000000000 --- a/src/Common/tests/bitonic_sort.cpp +++ /dev/null @@ -1,174 +0,0 @@ -#include -#include - -#if !defined(__APPLE__) && !defined(__FreeBSD__) -#include -#endif -#include -#include -#include -#include -#include -#include - -#include "Common/BitonicSort.h" - - -/// Generates vector of size 8 for testing. -/// Vector contains max possible value, min possible value and duplicate values. -template -static void generateTest(std::vector & data, Type min_value, Type max_value) -{ - int size = 10; - - data.resize(size); - data[0] = 10; - data[1] = max_value; - data[2] = 10; - data[3] = 20; - data[4] = min_value; - data[5] = min_value + 1; - data[6] = max_value - 5; - data[7] = 1; - data[8] = 42; - data[9] = max_value - 1; -} - - -static void check(const std::vector & indices, bool reverse = true) -{ - std::vector reference_indices{4, 5, 7, 0, 2, 3, 8, 6, 9, 1}; - if (reverse) std::reverse(reference_indices.begin(), reference_indices.end()); - - bool success = true; - for (size_t index = 0; index < reference_indices.size(); ++index) - { - if (indices[index] != reference_indices[index]) - { - success = false; - std::cerr << "Test failed. Reason: indices[" << index << "] = " - << indices[index] << ", it must be equal to " << reference_indices[index] << "\n"; - } - } - - std::string order_description = reverse ? "descending" : "ascending"; - std::cerr << "Sorted " << order_description << " sequence. Result: " << (success ? "Ok." : "Fail!") << "\n"; -} - - -template -static void sortBitonicSortWithPodArrays(const std::vector & data, std::vector & indices, bool ascending = true) -{ - DB::PaddedPODArray pod_array_data = DB::PaddedPODArray(data.size()); - DB::IColumn::Permutation pod_array_indices = DB::IColumn::Permutation(data.size()); - - for (size_t index = 0; index < data.size(); ++index) - { - *(pod_array_data.data() + index) = data[index]; - *(pod_array_indices.data() + index) = index; - } - - BitonicSort::getInstance().sort(pod_array_data, pod_array_indices, ascending); - - for (size_t index = 0; index < data.size(); ++index) - indices[index] = pod_array_indices[index]; -} - - -template -static void testBitonicSort(const std::string & test_name, Type min_value, Type max_value) -{ - std::cerr << test_name << std::endl; - - std::vector data; - generateTest(data, min_value, max_value); - - std::vector indices(data.size()); - - sortBitonicSortWithPodArrays(data, indices, true); - check(indices, false); - - sortBitonicSortWithPodArrays(data, indices, false); - check(indices, true); -} - - -static void straightforwardTests() -{ - testBitonicSort("Test 01: Int8.", CHAR_MIN, CHAR_MAX); - testBitonicSort("Test 02: UInt8.", 0, UCHAR_MAX); - testBitonicSort("Test 03: Int16.", SHRT_MIN, SHRT_MAX); - testBitonicSort("Test 04: UInt16.", 0, USHRT_MAX); - testBitonicSort("Test 05: Int32.", INT_MIN, INT_MAX); - testBitonicSort("Test 06: UInt32.", 0, UINT_MAX); - testBitonicSort("Test 07: Int64.", LONG_MIN, LONG_MAX); - testBitonicSort("Test 08: UInt64.", 0, ULONG_MAX); -} - - -template -static void bitonicSort(std::vector & data) -{ - size_t size = data.size(); - std::vector indices(size); - for (size_t i = 0; i < size; ++i) - indices[i] = i; - - sortBitonicSortWithPodArrays(data, indices); - - std::vector result(size); - for (size_t i = 0; i < size; ++i) - result[i] = data[indices[i]]; - - data = std::move(result); -} - - -template -static bool checkSort(const std::vector & data, size_t size) -{ - std::vector copy1(data.begin(), data.begin() + size); - std::vector copy2(data.begin(), data.begin() + size); - - std::sort(copy1.data(), copy1.data() + size); - bitonicSort(copy2); - - for (size_t i = 0; i < size; ++i) - if (copy1[i] != copy2[i]) - return false; - - return true; -} - - -int main() -{ - BitonicSort::getInstance().configure(); - - straightforwardTests(); - - size_t size = 1100; - std::vector data(size); - for (size_t i = 0; i < size; ++i) - data[i] = rand(); - - for (size_t i = 0; i < 128; ++i) - { - if (!checkSort(data, i)) - { - std::cerr << "fail at length " << i << std::endl; - return 1; - } - } - - for (size_t i = 128; i < size; i += 7) - { - if (!checkSort(data, i)) - { - std::cerr << "fail at length " << i << std::endl; - return 1; - } - } - - return 0; -} diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 5417bbad64c..b96b1b12c24 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -105,8 +105,6 @@ class IColumn; M(UInt64, parallel_replicas_count, 0, "", 0) \ M(UInt64, parallel_replica_offset, 0, "", 0) \ \ - M(SpecialSort, special_sort, SpecialSort::NOT_SPECIFIED, "Specifies a sorting algorithm which will be using in ORDER BY query.", 0) \ - \ M(Bool, skip_unavailable_shards, false, "If 1, ClickHouse silently skips unavailable shards and nodes unresolvable through DNS. Shard is marked as unavailable when none of the replicas can be reached.", 0) \ \ M(UInt64, parallel_distributed_insert_select, 0, "Process distributed INSERT SELECT query in the same cluster on local tables on every shard, if 1 SELECT is executed on each shard, if 2 SELECT and INSERT is executed on each shard", 0) \ diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index c0d2906e2fc..b4db51a506d 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -23,11 +23,6 @@ IMPLEMENT_SETTING_ENUM(LoadBalancing, ErrorCodes::UNKNOWN_LOAD_BALANCING, {"round_robin", LoadBalancing::ROUND_ROBIN}}) -IMPLEMENT_SETTING_ENUM(SpecialSort, ErrorCodes::UNKNOWN_JOIN, - {{"not_specified", SpecialSort::NOT_SPECIFIED}, - {"opencl_bitonic", SpecialSort::OPENCL_BITONIC}}) - - IMPLEMENT_SETTING_ENUM(JoinStrictness, ErrorCodes::UNKNOWN_JOIN, {{"", JoinStrictness::Unspecified}, {"ALL", JoinStrictness::ALL}, diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index 7ed5ffb0c35..426497fff78 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -47,15 +47,6 @@ enum class JoinAlgorithm DECLARE_SETTING_ENUM(JoinAlgorithm) -enum class SpecialSort -{ - NOT_SPECIFIED = 0, - OPENCL_BITONIC, -}; - -DECLARE_SETTING_ENUM(SpecialSort) - - /// Which rows should be included in TOTALS. enum class TotalsMode { diff --git a/src/Core/SortDescription.h b/src/Core/SortDescription.h index 2198789b0b6..bd3b7bc45ff 100644 --- a/src/Core/SortDescription.h +++ b/src/Core/SortDescription.h @@ -32,22 +32,20 @@ struct SortColumnDescription std::shared_ptr collator; /// Collator for locale-specific comparison of strings bool with_fill; FillColumnDescription fill_description; - SpecialSort special_sort; - SortColumnDescription( size_t column_number_, int direction_, int nulls_direction_, - const std::shared_ptr & collator_ = nullptr, SpecialSort special_sort_ = SpecialSort::NOT_SPECIFIED, + const std::shared_ptr & collator_ = nullptr, bool with_fill_ = false, const FillColumnDescription & fill_description_ = {}) : column_number(column_number_), direction(direction_), nulls_direction(nulls_direction_), collator(collator_) - , with_fill(with_fill_), fill_description(fill_description_), special_sort(special_sort_) {} + , with_fill(with_fill_), fill_description(fill_description_) {} SortColumnDescription( const std::string & column_name_, int direction_, int nulls_direction_, - const std::shared_ptr & collator_ = nullptr, SpecialSort special_sort_ = SpecialSort::NOT_SPECIFIED, + const std::shared_ptr & collator_ = nullptr, bool with_fill_ = false, const FillColumnDescription & fill_description_ = {}) : column_name(column_name_), column_number(0), direction(direction_), nulls_direction(nulls_direction_) - , collator(collator_), with_fill(with_fill_), fill_description(fill_description_), special_sort(special_sort_) {} + , collator(collator_), with_fill(with_fill_), fill_description(fill_description_) {} bool operator == (const SortColumnDescription & other) const { diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 8f9f22e6ee2..22106387fc4 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -621,7 +621,6 @@ static SortDescription getSortDescription(const ASTSelectQuery & query, const Co { SortDescription order_descr; order_descr.reserve(query.orderBy()->children.size()); - SpecialSort special_sort = context.getSettings().special_sort.value; for (const auto & elem : query.orderBy()->children) { String name = elem->children.front()->getColumnName(); @@ -635,10 +634,10 @@ static SortDescription getSortDescription(const ASTSelectQuery & query, const Co { FillColumnDescription fill_desc = getWithFillDescription(order_by_elem, context); order_descr.emplace_back(name, order_by_elem.direction, - order_by_elem.nulls_direction, collator, special_sort, true, fill_desc); + order_by_elem.nulls_direction, collator, true, fill_desc); } else - order_descr.emplace_back(name, order_by_elem.direction, order_by_elem.nulls_direction, collator, special_sort); + order_descr.emplace_back(name, order_by_elem.direction, order_by_elem.nulls_direction, collator); } return order_descr; diff --git a/src/Interpreters/sortBlock.cpp b/src/Interpreters/sortBlock.cpp index d84708b9c57..c2436806fcd 100644 --- a/src/Interpreters/sortBlock.cpp +++ b/src/Interpreters/sortBlock.cpp @@ -132,12 +132,7 @@ void sortBlock(Block & block, const SortDescription & description, UInt64 limit) else if (!isColumnConst(*column)) { int nan_direction_hint = description[0].nulls_direction; - auto special_sort = description[0].special_sort; - - if (special_sort == SpecialSort::OPENCL_BITONIC) - column->getSpecialPermutation(reverse, limit, nan_direction_hint, perm, IColumn::SpecialSort::OPENCL_BITONIC); - else - column->getPermutation(reverse, limit, nan_direction_hint, perm); + column->getPermutation(reverse, limit, nan_direction_hint, perm); } else /// we don't need to do anything with const column