diff --git a/CMakeLists.txt b/CMakeLists.txt index 11bc21e62ed..09227fead49 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -25,17 +25,10 @@ endif () # Write compile_commands.json set(CMAKE_EXPORT_COMPILE_COMMANDS 1) -set(PARALLEL_COMPILE_JOBS "" CACHE STRING "Define the maximum number of concurrent compilation jobs") -if (PARALLEL_COMPILE_JOBS) - set_property(GLOBAL APPEND PROPERTY JOB_POOLS compile_job_pool="${PARALLEL_COMPILE_JOBS}") - set(CMAKE_JOB_POOL_COMPILE compile_job_pool) -endif () -set(PARALLEL_LINK_JOBS "" CACHE STRING "Define the maximum number of concurrent link jobs") -if (LLVM_PARALLEL_LINK_JOBS) - set_property(GLOBAL APPEND PROPERTY JOB_POOLS link_job_pool=${PARALLEL_LINK_JOBS}) - set(CMAKE_JOB_POOL_LINK link_job_pool) -endif () +set (MAX_COMPILER_MEMORY 2000 CACHE INTERNAL "") +set (MAX_LINKER_MEMORY 3500 CACHE INTERNAL "") +include (cmake/limit_jobs.cmake) include (cmake/find_ccache.cmake) @@ -162,51 +155,8 @@ set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${COMPILER_FLAGS} -fn set (CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -O3 ${CMAKE_C_FLAGS_ADD}") set (CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -O0 -g3 -ggdb3 -fno-inline ${CMAKE_C_FLAGS_ADD}") -set(THREADS_PREFER_PTHREAD_FLAG ON) -find_package (Threads) -include (cmake/test_compiler.cmake) - -if (OS_LINUX AND COMPILER_CLANG) - set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS}") - - option (USE_LIBCXX "Use libc++ and libc++abi instead of libstdc++ (only make sense on Linux with Clang)" ${HAVE_LIBCXX}) - set (LIBCXX_PATH "" CACHE STRING "Use custom path for libc++. It should be used for MSan.") - - if (USE_LIBCXX) - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++") # Ok for clang6, for older can cause 'not used option' warning - set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -D_LIBCPP_DEBUG=0") # More checks in debug build. 
- if (MAKE_STATIC_LIBRARIES) - execute_process (COMMAND ${CMAKE_CXX_COMPILER} --print-file-name=libclang_rt.builtins-${CMAKE_SYSTEM_PROCESSOR}.a OUTPUT_VARIABLE BUILTINS_LIB_PATH OUTPUT_STRIP_TRAILING_WHITESPACE) - link_libraries (-nodefaultlibs -Wl,-Bstatic -stdlib=libc++ c++ c++abi gcc_eh ${BUILTINS_LIB_PATH} rt -Wl,-Bdynamic dl pthread m c) - else () - link_libraries (-stdlib=libc++ c++ c++abi) - endif () - - if (LIBCXX_PATH) -# include_directories (SYSTEM BEFORE "${LIBCXX_PATH}/include" "${LIBCXX_PATH}/include/c++/v1") - link_directories ("${LIBCXX_PATH}/lib") - endif () - endif () -endif () - -if (USE_LIBCXX) - set (STATIC_STDLIB_FLAGS "") -else () - set (STATIC_STDLIB_FLAGS "-static-libgcc -static-libstdc++") -endif () - -if (MAKE_STATIC_LIBRARIES AND NOT APPLE AND NOT (COMPILER_CLANG AND OS_FREEBSD)) - set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${STATIC_STDLIB_FLAGS}") - - # Along with executables, we also build example of shared library for "library dictionary source"; and it also should be self-contained. 
- set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${STATIC_STDLIB_FLAGS}") -endif () - -if (USE_STATIC_LIBRARIES AND HAVE_NO_PIE) - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAG_NO_PIE}") - set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${FLAG_NO_PIE}") -endif () +include (cmake/use_libcxx.cmake) if (NOT MAKE_STATIC_LIBRARIES) set(CMAKE_POSITION_INDEPENDENT_CODE ON) @@ -268,6 +218,7 @@ include (cmake/find_odbc.cmake) # openssl, zlib, odbc before poco include (cmake/find_poco.cmake) include (cmake/find_lz4.cmake) +include (cmake/find_xxhash.cmake) include (cmake/find_sparsehash.cmake) include (cmake/find_rt.cmake) include (cmake/find_execinfo.cmake) diff --git a/cmake/find_xxhash.cmake b/cmake/find_xxhash.cmake new file mode 100644 index 00000000000..0b684838306 --- /dev/null +++ b/cmake/find_xxhash.cmake @@ -0,0 +1,10 @@ +if (LZ4_INCLUDE_DIR) + if (NOT EXISTS "${LZ4_INCLUDE_DIR}/xxhash.h") + message (WARNING "LZ4 library does not have XXHash. Support for XXHash will be disabled.") + set (USE_XXHASH 0) + else () + set (USE_XXHASH 1) + endif () +endif () + +message (STATUS "Using xxhash=${USE_XXHASH}") diff --git a/cmake/limit_jobs.cmake b/cmake/limit_jobs.cmake new file mode 100644 index 00000000000..d71c5260240 --- /dev/null +++ b/cmake/limit_jobs.cmake @@ -0,0 +1,35 @@ +# Usage: +# set (MAX_COMPILER_MEMORY 2000 CACHE INTERNAL "") # In megabytes +# set (MAX_LINKER_MEMORY 3500 CACHE INTERNAL "") +# include (cmake/limit_jobs.cmake) + +cmake_host_system_information(RESULT AVAILABLE_PHYSICAL_MEMORY QUERY AVAILABLE_PHYSICAL_MEMORY) # Not available under freebsd + +option(PARALLEL_COMPILE_JOBS "Define the maximum number of concurrent compilation jobs" "") +if (NOT PARALLEL_COMPILE_JOBS AND AVAILABLE_PHYSICAL_MEMORY) + math(EXPR PARALLEL_COMPILE_JOBS ${AVAILABLE_PHYSICAL_MEMORY}/2500) # ~2.5gb max per one compiler + if (NOT PARALLEL_COMPILE_JOBS) + set (PARALLEL_COMPILE_JOBS 1) + endif () +endif () +if (PARALLEL_COMPILE_JOBS) + set_property(GLOBAL APPEND PROPERTY 
JOB_POOLS compile_job_pool=${PARALLEL_COMPILE_JOBS}) + set(CMAKE_JOB_POOL_COMPILE compile_job_pool) +endif () + +option(PARALLEL_LINK_JOBS "Define the maximum number of concurrent link jobs" "") +if (NOT PARALLEL_LINK_JOBS AND AVAILABLE_PHYSICAL_MEMORY) + math(EXPR PARALLEL_LINK_JOBS ${AVAILABLE_PHYSICAL_MEMORY}/4000) # ~4gb max per one linker + if (NOT PARALLEL_LINK_JOBS) + set (PARALLEL_LINK_JOBS 1) + endif () +endif () +if (PARALLEL_COMPILE_JOBS OR PARALLEL_LINK_JOBS) + message(STATUS "Have ${AVAILABLE_PHYSICAL_MEMORY} megabytes of memory. Limiting concurrent linker jobs to ${PARALLEL_LINK_JOBS} and compiler jobs to ${PARALLEL_COMPILE_JOBS}") +endif () + +if (PARALLEL_LINK_JOBS) + set_property(GLOBAL APPEND PROPERTY JOB_POOLS link_job_pool=${PARALLEL_LINK_JOBS}) + set(CMAKE_JOB_POOL_LINK link_job_pool) +endif () + diff --git a/cmake/use_libcxx.cmake b/cmake/use_libcxx.cmake new file mode 100644 index 00000000000..72aff8f29be --- /dev/null +++ b/cmake/use_libcxx.cmake @@ -0,0 +1,49 @@ +# Uses MAKE_STATIC_LIBRARIES + + +set(THREADS_PREFER_PTHREAD_FLAG ON) +find_package (Threads) + +include (cmake/test_compiler.cmake) +include (cmake/arch.cmake) + +if (OS_LINUX AND COMPILER_CLANG) + set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS}") + + option (USE_LIBCXX "Use libc++ and libc++abi instead of libstdc++ (only make sense on Linux with Clang)" ${HAVE_LIBCXX}) + set (LIBCXX_PATH "" CACHE STRING "Use custom path for libc++. It should be used for MSan.") + + if (USE_LIBCXX) + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++") # Ok for clang6, for older can cause 'not used option' warning + set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -D_LIBCPP_DEBUG=0") # More checks in debug build. 
+ if (MAKE_STATIC_LIBRARIES) + execute_process (COMMAND ${CMAKE_CXX_COMPILER} --print-file-name=libclang_rt.builtins-${CMAKE_SYSTEM_PROCESSOR}.a OUTPUT_VARIABLE BUILTINS_LIB_PATH OUTPUT_STRIP_TRAILING_WHITESPACE) + link_libraries (-nodefaultlibs -Wl,-Bstatic -stdlib=libc++ c++ c++abi gcc_eh ${BUILTINS_LIB_PATH} rt -Wl,-Bdynamic dl pthread m c) + else () + link_libraries (-stdlib=libc++ c++ c++abi) + endif () + + if (LIBCXX_PATH) +# include_directories (SYSTEM BEFORE "${LIBCXX_PATH}/include" "${LIBCXX_PATH}/include/c++/v1") + link_directories ("${LIBCXX_PATH}/lib") + endif () + endif () +endif () + +if (USE_LIBCXX) + set (STATIC_STDLIB_FLAGS "") +else () + set (STATIC_STDLIB_FLAGS "-static-libgcc -static-libstdc++") +endif () + +if (MAKE_STATIC_LIBRARIES AND NOT APPLE AND NOT (COMPILER_CLANG AND OS_FREEBSD)) + set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${STATIC_STDLIB_FLAGS}") + + # Along with executables, we also build example of shared library for "library dictionary source"; and it also should be self-contained. 
+ set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${STATIC_STDLIB_FLAGS}") +endif () + +if (USE_STATIC_LIBRARIES AND HAVE_NO_PIE) + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAG_NO_PIE}") + set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${FLAG_NO_PIE}") +endif () diff --git a/dbms/programs/clang/CMakeLists.txt b/dbms/programs/clang/CMakeLists.txt index dec21ac611e..78bfa6b55e7 100644 --- a/dbms/programs/clang/CMakeLists.txt +++ b/dbms/programs/clang/CMakeLists.txt @@ -27,12 +27,12 @@ elseif (EXISTS ${INTERNAL_COMPILER_BIN_ROOT}${INTERNAL_COMPILER_EXECUTABLE}) endif () if (COPY_HEADERS_COMPILER AND OS_LINUX) - add_custom_target (copy-headers ALL env CLANG=${COPY_HEADERS_COMPILER} BUILD_PATH=${ClickHouse_BINARY_DIR} DESTDIR=${ClickHouse_SOURCE_DIR} ${ClickHouse_SOURCE_DIR}/copy_headers.sh ${ClickHouse_SOURCE_DIR} ${TMP_HEADERS_DIR} DEPENDS ${COPY_HEADERS_DEPENDS} WORKING_DIRECTORY ${ClickHouse_SOURCE_DIR} SOURCES ${ClickHouse_SOURCE_DIR}/copy_headers.sh) + add_custom_target (copy-headers env CLANG=${COPY_HEADERS_COMPILER} BUILD_PATH=${ClickHouse_BINARY_DIR} DESTDIR=${ClickHouse_SOURCE_DIR} ${ClickHouse_SOURCE_DIR}/copy_headers.sh ${ClickHouse_SOURCE_DIR} ${TMP_HEADERS_DIR} DEPENDS ${COPY_HEADERS_DEPENDS} WORKING_DIRECTORY ${ClickHouse_SOURCE_DIR} SOURCES ${ClickHouse_SOURCE_DIR}/copy_headers.sh) if (USE_INTERNAL_LLVM_LIBRARY) set (CLANG_HEADERS_DIR "${ClickHouse_SOURCE_DIR}/contrib/llvm/clang/lib/Headers") set (CLANG_HEADERS_DEST "${TMP_HEADERS_DIR}/usr/local/lib/clang/${LLVM_VERSION}/include") # original: ${LLVM_LIBRARY_OUTPUT_INTDIR}/clang/${CLANG_VERSION}/include - add_custom_target (copy-headers-clang ALL ${CMAKE_COMMAND} -E make_directory ${CLANG_HEADERS_DEST} && ${CMAKE_COMMAND} -E copy_if_different ${CLANG_HEADERS_DIR}/* ${CLANG_HEADERS_DEST} ) + add_custom_target (copy-headers-clang ${CMAKE_COMMAND} -E make_directory ${CLANG_HEADERS_DEST} && ${CMAKE_COMMAND} -E copy_if_different ${CLANG_HEADERS_DIR}/* ${CLANG_HEADERS_DEST} ) add_dependencies 
(copy-headers copy-headers-clang) endif () endif () diff --git a/dbms/programs/server/Server.cpp b/dbms/programs/server/Server.cpp index f965bf58eaa..499f233ff28 100644 --- a/dbms/programs/server/Server.cpp +++ b/dbms/programs/server/Server.cpp @@ -2,7 +2,11 @@ #include #include +#include +#include #include +#include +#include #include #include #include @@ -70,6 +74,8 @@ namespace ErrorCodes extern const int EXCESSIVE_ELEMENT_IN_CONFIG; extern const int INVALID_CONFIG_PARAMETER; extern const int SYSTEM_ERROR; + extern const int FAILED_TO_GETPWUID; + extern const int MISMATCHING_USERS_FOR_PROCESS_AND_DATA; } @@ -83,6 +89,26 @@ static std::string getCanonicalPath(std::string && path) return std::move(path); } +static std::string getUserName(uid_t user_id) +{ + /// Try to convert user id into user name. + auto buffer_size = sysconf(_SC_GETPW_R_SIZE_MAX); + if (buffer_size <= 0) + buffer_size = 1024; + std::string buffer; + buffer.reserve(buffer_size); + + struct passwd passwd_entry; + struct passwd * result = nullptr; + const auto error = getpwuid_r(user_id, &passwd_entry, buffer.data(), buffer_size, &result); + + if (error) + throwFromErrno("Failed to find user name for " + toString(user_id), ErrorCodes::FAILED_TO_GETPWUID, error); + else if (result) + return result->pw_name; + return toString(user_id); +} + void Server::uninitialize() { logger().information("shutting down"); @@ -166,6 +192,26 @@ int Server::main(const std::vector & /*args*/) std::string path = getCanonicalPath(config().getString("path", DBMS_DEFAULT_PATH)); std::string default_database = config().getString("default_database", "default"); + /// Check that the process' user id matches the owner of the data. 
+ const auto effective_user_id = geteuid(); + struct stat statbuf; + if (stat(path.c_str(), &statbuf) == 0 && effective_user_id != statbuf.st_uid) + { + const auto effective_user = getUserName(effective_user_id); + const auto data_owner = getUserName(statbuf.st_uid); + std::string message = "Effective user of the process (" + effective_user + + ") does not match the owner of the data (" + data_owner + ")."; + if (effective_user_id == 0) + { + message += " Run under 'sudo -u " + data_owner + "'."; + throw Exception(message, ErrorCodes::MISMATCHING_USERS_FOR_PROCESS_AND_DATA); + } + else + { + LOG_WARNING(log, message); + } + } + global_context->setPath(path); /// Create directories for 'path' and for default database, if not exist. diff --git a/dbms/src/Columns/ColumnArray.cpp b/dbms/src/Columns/ColumnArray.cpp index 18925ea165c..eb497ea8f31 100644 --- a/dbms/src/Columns/ColumnArray.cpp +++ b/dbms/src/Columns/ColumnArray.cpp @@ -322,17 +322,11 @@ bool ColumnArray::hasEqualOffsets(const ColumnArray & other) const ColumnPtr ColumnArray::convertToFullColumnIfConst() const { - ColumnPtr new_data; - - if (ColumnPtr full_column = getData().convertToFullColumnIfConst()) - new_data = full_column; - else - new_data = data; - - return ColumnArray::create(new_data, offsets); + /// It is possible to have an array with constant data and non-constant offsets. 
+ /// Example is the result of expression: replicate('hello', [1]) + return ColumnArray::create(data->convertToFullColumnIfConst(), offsets); } - void ColumnArray::getExtremes(Field & min, Field & max) const { min = Array(); diff --git a/dbms/src/Columns/ColumnNullable.cpp b/dbms/src/Columns/ColumnNullable.cpp index 10a4519bf73..b88cf60581b 100644 --- a/dbms/src/Columns/ColumnNullable.cpp +++ b/dbms/src/Columns/ColumnNullable.cpp @@ -22,8 +22,7 @@ ColumnNullable::ColumnNullable(MutableColumnPtr && nested_column_, MutableColumn : nested_column(std::move(nested_column_)), null_map(std::move(null_map_)) { /// ColumnNullable cannot have constant nested column. But constant argument could be passed. Materialize it. - if (ColumnPtr nested_column_materialized = getNestedColumn().convertToFullColumnIfConst()) - nested_column = nested_column_materialized; + nested_column = getNestedColumn().convertToFullColumnIfConst(); if (!getNestedColumn().canBeInsideNullable()) throw Exception{getNestedColumn().getName() + " cannot be inside Nullable column", ErrorCodes::ILLEGAL_COLUMN}; diff --git a/dbms/src/Columns/IColumn.h b/dbms/src/Columns/IColumn.h index 8f374ac526b..38df6ab3c38 100644 --- a/dbms/src/Columns/IColumn.h +++ b/dbms/src/Columns/IColumn.h @@ -45,7 +45,7 @@ public: /** If column isn't constant, returns nullptr (or itself). * If column is constant, transforms constant to full column (if column type allows such tranform) and return it. */ - virtual Ptr convertToFullColumnIfConst() const { return {}; } + virtual Ptr convertToFullColumnIfConst() const { return getPtr(); } /// If column isn't ColumnLowCardinality, return itself. /// If column is ColumnLowCardinality, transforms is to full column. 
diff --git a/dbms/src/Common/ErrorCodes.cpp b/dbms/src/Common/ErrorCodes.cpp index 0b7d089361c..4528578058a 100644 --- a/dbms/src/Common/ErrorCodes.cpp +++ b/dbms/src/Common/ErrorCodes.cpp @@ -403,6 +403,8 @@ namespace ErrorCodes extern const int NULL_POINTER_DEREFERENCE = 426; extern const int CANNOT_COMPILE_REGEXP = 427; extern const int UNKNOWN_LOG_LEVEL = 428; + extern const int FAILED_TO_GETPWUID = 429; + extern const int MISMATCHING_USERS_FOR_PROCESS_AND_DATA = 430; extern const int KEEPER_EXCEPTION = 999; extern const int POCO_EXCEPTION = 1000; diff --git a/dbms/src/Common/config.h.in b/dbms/src/Common/config.h.in index 302fc33c6b4..624c87b91b5 100644 --- a/dbms/src/Common/config.h.in +++ b/dbms/src/Common/config.h.in @@ -9,11 +9,13 @@ #cmakedefine01 USE_RDKAFKA #cmakedefine01 USE_CAPNP #cmakedefine01 USE_EMBEDDED_COMPILER -#cmakedefine01 LLVM_HAS_RTTI #cmakedefine01 USE_POCO_SQLODBC #cmakedefine01 USE_POCO_DATAODBC #cmakedefine01 USE_POCO_MONGODB #cmakedefine01 USE_POCO_NETSSL -#cmakedefine01 CLICKHOUSE_SPLIT_BINARY #cmakedefine01 USE_BASE64 #cmakedefine01 USE_HDFS +#cmakedefine01 USE_XXHASH + +#cmakedefine01 CLICKHOUSE_SPLIT_BINARY +#cmakedefine01 LLVM_HAS_RTTI diff --git a/dbms/src/DataStreams/NativeBlockOutputStream.cpp b/dbms/src/DataStreams/NativeBlockOutputStream.cpp index 1869badfe14..fb5aadb2fb3 100644 --- a/dbms/src/DataStreams/NativeBlockOutputStream.cpp +++ b/dbms/src/DataStreams/NativeBlockOutputStream.cpp @@ -46,12 +46,7 @@ void NativeBlockOutputStream::writeData(const IDataType & type, const ColumnPtr /** If there are columns-constants - then we materialize them. * (Since the data type does not know how to serialize / deserialize constants.) 
*/ - ColumnPtr full_column; - - if (ColumnPtr converted = column->convertToFullColumnIfConst()) - full_column = converted; - else - full_column = column; + ColumnPtr full_column = column->convertToFullColumnIfConst(); IDataType::SerializeBinaryBulkSettings settings; settings.getter = [&ostr](IDataType::SubstreamPath) -> WriteBuffer * { return &ostr; }; diff --git a/dbms/src/DataStreams/TotalsHavingBlockInputStream.cpp b/dbms/src/DataStreams/TotalsHavingBlockInputStream.cpp index 103d880f1d3..b7db949ca24 100644 --- a/dbms/src/DataStreams/TotalsHavingBlockInputStream.cpp +++ b/dbms/src/DataStreams/TotalsHavingBlockInputStream.cpp @@ -127,10 +127,7 @@ Block TotalsHavingBlockInputStream::readImpl() expression->execute(finalized); size_t filter_column_pos = finalized.getPositionByName(filter_column_name); - ColumnPtr filter_column_ptr = finalized.safeGetByPosition(filter_column_pos).column; - - if (ColumnPtr materialized = filter_column_ptr->convertToFullColumnIfConst()) - filter_column_ptr = materialized; + ColumnPtr filter_column_ptr = finalized.safeGetByPosition(filter_column_pos).column->convertToFullColumnIfConst(); FilterDescription filter_description(*filter_column_ptr); diff --git a/dbms/src/DataStreams/materializeBlock.cpp b/dbms/src/DataStreams/materializeBlock.cpp index a190baae9fa..60cd197912f 100644 --- a/dbms/src/DataStreams/materializeBlock.cpp +++ b/dbms/src/DataStreams/materializeBlock.cpp @@ -14,9 +14,7 @@ Block materializeBlock(const Block & block) for (size_t i = 0; i < columns; ++i) { auto & element = res.getByPosition(i); - auto & src = element.column; - if (ColumnPtr converted = src->convertToFullColumnIfConst()) - src = converted; + element.column = element.column->convertToFullColumnIfConst(); } return res; diff --git a/dbms/src/Functions/CMakeLists.txt b/dbms/src/Functions/CMakeLists.txt index 1307c47260e..b4dcaa49410 100644 --- a/dbms/src/Functions/CMakeLists.txt +++ b/dbms/src/Functions/CMakeLists.txt @@ -1,8 +1,7 @@ 
include(${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake) -add_headers_and_sources(clickhouse_functions .) add_headers_and_sources(clickhouse_functions ./GatherUtils) -add_headers_and_sources(clickhouse_functions ./Conditional) +add_headers_and_sources(clickhouse_functions .) list(REMOVE_ITEM clickhouse_functions_sources IFunction.cpp FunctionFactory.cpp FunctionHelpers.cpp) @@ -12,7 +11,7 @@ add_library(clickhouse_functions ${LINK_MODE} ${clickhouse_functions_sources}) target_link_libraries(clickhouse_functions PUBLIC - dbms + dbms PRIVATE clickhouse_dictionaries ${CONSISTENT_HASHING_LIBRARY} @@ -21,7 +20,8 @@ target_link_libraries(clickhouse_functions ${METROHASH_LIBRARIES} murmurhash ${BASE64_LIBRARY} - ${OPENSSL_CRYPTO_LIBRARY}) + ${OPENSSL_CRYPTO_LIBRARY} + ${LZ4_LIBRARY}) target_include_directories (clickhouse_functions SYSTEM BEFORE PUBLIC ${DIVIDE_INCLUDE_DIR}) diff --git a/dbms/src/Functions/FunctionBinaryArithmetic.h b/dbms/src/Functions/FunctionBinaryArithmetic.h index a668c43b122..12c306cdf34 100644 --- a/dbms/src/Functions/FunctionBinaryArithmetic.h +++ b/dbms/src/Functions/FunctionBinaryArithmetic.h @@ -11,13 +11,14 @@ #include #include #include -#include -#include +#include "IFunction.h" +#include "FunctionHelpers.h" +#include "intDiv.h" +#include "castTypeToEither.h" +#include "FunctionFactory.h" #include #include #include -#include -#include #include #if USE_EMBEDDED_COMPILER diff --git a/dbms/src/Functions/FunctionsExternalDictionaries.h b/dbms/src/Functions/FunctionsExternalDictionaries.h index a8116f44fbb..176cfe80eea 100644 --- a/dbms/src/Functions/FunctionsExternalDictionaries.h +++ b/dbms/src/Functions/FunctionsExternalDictionaries.h @@ -346,11 +346,8 @@ private: String attr_name = attr_name_col->getValue(); const ColumnWithTypeAndName & key_col_with_type = block.getByPosition(arguments[2]); - ColumnPtr key_col = key_col_with_type.column; - /// Functions in external dictionaries only support full-value (not constant) columns with keys. 
- if (ColumnPtr key_col_materialized = key_col_with_type.column->convertToFullColumnIfConst()) - key_col = key_col_materialized; + ColumnPtr key_col = key_col_with_type.column->convertToFullColumnIfConst(); if (checkColumn(key_col.get())) { @@ -578,11 +575,8 @@ private: String attr_name = attr_name_col->getValue(); const ColumnWithTypeAndName & key_col_with_type = block.getByPosition(arguments[2]); - ColumnPtr key_col = key_col_with_type.column; - /// Functions in external dictionaries only support full-value (not constant) columns with keys. - if (ColumnPtr key_col_materialized = key_col_with_type.column->convertToFullColumnIfConst()) - key_col = key_col_materialized; + ColumnPtr key_col = key_col_with_type.column->convertToFullColumnIfConst(); const auto & key_columns = typeid_cast(*key_col).getColumns(); const auto & key_types = static_cast(*key_col_with_type.type).getElements(); @@ -813,11 +807,9 @@ private: String attr_name = attr_name_col->getValue(); const ColumnWithTypeAndName & key_col_with_type = block.getByPosition(arguments[2]); - ColumnPtr key_col = key_col_with_type.column; /// Functions in external dictionaries only support full-value (not constant) columns with keys. - if (ColumnPtr key_col_materialized = key_col_with_type.column->convertToFullColumnIfConst()) - key_col = key_col_materialized; + ColumnPtr key_col = key_col_with_type.column->convertToFullColumnIfConst(); if (checkColumn(key_col.get())) { @@ -1079,11 +1071,9 @@ private: String attr_name = attr_name_col->getValue(); const ColumnWithTypeAndName & key_col_with_type = block.getByPosition(arguments[2]); - ColumnPtr key_col = key_col_with_type.column; /// Functions in external dictionaries only support full-value (not constant) columns with keys. 
- if (ColumnPtr key_col_materialized = key_col_with_type.column->convertToFullColumnIfConst()) - key_col = key_col_materialized; + ColumnPtr key_col = key_col_with_type.column->convertToFullColumnIfConst(); const auto & key_columns = typeid_cast(*key_col).getColumns(); const auto & key_types = static_cast(*key_col_with_type.type).getElements(); @@ -1691,7 +1681,7 @@ static const PaddedPODArray & getColumnDataAsPaddedPODArray(const IColumn & c } } - const auto full_column = column.isColumnConst() ? column.convertToFullColumnIfConst() : column.getPtr(); + const auto full_column = column.convertToFullColumnIfConst(); // With type conversion and const columns we need to use backup storage here const auto size = full_column->size(); diff --git a/dbms/src/Functions/FunctionsFindCluster.h b/dbms/src/Functions/FunctionsFindCluster.h index 9e7e43c7dd9..5b20767c8aa 100644 --- a/dbms/src/Functions/FunctionsFindCluster.h +++ b/dbms/src/Functions/FunctionsFindCluster.h @@ -227,8 +227,7 @@ protected: bool executeOperationTyped(const IColumn * in_untyped, PaddedPODArray & dst, const IColumn * centroids_array_untyped) { const auto maybe_const = in_untyped->convertToFullColumnIfConst(); - if (maybe_const) - in_untyped = maybe_const.get(); + in_untyped = maybe_const.get(); const auto in_vector = checkAndGetColumn>(in_untyped); if (in_vector) diff --git a/dbms/src/Functions/FunctionsHashing.cpp b/dbms/src/Functions/FunctionsHashing.cpp index edf3a66d515..787fd80ae08 100644 --- a/dbms/src/Functions/FunctionsHashing.cpp +++ b/dbms/src/Functions/FunctionsHashing.cpp @@ -1,5 +1,6 @@ #include #include +#include namespace DB @@ -27,5 +28,10 @@ void registerFunctionsHashing(FunctionFactory & factory) factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); + +#if USE_XXHASH + factory.registerFunction(); + factory.registerFunction(); +#endif } } diff --git a/dbms/src/Functions/FunctionsHashing.h b/dbms/src/Functions/FunctionsHashing.h index 3dd72865337..bce3c27b24f 
100644 --- a/dbms/src/Functions/FunctionsHashing.h +++ b/dbms/src/Functions/FunctionsHashing.h @@ -8,6 +8,11 @@ #include #include +#include +#if USE_XXHASH + #include +#endif + #include #include @@ -117,6 +122,7 @@ struct HalfMD5Impl /// If true, it will use intHash32 or intHash64 to hash POD types. This behaviour is intended for better performance of some functions. /// Otherwise it will hash bytes in memory as a string using corresponding hash function. + static constexpr bool use_int_hash_for_pods = false; }; @@ -402,6 +408,49 @@ struct ImplMetroHash64 }; +#if USE_XXHASH + +struct ImplXxHash32 +{ + static constexpr auto name = "xxHash32"; + using ReturnType = UInt32; + + static auto apply(const char * s, const size_t len) { return XXH32(s, len, 0); } + /** + * With current implementation with more than 1 arguments it will give the results + * non-reproducable from outside of CH. + * + * Proper way of combining several input is to use streaming mode of hash function + * https://github.com/Cyan4973/xxHash/issues/114#issuecomment-334908566 + * + * In common case doable by init_state / update_state / finalize_state + */ + static auto combineHashes(UInt32 h1, UInt32 h2) { return IntHash32Impl::apply(h1) ^ h2; } + + static constexpr bool use_int_hash_for_pods = false; +}; + + +struct ImplXxHash64 +{ + static constexpr auto name = "xxHash64"; + using ReturnType = UInt64; + using uint128_t = CityHash_v1_0_2::uint128; + + static auto apply(const char * s, const size_t len) { return XXH64(s, len, 0); } + + /* + With current implementation with more than 1 arguments it will give the results + non-reproducable from outside of CH. (see comment on ImplXxHash32). 
+ */ + static auto combineHashes(UInt64 h1, UInt64 h2) { return CityHash_v1_0_2::Hash128to64(uint128_t(h1, h2)); } + + static constexpr bool use_int_hash_for_pods = false; +}; + +#endif + + template class FunctionStringHashFixedString : public IFunction { @@ -1027,4 +1076,9 @@ using FunctionMurmurHash3_128 = FunctionStringHashFixedString; using FunctionHiveHash = FunctionAnyHash; +#if USE_XXHASH + using FunctionXxHash32 = FunctionAnyHash; + using FunctionXxHash64 = FunctionAnyHash; +#endif + } diff --git a/dbms/src/Functions/IFunction.cpp b/dbms/src/Functions/IFunction.cpp index 08376a94f78..6b6186302f7 100644 --- a/dbms/src/Functions/IFunction.cpp +++ b/dbms/src/Functions/IFunction.cpp @@ -157,10 +157,7 @@ ColumnPtr wrapInNullable(const ColumnPtr & src, const Block & block, const Colum if (!result_null_map_column) return makeNullable(src); - if (src_not_nullable->isColumnConst()) - return ColumnNullable::create(src_not_nullable->convertToFullColumnIfConst(), result_null_map_column); - else - return ColumnNullable::create(src_not_nullable, result_null_map_column); + return ColumnNullable::create(src_not_nullable->convertToFullColumnIfConst(), result_null_map_column); } @@ -431,9 +428,7 @@ void PreparedFunctionImpl::execute(Block & block, const ColumnNumbers & args, si executeWithoutLowCardinalityColumns(block_without_low_cardinality, args, result, block_without_low_cardinality.rows(), dry_run); - auto & keys = block_without_low_cardinality.safeGetByPosition(result).column; - if (auto full_column = keys->convertToFullColumnIfConst()) - keys = full_column; + auto keys = block_without_low_cardinality.safeGetByPosition(result).column->convertToFullColumnIfConst(); auto res_mut_dictionary = DataTypeLowCardinality::createColumnUnique(*res_low_cardinality_type->getDictionaryType()); ColumnPtr res_indexes = res_mut_dictionary->uniqueInsertRangeFrom(*keys, 0, keys->size()); diff --git a/dbms/src/Functions/array.cpp b/dbms/src/Functions/array.cpp index 
c0420b83db9..5205eff6b5f 100644 --- a/dbms/src/Functions/array.cpp +++ b/dbms/src/Functions/array.cpp @@ -69,8 +69,7 @@ public: if (!arg.type->equals(*elem_type)) preprocessed_column = castColumn(arg, elem_type, context); - if (ColumnPtr materialized_column = preprocessed_column->convertToFullColumnIfConst()) - preprocessed_column = materialized_column; + preprocessed_column = preprocessed_column->convertToFullColumnIfConst(); columns_holder[i] = std::move(preprocessed_column); columns[i] = columns_holder[i].get(); diff --git a/dbms/src/Functions/arrayEnumerateExtended.h b/dbms/src/Functions/arrayEnumerateExtended.h index 86eba2ded7c..a0ceed86b4a 100644 --- a/dbms/src/Functions/arrayEnumerateExtended.h +++ b/dbms/src/Functions/arrayEnumerateExtended.h @@ -61,21 +61,10 @@ private: static constexpr size_t INITIAL_SIZE_DEGREE = 9; template - bool executeNumber(const ColumnArray * array, const IColumn * null_map, ColumnUInt32::Container & res_values); - - bool executeString(const ColumnArray * array, const IColumn * null_map, ColumnUInt32::Container & res_values); - - bool execute128bit( - const ColumnArray::Offsets & offsets, - const ColumnRawPtrs & columns, - const ColumnRawPtrs & null_maps, - ColumnUInt32::Container & res_values, - bool has_nullable_columns); - - void executeHashed( - const ColumnArray::Offsets & offsets, - const ColumnRawPtrs & columns, - ColumnUInt32::Container & res_values); + bool executeNumber(const ColumnArray::Offsets & offsets, const IColumn & data, const NullMap * null_map, ColumnUInt32::Container & res_values); + bool executeString(const ColumnArray::Offsets & offsets, const IColumn & data, const NullMap * null_map, ColumnUInt32::Container & res_values); + bool execute128bit(const ColumnArray::Offsets & offsets, const ColumnRawPtrs & columns, ColumnUInt32::Container & res_values); + bool executeHashed(const ColumnArray::Offsets & offsets, const ColumnRawPtrs & columns, ColumnUInt32::Container & res_values); }; @@ -83,14 +72,14 @@ template 
void FunctionArrayEnumerateExtended::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) { const ColumnArray::Offsets * offsets = nullptr; - ColumnRawPtrs data_columns; - data_columns.reserve(arguments.size()); + size_t num_arguments = arguments.size(); + ColumnRawPtrs data_columns(num_arguments); - bool has_nullable_columns = false; - - for (size_t i = 0; i < arguments.size(); ++i) + Columns array_holders; + ColumnPtr offsets_column; + for (size_t i = 0; i < num_arguments; ++i) { - ColumnPtr array_ptr = block.getByPosition(arguments[i]).column; + const ColumnPtr & array_ptr = block.getByPosition(arguments[i]).column; const ColumnArray * array = checkAndGetColumn(array_ptr.get()); if (!array) { @@ -100,101 +89,84 @@ void FunctionArrayEnumerateExtended::executeImpl(Block & block, const C throw Exception("Illegal column " + block.getByPosition(arguments[i]).column->getName() + " of " + toString(i + 1) + "-th argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); - array_ptr = const_array->convertToFullColumn(); - array = checkAndGetColumn(array_ptr.get()); + array_holders.emplace_back(const_array->convertToFullColumn()); + array = checkAndGetColumn(array_holders.back().get()); } const ColumnArray::Offsets & offsets_i = array->getOffsets(); if (i == 0) + { offsets = &offsets_i; + offsets_column = array->getOffsetsPtr(); + } else if (offsets_i != *offsets) throw Exception("Lengths of all arrays passed to " + getName() + " must be equal.", ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH); auto * array_data = &array->getData(); - data_columns.push_back(array_data); + data_columns[i] = array_data; } - size_t num_columns = data_columns.size(); - ColumnRawPtrs original_data_columns(num_columns); - ColumnRawPtrs null_maps(num_columns); + const NullMap * null_map = nullptr; - for (size_t i = 0; i < num_columns; ++i) + for (size_t i = 0; i < num_arguments; ++i) { - original_data_columns[i] = data_columns[i]; - if 
(data_columns[i]->isColumnNullable()) { - has_nullable_columns = true; const auto & nullable_col = static_cast(*data_columns[i]); - data_columns[i] = &nullable_col.getNestedColumn(); - null_maps[i] = &nullable_col.getNullMapColumn(); + + if (num_arguments == 1) + data_columns[i] = &nullable_col.getNestedColumn(); + + null_map = &nullable_col.getNullMapData(); + break; } - else - null_maps[i] = nullptr; } - const ColumnArray * first_array = checkAndGetColumn(block.getByPosition(arguments.at(0)).column.get()); - const IColumn * first_null_map = null_maps[0]; auto res_nested = ColumnUInt32::create(); ColumnUInt32::Container & res_values = res_nested->getData(); if (!offsets->empty()) res_values.resize(offsets->back()); - if (num_columns == 1) + if (num_arguments == 1) { - if (!(executeNumber(first_array, first_null_map, res_values) - || executeNumber(first_array, first_null_map, res_values) - || executeNumber(first_array, first_null_map, res_values) - || executeNumber(first_array, first_null_map, res_values) - || executeNumber(first_array, first_null_map, res_values) - || executeNumber(first_array, first_null_map, res_values) - || executeNumber(first_array, first_null_map, res_values) - || executeNumber(first_array, first_null_map, res_values) - || executeNumber(first_array, first_null_map, res_values) - || executeNumber(first_array, first_null_map, res_values) - || executeString (first_array, first_null_map, res_values))) - executeHashed(*offsets, original_data_columns, res_values); + executeNumber(*offsets, *data_columns[0], null_map, res_values) + || executeNumber(*offsets, *data_columns[0], null_map, res_values) + || executeNumber(*offsets, *data_columns[0], null_map, res_values) + || executeNumber(*offsets, *data_columns[0], null_map, res_values) + || executeNumber(*offsets, *data_columns[0], null_map, res_values) + || executeNumber(*offsets, *data_columns[0], null_map, res_values) + || executeNumber(*offsets, *data_columns[0], null_map, res_values) + || 
executeNumber(*offsets, *data_columns[0], null_map, res_values) + || executeNumber(*offsets, *data_columns[0], null_map, res_values) + || executeNumber(*offsets, *data_columns[0], null_map, res_values) + || executeString(*offsets, *data_columns[0], null_map, res_values) + || executeHashed(*offsets, data_columns, res_values); } else { - if (!execute128bit(*offsets, data_columns, null_maps, res_values, has_nullable_columns)) - executeHashed(*offsets, original_data_columns, res_values); + execute128bit(*offsets, data_columns, res_values) + || executeHashed(*offsets, data_columns, res_values); } - block.getByPosition(result).column = ColumnArray::create(std::move(res_nested), first_array->getOffsetsPtr()); + block.getByPosition(result).column = ColumnArray::create(std::move(res_nested), offsets_column); } template template -bool FunctionArrayEnumerateExtended::executeNumber(const ColumnArray * array, const IColumn * null_map, ColumnUInt32::Container & res_values) +bool FunctionArrayEnumerateExtended::executeNumber( + const ColumnArray::Offsets & offsets, const IColumn & data, const NullMap * null_map, ColumnUInt32::Container & res_values) { - const IColumn * inner_col; - - const auto & array_data = array->getData(); - if (array_data.isColumnNullable()) - { - const auto & nullable_col = static_cast(array_data); - inner_col = &nullable_col.getNestedColumn(); - } - else - inner_col = &array_data; - - const ColumnVector * nested = checkAndGetColumn>(inner_col); - if (!nested) + const ColumnVector * data_concrete = checkAndGetColumn>(&data); + if (!data_concrete) return false; - const ColumnArray::Offsets & offsets = array->getOffsets(); - const typename ColumnVector::Container & values = nested->getData(); + const auto & values = data_concrete->getData(); using ValuesToIndices = ClearableHashMap, HashTableGrower, HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(T)>>; - const PaddedPODArray * null_map_data = nullptr; - if (null_map) - null_map_data = 
&static_cast(null_map)->getData(); - ValuesToIndices indices; size_t prev_off = 0; if constexpr (std::is_same_v) @@ -207,7 +179,7 @@ bool FunctionArrayEnumerateExtended::executeNumber(const ColumnArray * size_t off = offsets[i]; for (size_t j = prev_off; j < off; ++j) { - if (null_map_data && ((*null_map_data)[j] == 1)) + if (null_map && (*null_map)[j]) res_values[j] = ++null_count; else res_values[j] = ++indices[values[j]]; @@ -226,7 +198,7 @@ bool FunctionArrayEnumerateExtended::executeNumber(const ColumnArray * size_t off = offsets[i]; for (size_t j = prev_off; j < off; ++j) { - if (null_map_data && ((*null_map_data)[j] == 1)) + if (null_map && (*null_map)[j]) { if (!null_index) null_index = ++rank; @@ -247,32 +219,17 @@ bool FunctionArrayEnumerateExtended::executeNumber(const ColumnArray * } template -bool FunctionArrayEnumerateExtended::executeString(const ColumnArray * array, const IColumn * null_map, ColumnUInt32::Container & res_values) +bool FunctionArrayEnumerateExtended::executeString( + const ColumnArray::Offsets & offsets, const IColumn & data, const NullMap * null_map, ColumnUInt32::Container & res_values) { - const IColumn * inner_col; - - const auto & array_data = array->getData(); - if (array_data.isColumnNullable()) - { - const auto & nullable_col = static_cast(array_data); - inner_col = &nullable_col.getNestedColumn(); - } - else - inner_col = &array_data; - - const ColumnString * nested = checkAndGetColumn(inner_col); - if (!nested) + const ColumnString * values = checkAndGetColumn(&data); + if (!values) return false; - const ColumnArray::Offsets & offsets = array->getOffsets(); size_t prev_off = 0; using ValuesToIndices = ClearableHashMap, HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(StringRef)>>; - const PaddedPODArray * null_map_data = nullptr; - if (null_map) - null_map_data = &static_cast(null_map)->getData(); - ValuesToIndices indices; if constexpr (std::is_same_v) { @@ -284,10 +241,10 @@ bool 
FunctionArrayEnumerateExtended::executeString(const ColumnArray * size_t off = offsets[i]; for (size_t j = prev_off; j < off; ++j) { - if (null_map_data && ((*null_map_data)[j] == 1)) + if (null_map && (*null_map)[j]) res_values[j] = ++null_count; else - res_values[j] = ++indices[nested->getDataAt(j)]; + res_values[j] = ++indices[values->getDataAt(j)]; } prev_off = off; } @@ -303,7 +260,7 @@ bool FunctionArrayEnumerateExtended::executeString(const ColumnArray * size_t off = offsets[i]; for (size_t j = prev_off; j < off; ++j) { - if (null_map_data && ((*null_map_data)[j] == 1)) + if (null_map && (*null_map)[j]) { if (!null_index) null_index = ++rank; @@ -311,7 +268,7 @@ bool FunctionArrayEnumerateExtended::executeString(const ColumnArray * } else { - auto & idx = indices[nested->getDataAt(j)]; + auto & idx = indices[values->getDataAt(j)]; if (!idx) idx = ++rank; res_values[j] = idx; @@ -327,9 +284,7 @@ template bool FunctionArrayEnumerateExtended::execute128bit( const ColumnArray::Offsets & offsets, const ColumnRawPtrs & columns, - const ColumnRawPtrs & null_maps, - ColumnUInt32::Container & res_values, - bool has_nullable_columns) + ColumnUInt32::Container & res_values) { size_t count = columns.size(); size_t keys_bytes = 0; @@ -342,8 +297,6 @@ bool FunctionArrayEnumerateExtended::execute128bit( key_sizes[j] = columns[j]->sizeOfValueIfFixed(); keys_bytes += key_sizes[j]; } - if (has_nullable_columns) - keys_bytes += std::tuple_size>::value; if (keys_bytes > 16) return false; @@ -361,29 +314,7 @@ bool FunctionArrayEnumerateExtended::execute128bit( indices.clear(); size_t off = offsets[i]; for (size_t j = prev_off; j < off; ++j) - { - if (has_nullable_columns) - { - KeysNullMap bitmap{}; - - for (size_t i = 0; i < columns.size(); ++i) - { - if (null_maps[i]) - { - const auto & null_map = static_cast(*null_maps[i]).getData(); - if (null_map[j] == 1) - { - size_t bucket = i / 8; - size_t offset = i % 8; - bitmap[bucket] |= UInt8(1) << offset; - } - } - } - 
res_values[j] = ++indices[packFixed(j, count, columns, key_sizes, bitmap)]; - } - else - res_values[j] = ++indices[packFixed(j, count, columns, key_sizes)]; - } + res_values[j] = ++indices[packFixed(j, count, columns, key_sizes)]; prev_off = off; } } @@ -397,35 +328,10 @@ bool FunctionArrayEnumerateExtended::execute128bit( size_t rank = 0; for (size_t j = prev_off; j < off; ++j) { - if (has_nullable_columns) - { - KeysNullMap bitmap{}; - - for (size_t i = 0; i < columns.size(); ++i) - { - if (null_maps[i]) - { - const auto & null_map = static_cast(*null_maps[i]).getData(); - if (null_map[j] == 1) - { - size_t bucket = i / 8; - size_t offset = i % 8; - bitmap[bucket] |= UInt8(1) << offset; - } - } - } - auto &idx = indices[packFixed(j, count, columns, key_sizes, bitmap)]; - if (!idx) - idx = ++rank; - res_values[j] = idx; - } - else - { - auto &idx = indices[packFixed(j, count, columns, key_sizes)];; - if (!idx) - idx = ++rank; - res_values[j] = idx; - } + auto &idx = indices[packFixed(j, count, columns, key_sizes)];; + if (!idx) + idx = ++rank; + res_values[j] = idx; } prev_off = off; } @@ -435,7 +341,7 @@ bool FunctionArrayEnumerateExtended::execute128bit( } template -void FunctionArrayEnumerateExtended::executeHashed( +bool FunctionArrayEnumerateExtended::executeHashed( const ColumnArray::Offsets & offsets, const ColumnRawPtrs & columns, ColumnUInt32::Container & res_values) @@ -479,6 +385,8 @@ void FunctionArrayEnumerateExtended::executeHashed( prev_off = off; } } + + return true; } } diff --git a/dbms/src/Functions/arrayIndex.h b/dbms/src/Functions/arrayIndex.h index 14429a1a4a5..fc8a8a08e47 100644 --- a/dbms/src/Functions/arrayIndex.h +++ b/dbms/src/Functions/arrayIndex.h @@ -838,15 +838,9 @@ private: null_map_data, nullptr); else { - /// If item_arg is tuple and have constants. 
- if (ColumnPtr materialized_tuple = item_arg.convertToFullColumnIfConst()) - ArrayIndexGenericImpl::vector( - col_nested, col_array->getOffsets(), *materialized_tuple, col_res->getData(), - null_map_data, null_map_item); - else - ArrayIndexGenericImpl::vector( - col_nested, col_array->getOffsets(), item_arg, col_res->getData(), - null_map_data, null_map_item); + ArrayIndexGenericImpl::vector( + col_nested, col_array->getOffsets(), *item_arg.convertToFullColumnIfConst(), col_res->getData(), + null_map_data, null_map_item); } block.getByPosition(result).column = std::move(col_res); diff --git a/dbms/src/Functions/arrayMap.cpp b/dbms/src/Functions/arrayMap.cpp index 534aaf51666..9575f520262 100644 --- a/dbms/src/Functions/arrayMap.cpp +++ b/dbms/src/Functions/arrayMap.cpp @@ -23,9 +23,7 @@ struct ArrayMapImpl static ColumnPtr execute(const ColumnArray & array, ColumnPtr mapped) { - return mapped->isColumnConst() - ? ColumnArray::create(mapped->convertToFullColumnIfConst(), array.getOffsetsPtr()) - : ColumnArray::create(mapped, array.getOffsetsPtr()); + return ColumnArray::create(mapped->convertToFullColumnIfConst(), array.getOffsetsPtr()); } }; diff --git a/dbms/src/Functions/arrayUniq.cpp b/dbms/src/Functions/arrayUniq.cpp index 3a90d76aae6..93af86a3441 100644 --- a/dbms/src/Functions/arrayUniq.cpp +++ b/dbms/src/Functions/arrayUniq.cpp @@ -63,37 +63,23 @@ private: static constexpr size_t INITIAL_SIZE_DEGREE = 9; template - bool executeNumber(const ColumnArray * array, const IColumn * null_map, ColumnUInt32::Container & res_values); - - bool executeString(const ColumnArray * array, const IColumn * null_map, ColumnUInt32::Container & res_values); - - bool execute128bit( - const ColumnArray::Offsets & offsets, - const ColumnRawPtrs & columns, - const ColumnRawPtrs & null_maps, - ColumnUInt32::Container & res_values, - bool has_nullable_columns); - - void executeHashed( - const ColumnArray::Offsets & offsets, - const ColumnRawPtrs & columns, - ColumnUInt32::Container & 
res_values); + bool executeNumber(const ColumnArray::Offsets & offsets, const IColumn & data, const NullMap * null_map, ColumnUInt32::Container & res_values); + bool executeString(const ColumnArray::Offsets & offsets, const IColumn & data, const NullMap * null_map, ColumnUInt32::Container & res_values); + bool execute128bit(const ColumnArray::Offsets & offsets, const ColumnRawPtrs & columns, ColumnUInt32::Container & res_values); + bool executeHashed(const ColumnArray::Offsets & offsets, const ColumnRawPtrs & columns, ColumnUInt32::Container & res_values); }; void FunctionArrayUniq::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) { - Columns array_columns(arguments.size()); const ColumnArray::Offsets * offsets = nullptr; - ColumnRawPtrs data_columns(arguments.size()); - ColumnRawPtrs original_data_columns(arguments.size()); - ColumnRawPtrs null_maps(arguments.size()); + size_t num_arguments = arguments.size(); + ColumnRawPtrs data_columns(num_arguments); - bool has_nullable_columns = false; - - for (size_t i = 0; i < arguments.size(); ++i) + Columns array_holders; + for (size_t i = 0; i < num_arguments; ++i) { - ColumnPtr array_ptr = block.getByPosition(arguments[i]).column; + const ColumnPtr & array_ptr = block.getByPosition(arguments[i]).column; const ColumnArray * array = checkAndGetColumn(array_ptr.get()); if (!array) { @@ -101,14 +87,12 @@ void FunctionArrayUniq::executeImpl(Block & block, const ColumnNumbers & argumen block.getByPosition(arguments[i]).column.get()); if (!const_array) throw Exception("Illegal column " + block.getByPosition(arguments[i]).column->getName() - + " of " + toString(i + 1) + getOrdinalSuffix(i + 1) + " argument of function " + getName(), + + " of " + toString(i + 1) + "-th argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); - array_ptr = const_array->convertToFullColumn(); - array = static_cast(array_ptr.get()); + 
array_holders.emplace_back(const_array->convertToFullColumn()); + array = checkAndGetColumn(array_holders.back().get()); } - array_columns[i] = array_ptr; - const ColumnArray::Offsets & offsets_i = array->getOffsets(); if (i == 0) offsets = &offsets_i; @@ -116,78 +100,65 @@ void FunctionArrayUniq::executeImpl(Block & block, const ColumnNumbers & argumen throw Exception("Lengths of all arrays passed to " + getName() + " must be equal.", ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH); - data_columns[i] = &array->getData(); - original_data_columns[i] = data_columns[i]; - - if (data_columns[i]->isColumnNullable()) - { - has_nullable_columns = true; - const auto & nullable_col = static_cast(*data_columns[i]); - data_columns[i] = &nullable_col.getNestedColumn(); - null_maps[i] = &nullable_col.getNullMapColumn(); - } - else - null_maps[i] = nullptr; + auto * array_data = &array->getData(); + data_columns[i] = array_data; } - const ColumnArray * first_array = static_cast(array_columns[0].get()); - const IColumn * first_null_map = null_maps[0]; - auto res = ColumnUInt32::create(); + const NullMap * null_map = nullptr; + for (size_t i = 0; i < num_arguments; ++i) + { + if (data_columns[i]->isColumnNullable()) + { + const auto & nullable_col = static_cast(*data_columns[i]); + + if (num_arguments == 1) + data_columns[i] = &nullable_col.getNestedColumn(); + + null_map = &nullable_col.getNullMapData(); + break; + } + } + + auto res = ColumnUInt32::create(); ColumnUInt32::Container & res_values = res->getData(); res_values.resize(offsets->size()); - if (arguments.size() == 1) + if (num_arguments == 1) { - if (!(executeNumber(first_array, first_null_map, res_values) - || executeNumber(first_array, first_null_map, res_values) - || executeNumber(first_array, first_null_map, res_values) - || executeNumber(first_array, first_null_map, res_values) - || executeNumber(first_array, first_null_map, res_values) - || executeNumber(first_array, first_null_map, res_values) - || 
executeNumber(first_array, first_null_map, res_values) - || executeNumber(first_array, first_null_map, res_values) - || executeNumber(first_array, first_null_map, res_values) - || executeNumber(first_array, first_null_map, res_values) - || executeString(first_array, first_null_map, res_values))) - executeHashed(*offsets, original_data_columns, res_values); + executeNumber(*offsets, *data_columns[0], null_map, res_values) + || executeNumber(*offsets, *data_columns[0], null_map, res_values) + || executeNumber(*offsets, *data_columns[0], null_map, res_values) + || executeNumber(*offsets, *data_columns[0], null_map, res_values) + || executeNumber(*offsets, *data_columns[0], null_map, res_values) + || executeNumber(*offsets, *data_columns[0], null_map, res_values) + || executeNumber(*offsets, *data_columns[0], null_map, res_values) + || executeNumber(*offsets, *data_columns[0], null_map, res_values) + || executeNumber(*offsets, *data_columns[0], null_map, res_values) + || executeNumber(*offsets, *data_columns[0], null_map, res_values) + || executeString(*offsets, *data_columns[0], null_map, res_values) + || executeHashed(*offsets, data_columns, res_values); } else { - if (!execute128bit(*offsets, data_columns, null_maps, res_values, has_nullable_columns)) - executeHashed(*offsets, original_data_columns, res_values); + execute128bit(*offsets, data_columns, res_values) + || executeHashed(*offsets, data_columns, res_values); } block.getByPosition(result).column = std::move(res); } template -bool FunctionArrayUniq::executeNumber(const ColumnArray * array, const IColumn * null_map, ColumnUInt32::Container & res_values) +bool FunctionArrayUniq::executeNumber(const ColumnArray::Offsets & offsets, const IColumn & data, const NullMap * null_map, ColumnUInt32::Container & res_values) { - const IColumn * inner_col; - - const auto & array_data = array->getData(); - if (array_data.isColumnNullable()) - { - const auto & nullable_col = static_cast(array_data); - inner_col = 
&nullable_col.getNestedColumn(); - } - else - inner_col = &array_data; - - const ColumnVector * nested = checkAndGetColumn>(inner_col); + const ColumnVector * nested = checkAndGetColumn>(&data); if (!nested) return false; - const ColumnArray::Offsets & offsets = array->getOffsets(); - const typename ColumnVector::Container & values = nested->getData(); + const auto & values = nested->getData(); using Set = ClearableHashSet, HashTableGrower, HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(T)>>; - const PaddedPODArray * null_map_data = nullptr; - if (null_map) - null_map_data = &static_cast(null_map)->getData(); - Set set; ColumnArray::Offset prev_off = 0; for (size_t i = 0; i < offsets.size(); ++i) @@ -197,7 +168,7 @@ bool FunctionArrayUniq::executeNumber(const ColumnArray * array, const IColumn * ColumnArray::Offset off = offsets[i]; for (ColumnArray::Offset j = prev_off; j < off; ++j) { - if (null_map_data && ((*null_map_data)[j] == 1)) + if (null_map && (*null_map)[j]) found_null = true; else set.insert(values[j]); @@ -209,31 +180,15 @@ bool FunctionArrayUniq::executeNumber(const ColumnArray * array, const IColumn * return true; } -bool FunctionArrayUniq::executeString(const ColumnArray * array, const IColumn * null_map, ColumnUInt32::Container & res_values) +bool FunctionArrayUniq::executeString(const ColumnArray::Offsets & offsets, const IColumn & data, const NullMap * null_map, ColumnUInt32::Container & res_values) { - const IColumn * inner_col; - - const auto & array_data = array->getData(); - if (array_data.isColumnNullable()) - { - const auto & nullable_col = static_cast(array_data); - inner_col = &nullable_col.getNestedColumn(); - } - else - inner_col = &array_data; - - const ColumnString * nested = checkAndGetColumn(inner_col); + const ColumnString * nested = checkAndGetColumn(&data); if (!nested) return false; - const ColumnArray::Offsets & offsets = array->getOffsets(); using Set = ClearableHashSet, 
HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(StringRef)>>; - const PaddedPODArray * null_map_data = nullptr; - if (null_map) - null_map_data = &static_cast(null_map)->getData(); - Set set; ColumnArray::Offset prev_off = 0; for (size_t i = 0; i < offsets.size(); ++i) @@ -243,7 +198,7 @@ bool FunctionArrayUniq::executeString(const ColumnArray * array, const IColumn * ColumnArray::Offset off = offsets[i]; for (ColumnArray::Offset j = prev_off; j < off; ++j) { - if (null_map_data && ((*null_map_data)[j] == 1)) + if (null_map && (*null_map)[j]) found_null = true; else set.insert(nested->getDataAt(j)); @@ -259,9 +214,7 @@ bool FunctionArrayUniq::executeString(const ColumnArray * array, const IColumn * bool FunctionArrayUniq::execute128bit( const ColumnArray::Offsets & offsets, const ColumnRawPtrs & columns, - const ColumnRawPtrs & null_maps, - ColumnUInt32::Container & res_values, - bool has_nullable_columns) + ColumnUInt32::Container & res_values) { size_t count = columns.size(); size_t keys_bytes = 0; @@ -274,8 +227,6 @@ bool FunctionArrayUniq::execute128bit( key_sizes[j] = columns[j]->sizeOfValueIfFixed(); keys_bytes += key_sizes[j]; } - if (has_nullable_columns) - keys_bytes += std::tuple_size>::value; if (keys_bytes > 16) return false; @@ -283,19 +234,6 @@ bool FunctionArrayUniq::execute128bit( using Set = ClearableHashSet, HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(UInt128)>>; - /// Suppose that, for a given row, each of the N columns has an array whose length is M. - /// Denote arr_i each of these arrays (1 <= i <= N). Then the following is performed: - /// - /// col1 ... colN - /// - /// arr_1[1], ..., arr_N[1] -> pack into a binary blob b1 - /// . - /// . - /// . - /// arr_1[M], ..., arr_N[M] -> pack into a binary blob bM - /// - /// Each binary blob is inserted into a hash table. 
- /// Set set; ColumnArray::Offset prev_off = 0; for (ColumnArray::Offset i = 0; i < offsets.size(); ++i) @@ -303,29 +241,7 @@ bool FunctionArrayUniq::execute128bit( set.clear(); ColumnArray::Offset off = offsets[i]; for (ColumnArray::Offset j = prev_off; j < off; ++j) - { - if (has_nullable_columns) - { - KeysNullMap bitmap{}; - - for (ColumnArray::Offset i = 0; i < columns.size(); ++i) - { - if (null_maps[i]) - { - const auto & null_map = static_cast(*null_maps[i]).getData(); - if (null_map[j] == 1) - { - ColumnArray::Offset bucket = i / 8; - ColumnArray::Offset offset = i % 8; - bitmap[bucket] |= UInt8(1) << offset; - } - } - } - set.insert(packFixed(j, count, columns, key_sizes, bitmap)); - } - else - set.insert(packFixed(j, count, columns, key_sizes)); - } + set.insert(packFixed(j, count, columns, key_sizes)); res_values[i] = set.size(); prev_off = off; @@ -334,7 +250,7 @@ bool FunctionArrayUniq::execute128bit( return true; } -void FunctionArrayUniq::executeHashed( +bool FunctionArrayUniq::executeHashed( const ColumnArray::Offsets & offsets, const ColumnRawPtrs & columns, ColumnUInt32::Container & res_values) @@ -356,6 +272,8 @@ void FunctionArrayUniq::executeHashed( res_values[i] = set.size(); prev_off = off; } + + return true; } diff --git a/dbms/src/Functions/if.cpp b/dbms/src/Functions/if.cpp index 42e8b65eb05..41a5277401c 100644 --- a/dbms/src/Functions/if.cpp +++ b/dbms/src/Functions/if.cpp @@ -638,9 +638,7 @@ private: static ColumnPtr materializeColumnIfConst(const ColumnPtr & column) { - if (ColumnPtr res = column->convertToFullColumnIfConst()) - return res; - return column; + return column->convertToFullColumnIfConst(); } static ColumnPtr makeNullableColumnIfNot(const ColumnPtr & column) diff --git a/dbms/src/Functions/materialize.cpp b/dbms/src/Functions/materialize.cpp index 4d3a1a57a77..552a2456f53 100644 --- a/dbms/src/Functions/materialize.cpp +++ b/dbms/src/Functions/materialize.cpp @@ -34,11 +34,7 @@ public: void executeImpl(Block & block, 
const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { - const auto & src = block.getByPosition(arguments[0]).column; - if (ColumnPtr converted = src->convertToFullColumnIfConst()) - block.getByPosition(result).column = converted; - else - block.getByPosition(result).column = src; + block.getByPosition(result).column = block.getByPosition(arguments[0]).column->convertToFullColumnIfConst(); } }; diff --git a/dbms/src/Functions/regexpQuoteMeta.cpp b/dbms/src/Functions/regexpQuoteMeta.cpp index cc8f1791578..0ae0a0da97d 100644 --- a/dbms/src/Functions/regexpQuoteMeta.cpp +++ b/dbms/src/Functions/regexpQuoteMeta.cpp @@ -81,7 +81,7 @@ public: while (true) { - const char * next_src_pos = find_first_symbols<'\0', '\\', '|', '(', ')', '^', '$', '.', '[', '?', '*', '+', '{', ':', '-'>(src_pos, src_end); + const char * next_src_pos = find_first_symbols<'\0', '\\', '|', '(', ')', '^', '$', '.', '[', ']', '?', '*', '+', '{', ':', '-'>(src_pos, src_end); size_t bytes_to_copy = next_src_pos - src_pos; size_t old_dst_size = dst_data.size(); diff --git a/dbms/src/Functions/trim.cpp b/dbms/src/Functions/trim.cpp index 0484f369361..ff8c7e536ce 100644 --- a/dbms/src/Functions/trim.cpp +++ b/dbms/src/Functions/trim.cpp @@ -92,7 +92,9 @@ private: { #if __SSE4_2__ /// skip whitespace from left in blocks of up to 16 characters - constexpr auto left_sse_mode = base_sse_mode | _SIDD_LEAST_SIGNIFICANT; + + /// Avoid gcc bug: _mm_cmpistri: error: the third argument must be an 8-bit immediate + enum { left_sse_mode = base_sse_mode | _SIDD_LEAST_SIGNIFICANT }; while (mask == bytes_sse && chars_to_trim_left < size_sse) { const auto chars = _mm_loadu_si128(reinterpret_cast(data + chars_to_trim_left)); @@ -110,7 +112,9 @@ private: const auto trim_right_size = size - chars_to_trim_left; #if __SSE4_2__ /// try to skip whitespace from right in blocks of up to 16 characters - constexpr auto right_sse_mode = base_sse_mode | _SIDD_MOST_SIGNIFICANT; + + /// Avoid gcc bug: 
_mm_cmpistri: error: the third argument must be an 8-bit immediate + enum { right_sse_mode = base_sse_mode | _SIDD_MOST_SIGNIFICANT }; const auto trim_right_size_sse = trim_right_size - (trim_right_size % bytes_sse); while (mask == bytes_sse && chars_to_trim_right < trim_right_size_sse) { diff --git a/dbms/src/Functions/tuple.cpp b/dbms/src/Functions/tuple.cpp index 884ec880ffb..dcffff0ae9f 100644 --- a/dbms/src/Functions/tuple.cpp +++ b/dbms/src/Functions/tuple.cpp @@ -65,14 +65,11 @@ public: Columns tuple_columns(tuple_size); for (size_t i = 0; i < tuple_size; ++i) { - tuple_columns[i] = block.getByPosition(arguments[i]).column; - /** If tuple is mixed of constant and not constant columns, - * convert all to non-constant columns, - * because many places in code expect all non-constant columns in non-constant tuple. - */ - if (ColumnPtr converted = tuple_columns[i]->convertToFullColumnIfConst()) - tuple_columns[i] = converted; + * convert all to non-constant columns, + * because many places in code expect all non-constant columns in non-constant tuple. 
+ */ + tuple_columns[i] = block.getByPosition(arguments[i]).column->convertToFullColumnIfConst(); } block.getByPosition(result).column = ColumnTuple::create(tuple_columns); } diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp index 03f04d791a0..d90da905c2e 100644 --- a/dbms/src/Interpreters/Aggregator.cpp +++ b/dbms/src/Interpreters/Aggregator.cpp @@ -772,13 +772,8 @@ bool Aggregator::executeOnBlock(const Block & block, AggregatedDataVariants & re /// Remember the columns we will work with for (size_t i = 0; i < params.keys_size; ++i) { - key_columns[i] = block.safeGetByPosition(params.keys[i]).column.get(); - - if (ColumnPtr converted = key_columns[i]->convertToFullColumnIfConst()) - { - materialized_columns.push_back(converted); - key_columns[i] = materialized_columns.back().get(); - } + materialized_columns.push_back(block.safeGetByPosition(params.keys[i]).column->convertToFullColumnIfConst()); + key_columns[i] = materialized_columns.back().get(); if (const auto * low_cardinality_column = typeid_cast(key_columns[i])) { @@ -797,13 +792,8 @@ bool Aggregator::executeOnBlock(const Block & block, AggregatedDataVariants & re { for (size_t j = 0; j < aggregate_columns[i].size(); ++j) { - aggregate_columns[i][j] = block.safeGetByPosition(params.aggregates[i].arguments[j]).column.get(); - - if (ColumnPtr converted = aggregate_columns[i][j]->convertToFullColumnIfConst()) - { - materialized_columns.push_back(converted); - aggregate_columns[i][j] = materialized_columns.back().get(); - } + materialized_columns.push_back(block.safeGetByPosition(params.aggregates[i].arguments[j]).column->convertToFullColumnIfConst()); + aggregate_columns[i][j] = materialized_columns.back().get(); if (auto * col_low_cardinality = typeid_cast(aggregate_columns[i][j])) { diff --git a/dbms/src/Interpreters/ExpressionActions.cpp b/dbms/src/Interpreters/ExpressionActions.cpp index 63f94591b05..0714c8a954a 100644 --- a/dbms/src/Interpreters/ExpressionActions.cpp 
+++ b/dbms/src/Interpreters/ExpressionActions.cpp @@ -375,10 +375,7 @@ void ExpressionAction::execute(Block & block, bool dry_run) const if (array_joined_columns.empty()) throw Exception("No arrays to join", ErrorCodes::LOGICAL_ERROR); - ColumnPtr any_array_ptr = block.getByName(*array_joined_columns.begin()).column; - if (ColumnPtr converted = any_array_ptr->convertToFullColumnIfConst()) - any_array_ptr = converted; - + ColumnPtr any_array_ptr = block.getByName(*array_joined_columns.begin()).column->convertToFullColumnIfConst(); const ColumnArray * any_array = typeid_cast(&*any_array_ptr); if (!any_array) throw Exception("ARRAY JOIN of not array: " + *array_joined_columns.begin(), ErrorCodes::TYPE_MISMATCH); @@ -416,10 +413,10 @@ void ExpressionAction::execute(Block & block, bool dry_run) const Block tmp_block{src_col, column_of_max_length, {{}, src_col.type, {}}}; function_arrayResize->build({src_col, column_of_max_length})->execute(tmp_block, {0, 1}, 2, rows); - any_array_ptr = src_col.column = tmp_block.safeGetByPosition(2).column; + src_col.column = tmp_block.safeGetByPosition(2).column; + any_array_ptr = src_col.column->convertToFullColumnIfConst(); } - if (ColumnPtr converted = any_array_ptr->convertToFullColumnIfConst()) - any_array_ptr = converted; + any_array = typeid_cast(&*any_array_ptr); } else if (array_join_is_left && !unaligned_array_join) @@ -434,10 +431,7 @@ void ExpressionAction::execute(Block & block, bool dry_run) const non_empty_array_columns[name] = tmp_block.safeGetByPosition(1).column; } - any_array_ptr = non_empty_array_columns.begin()->second; - if (ColumnPtr converted = any_array_ptr->convertToFullColumnIfConst()) - any_array_ptr = converted; - + any_array_ptr = non_empty_array_columns.begin()->second->convertToFullColumnIfConst(); any_array = &typeid_cast(*any_array_ptr); } @@ -452,9 +446,7 @@ void ExpressionAction::execute(Block & block, bool dry_run) const throw Exception("ARRAY JOIN of not array: " + current.name, 
ErrorCodes::TYPE_MISMATCH); ColumnPtr array_ptr = (array_join_is_left && !unaligned_array_join) ? non_empty_array_columns[current.name] : current.column; - - if (ColumnPtr converted = array_ptr->convertToFullColumnIfConst()) - array_ptr = converted; + array_ptr = array_ptr->convertToFullColumnIfConst(); const ColumnArray & array = typeid_cast(*array_ptr); if (!unaligned_array_join && !array.hasEqualOffsets(typeid_cast(*any_array_ptr))) diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index d688a158562..9ab5f26f9e3 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -437,14 +437,8 @@ bool Join::insertFromBlock(const Block & block) /// Memoize key columns to work. for (size_t i = 0; i < keys_size; ++i) { - materialized_columns.emplace_back(recursiveRemoveLowCardinality(block.getByName(key_names_right[i]).column)); + materialized_columns.emplace_back(recursiveRemoveLowCardinality(block.getByName(key_names_right[i]).column->convertToFullColumnIfConst())); key_columns[i] = materialized_columns.back().get(); - - if (ColumnPtr converted = key_columns[i]->convertToFullColumnIfConst()) - { - materialized_columns.emplace_back(converted); - key_columns[i] = materialized_columns.back().get(); - } } /// We will insert to the map only keys, where all components are not NULL. @@ -483,11 +477,7 @@ bool Join::insertFromBlock(const Block & block) /// Rare case, when joined columns are constant. To avoid code bloat, simply materialize them. for (size_t i = 0; i < size; ++i) - { - ColumnPtr col = stored_block->safeGetByPosition(i).column; - if (ColumnPtr converted = col->convertToFullColumnIfConst()) - stored_block->safeGetByPosition(i).column = converted; - } + stored_block->safeGetByPosition(i).column = stored_block->safeGetByPosition(i).column->convertToFullColumnIfConst(); /// In case of LEFT and FULL joins, if use_nulls, convert joined columns to Nullable. 
if (use_nulls && (kind == ASTTableJoin::Kind::Left || kind == ASTTableJoin::Kind::Full)) @@ -685,14 +675,8 @@ void Join::joinBlockImpl( /// Memoize key columns to work with. for (size_t i = 0; i < keys_size; ++i) { - materialized_columns.emplace_back(recursiveRemoveLowCardinality(block.getByName(key_names_left[i]).column)); + materialized_columns.emplace_back(recursiveRemoveLowCardinality(block.getByName(key_names_left[i]).column->convertToFullColumnIfConst())); key_columns[i] = materialized_columns.back().get(); - - if (ColumnPtr converted = key_columns[i]->convertToFullColumnIfConst()) - { - materialized_columns.emplace_back(converted); - key_columns[i] = materialized_columns.back().get(); - } } /// Keys with NULL value in any column won't join to anything. @@ -710,10 +694,7 @@ void Join::joinBlockImpl( { for (size_t i = 0; i < existing_columns; ++i) { - auto & col = block.getByPosition(i).column; - - if (ColumnPtr converted = col->convertToFullColumnIfConst()) - col = converted; + block.getByPosition(i).column = block.getByPosition(i).column->convertToFullColumnIfConst(); /// If use_nulls, convert left columns (except keys) to Nullable. 
if (use_nulls) diff --git a/dbms/src/Interpreters/Set.cpp b/dbms/src/Interpreters/Set.cpp index 6bc99f3355d..022cd9bd404 100644 --- a/dbms/src/Interpreters/Set.cpp +++ b/dbms/src/Interpreters/Set.cpp @@ -121,15 +121,10 @@ void Set::setHeader(const Block & block) /// Remember the columns we will work with for (size_t i = 0; i < keys_size; ++i) { - key_columns.emplace_back(block.safeGetByPosition(i).column.get()); + materialized_columns.emplace_back(block.safeGetByPosition(i).column->convertToFullColumnIfConst()); + key_columns.emplace_back(materialized_columns.back().get()); data_types.emplace_back(block.safeGetByPosition(i).type); - if (ColumnPtr converted = key_columns.back()->convertToFullColumnIfConst()) - { - materialized_columns.emplace_back(converted); - key_columns.back() = materialized_columns.back().get(); - } - /// Convert low cardinality column to full. if (auto * low_cardinality_type = typeid_cast(data_types.back().get())) { @@ -175,20 +170,8 @@ bool Set::insertFromBlock(const Block & block) /// Remember the columns we will work with for (size_t i = 0; i < keys_size; ++i) { - key_columns.emplace_back(block.safeGetByPosition(i).column.get()); - - if (ColumnPtr converted = key_columns.back()->convertToFullColumnIfConst()) - { - materialized_columns.emplace_back(converted); - key_columns.back() = materialized_columns.back().get(); - } - - /// Convert low cardinality column to full. 
- if (key_columns.back()->lowCardinality()) - { - materialized_columns.emplace_back(key_columns.back()->convertToFullColumnIfLowCardinality()); - key_columns.back() = materialized_columns.back().get(); - } + materialized_columns.emplace_back(block.safeGetByPosition(i).column->convertToFullColumnIfConst()->convertToFullColumnIfLowCardinality()); + key_columns.emplace_back(materialized_columns.back().get()); } size_t rows = block.rows(); @@ -365,18 +348,13 @@ ColumnPtr Set::execute(const Block & block, bool negative) const for (size_t i = 0; i < num_key_columns; ++i) { - key_columns.push_back(block.safeGetByPosition(i).column.get()); - if (!removeNullable(data_types[i])->equals(*removeNullable(block.safeGetByPosition(i).type))) throw Exception("Types of column " + toString(i + 1) + " in section IN don't match: " + data_types[i]->getName() + " on the right, " + block.safeGetByPosition(i).type->getName() + " on the left.", ErrorCodes::TYPE_MISMATCH); - if (ColumnPtr converted = key_columns.back()->convertToFullColumnIfConst()) - { - materialized_columns.emplace_back(converted); - key_columns.back() = materialized_columns.back().get(); - } + materialized_columns.emplace_back(block.safeGetByPosition(i).column->convertToFullColumnIfConst()); + key_columns.emplace_back() = materialized_columns.back().get(); } /// We will check existence in Set only for keys, where all components are not NULL. 
diff --git a/dbms/src/Interpreters/evaluateMissingDefaults.cpp b/dbms/src/Interpreters/evaluateMissingDefaults.cpp index 0b330fb00cc..33dce42ab8e 100644 --- a/dbms/src/Interpreters/evaluateMissingDefaults.cpp +++ b/dbms/src/Interpreters/evaluateMissingDefaults.cpp @@ -67,8 +67,7 @@ void evaluateMissingDefaults(Block & block, if (copy_block.has(col->name)) { auto evaluated_col = copy_block.getByName(col->name); - if (ColumnPtr converted = evaluated_col.column->convertToFullColumnIfConst()) - evaluated_col.column = converted; + evaluated_col.column = evaluated_col.column->convertToFullColumnIfConst(); block.insert(pos, std::move(evaluated_col)); } diff --git a/dbms/src/Parsers/ASTAlterQuery.cpp b/dbms/src/Parsers/ASTAlterQuery.cpp index feec84d6e98..c5cdf1475e3 100644 --- a/dbms/src/Parsers/ASTAlterQuery.cpp +++ b/dbms/src/Parsers/ASTAlterQuery.cpp @@ -51,7 +51,7 @@ void ASTAlterCommand::formatImpl( if (type == ASTAlterCommand::ADD_COLUMN) { - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "ADD COLUMN " << (settings.hilite ? hilite_none : ""); + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "ADD COLUMN " << (if_not_exists ? "IF NOT EXISTS " : "") << (settings.hilite ? hilite_none : ""); col_decl->formatImpl(settings, state, frame); /// AFTER @@ -64,7 +64,7 @@ void ASTAlterCommand::formatImpl( else if (type == ASTAlterCommand::DROP_COLUMN) { settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str - << (clear_column ? "CLEAR " : "DROP ") << "COLUMN " << (settings.hilite ? hilite_none : ""); + << (clear_column ? "CLEAR " : "DROP ") << "COLUMN " << (if_exists ? "IF EXISTS " : "") << (settings.hilite ? hilite_none : ""); column->formatImpl(settings, state, frame); if (partition) { @@ -74,7 +74,7 @@ void ASTAlterCommand::formatImpl( } else if (type == ASTAlterCommand::MODIFY_COLUMN) { - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "MODIFY COLUMN " << (settings.hilite ? 
hilite_none : ""); + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "MODIFY COLUMN " << (if_exists ? "IF EXISTS " : "") << (settings.hilite ? hilite_none : ""); col_decl->formatImpl(settings, state, frame); } else if (type == ASTAlterCommand::MODIFY_ORDER_BY) diff --git a/dbms/src/Parsers/ASTAlterQuery.h b/dbms/src/Parsers/ASTAlterQuery.h index e1cd74cb4b5..a6759482a56 100644 --- a/dbms/src/Parsers/ASTAlterQuery.h +++ b/dbms/src/Parsers/ASTAlterQuery.h @@ -78,6 +78,10 @@ public: bool clear_column = false; /// for CLEAR COLUMN (do not drop column from metadata) + bool if_not_exists = false; /// option for ADD_COLUMN + + bool if_exists = false; /// option for DROP_COLUMN, MODIFY_COLUMN, COMMENT_COLUMN + /** For FETCH PARTITION - the path in ZK to the shard, from which to download the partition. */ String from; diff --git a/dbms/src/Parsers/ParserAlterQuery.cpp b/dbms/src/Parsers/ParserAlterQuery.cpp index 83ad42ebbcb..b17467ed365 100644 --- a/dbms/src/Parsers/ParserAlterQuery.cpp +++ b/dbms/src/Parsers/ParserAlterQuery.cpp @@ -36,6 +36,8 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected ParserKeyword s_partition("PARTITION"); ParserKeyword s_after("AFTER"); + ParserKeyword s_if_not_exists("IF NOT EXISTS"); + ParserKeyword s_if_exists("IF EXISTS"); ParserKeyword s_from("FROM"); ParserKeyword s_in_partition("IN PARTITION"); ParserKeyword s_with("WITH"); @@ -57,6 +59,9 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected if (s_add_column.ignore(pos, expected)) { + if (s_if_not_exists.ignore(pos, expected)) + command->if_not_exists = true; + if (!parser_col_decl.parse(pos, command->col_decl, expected)) return false; @@ -77,6 +82,9 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected } else if (s_drop_column.ignore(pos, expected)) { + if (s_if_exists.ignore(pos, expected)) + command->if_exists = true; + if (!parser_name.parse(pos, command->column, 
expected)) return false; @@ -85,6 +93,9 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected } else if (s_clear_column.ignore(pos, expected)) { + if (s_if_exists.ignore(pos, expected)) + command->if_exists = true; + if (!parser_name.parse(pos, command->column, expected)) return false; @@ -190,6 +201,9 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected } else if (s_modify_column.ignore(pos, expected)) { + if (s_if_exists.ignore(pos, expected)) + command->if_exists = true; + if (!parser_modify_col_decl.parse(pos, command->col_decl, expected)) return false; @@ -224,6 +238,9 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected } else if (s_comment_column.ignore(pos, expected)) { + if (s_if_exists.ignore(pos, expected)) + command->if_exists = true; + if (!parser_name.parse(pos, command->column, expected)) return false; diff --git a/dbms/src/Parsers/ParserAlterQuery.h b/dbms/src/Parsers/ParserAlterQuery.h index 2eecfaf20d6..282a4277e17 100644 --- a/dbms/src/Parsers/ParserAlterQuery.h +++ b/dbms/src/Parsers/ParserAlterQuery.h @@ -8,12 +8,12 @@ namespace DB /** Query like this: * ALTER TABLE [db.]name [ON CLUSTER cluster] - * [ADD COLUMN col_name type [AFTER col_after],] - * [DROP COLUMN col_to_drop, ...] - * [CLEAR COLUMN col_to_clear [IN PARTITION partition],] - * [MODIFY COLUMN col_to_modify type, ...] + * [ADD COLUMN [IF NOT EXISTS] col_name type [AFTER col_after],] + * [DROP COLUMN [IF EXISTS] col_to_drop, ...] + * [CLEAR COLUMN [IF EXISTS] col_to_clear [IN PARTITION partition],] + * [MODIFY COLUMN [IF EXISTS] col_to_modify type, ...] * [MODIFY PRIMARY KEY (a, b, c...)] - * [COMMENT COLUMN col_name string] + * [COMMENT COLUMN [IF EXISTS] col_name string] * [DROP|DETACH|ATTACH PARTITION|PART partition, ...] * [FETCH PARTITION partition FROM ...] 
* [FREEZE [PARTITION] [WITH NAME name]] diff --git a/dbms/src/Storages/AlterCommands.cpp b/dbms/src/Storages/AlterCommands.cpp index ba5ed9e21bd..332ccfde3f0 100644 --- a/dbms/src/Storages/AlterCommands.cpp +++ b/dbms/src/Storages/AlterCommands.cpp @@ -52,6 +52,8 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ if (command_ast->column) command.after_column = typeid_cast(*command_ast->column).name; + command.if_not_exists = command_ast->if_not_exists; + return command; } else if (command_ast->type == ASTAlterCommand::DROP_COLUMN && !command_ast->partition) @@ -62,6 +64,7 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ AlterCommand command; command.type = AlterCommand::DROP_COLUMN; command.column_name = typeid_cast(*(command_ast->column)).name; + command.if_exists = command_ast->if_exists; return command; } else if (command_ast->type == ASTAlterCommand::MODIFY_COLUMN) @@ -88,6 +91,7 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ const auto & ast_comment = typeid_cast(*ast_col_decl.comment); command.comment = ast_comment.value.get(); } + command.if_exists = command_ast->if_exists; return command; } @@ -99,6 +103,7 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ command.column_name = ast_identifier.name; const auto & ast_comment = typeid_cast(*command_ast->comment); command.comment = ast_comment.value.get(); + command.if_exists = command_ast->if_exists; return command; } else if (command_ast->type == ASTAlterCommand::MODIFY_ORDER_BY) @@ -300,7 +305,8 @@ void AlterCommands::apply(ColumnsDescription & columns_description, ASTPtr & ord auto new_primary_key_ast = primary_key_ast; for (const AlterCommand & command : *this) - command.apply(new_columns_description, new_order_by_ast, new_primary_key_ast); + if (!command.ignore) + command.apply(new_columns_description, new_order_by_ast, new_primary_key_ast); columns_description = std::move(new_columns_description); order_by_ast = 
std::move(new_order_by_ast); @@ -328,45 +334,61 @@ void AlterCommands::validate(const IStorage & table, const Context & context) if (command.type == AlterCommand::ADD_COLUMN) { if (std::end(all_columns) != column_it) - throw Exception{"Cannot add column " + column_name + ": column with this name already exists", ErrorCodes::ILLEGAL_COLUMN}; + { + if (command.if_not_exists) + command.ignore = true; + else + throw Exception{"Cannot add column " + column_name + ": column with this name already exists", ErrorCodes::ILLEGAL_COLUMN}; + } } else if (command.type == AlterCommand::MODIFY_COLUMN) { if (std::end(all_columns) == column_it) - throw Exception{"Wrong column name. Cannot find column " + column_name + " to modify", ErrorCodes::ILLEGAL_COLUMN}; + { + if (command.if_exists) + command.ignore = true; + else + throw Exception{"Wrong column name. Cannot find column " + column_name + " to modify", ErrorCodes::ILLEGAL_COLUMN}; + } - all_columns.erase(column_it); - defaults.erase(column_name); + if (!command.ignore) + { + all_columns.erase(column_it); + defaults.erase(column_name); + } } - /// we're creating dummy DataTypeUInt8 in order to prevent the NullPointerException in ExpressionActions - all_columns.emplace_back(column_name, command.data_type ? command.data_type : std::make_shared()); - - if (command.default_expression) + if (!command.ignore) { - if (command.data_type) + /// we're creating dummy DataTypeUInt8 in order to prevent the NullPointerException in ExpressionActions + all_columns.emplace_back(column_name, command.data_type ? 
command.data_type : std::make_shared()); + + if (command.default_expression) { - const auto & final_column_name = column_name; - const auto tmp_column_name = final_column_name + "_tmp"; - const auto column_type_raw_ptr = command.data_type.get(); + if (command.data_type) + { + const auto &final_column_name = column_name; + const auto tmp_column_name = final_column_name + "_tmp"; + const auto column_type_raw_ptr = command.data_type.get(); - default_expr_list->children.emplace_back(setAlias( - makeASTFunction("CAST", std::make_shared(tmp_column_name), - std::make_shared(column_type_raw_ptr->getName())), - final_column_name)); + default_expr_list->children.emplace_back(setAlias( + makeASTFunction("CAST", std::make_shared(tmp_column_name), + std::make_shared(column_type_raw_ptr->getName())), + final_column_name)); - default_expr_list->children.emplace_back(setAlias(command.default_expression->clone(), tmp_column_name)); + default_expr_list->children.emplace_back(setAlias(command.default_expression->clone(), tmp_column_name)); - defaulted_columns.emplace_back(NameAndTypePair{column_name, command.data_type}, &command); - } - else - { - /// no type explicitly specified, will deduce later - default_expr_list->children.emplace_back( - setAlias(command.default_expression->clone(), column_name)); + defaulted_columns.emplace_back(NameAndTypePair{column_name, command.data_type}, &command); + } + else + { + /// no type explicitly specified, will deduce later + default_expr_list->children.emplace_back( + setAlias(command.default_expression->clone(), column_name)); - defaulted_columns.emplace_back(NameAndTypePair{column_name, nullptr}, &command); + defaulted_columns.emplace_back(NameAndTypePair{column_name, nullptr}, &command); + } } } } @@ -407,8 +429,13 @@ void AlterCommands::validate(const IStorage & table, const Context & context) } if (!found) - throw Exception("Wrong column name. 
Cannot find column " + command.column_name + " to drop", - ErrorCodes::ILLEGAL_COLUMN); + { + if (command.if_exists) + command.ignore = true; + else + throw Exception("Wrong column name. Cannot find column " + command.column_name + " to drop", + ErrorCodes::ILLEGAL_COLUMN); + } } else if (command.type == AlterCommand::COMMENT_COLUMN) { @@ -416,7 +443,10 @@ void AlterCommands::validate(const IStorage & table, const Context & context) std::bind(namesEqual, std::cref(command.column_name), std::placeholders::_1)); if (column_it == std::end(all_columns)) { - throw Exception{"Wrong column name. Cannot find column " + command.column_name + " to comment", ErrorCodes::ILLEGAL_COLUMN}; + if (command.if_exists) + command.ignore = true; + else + throw Exception{"Wrong column name. Cannot find column " + command.column_name + " to comment", ErrorCodes::ILLEGAL_COLUMN}; } } } diff --git a/dbms/src/Storages/AlterCommands.h b/dbms/src/Storages/AlterCommands.h index f1adbdaf9b0..f067d3811cc 100644 --- a/dbms/src/Storages/AlterCommands.h +++ b/dbms/src/Storages/AlterCommands.h @@ -43,15 +43,26 @@ struct AlterCommand /// For ADD - after which column to add a new one. If an empty string, add to the end. To add to the beginning now it is impossible. String after_column; + /// For DROP_COLUMN, MODIFY_COLUMN, COMMENT_COLUMN + bool if_exists; + + /// For ADD_COLUMN + bool if_not_exists; + /// For MODIFY_ORDER_BY ASTPtr order_by; + /// indicates that this command should not be applied, for example in case of if_exists=true and column doesn't exist. 
+ bool ignore = false; + AlterCommand() = default; AlterCommand(const Type type, const String & column_name, const DataTypePtr & data_type, const ColumnDefaultKind default_kind, const ASTPtr & default_expression, - const String & after_column = String{}, const String & comment = "") // TODO: разобраться здесь с параметром по умолчанию + const String & after_column = String{}, const String & comment = "", + const bool if_exists = false, const bool if_not_exists = false) // TODO: разобраться здесь с параметром по умолчанию : type{type}, column_name{column_name}, data_type{data_type}, default_kind{default_kind}, - default_expression{default_expression}, comment(comment), after_column{after_column} + default_expression{default_expression}, comment(comment), after_column{after_column}, + if_exists(if_exists), if_not_exists(if_not_exists) {} static std::optional parse(const ASTAlterCommand * command); diff --git a/dbms/src/Storages/VirtualColumnUtils.cpp b/dbms/src/Storages/VirtualColumnUtils.cpp index ba7f7005d80..d78a7a36727 100644 --- a/dbms/src/Storages/VirtualColumnUtils.cpp +++ b/dbms/src/Storages/VirtualColumnUtils.cpp @@ -166,9 +166,7 @@ void filterBlockWithQuery(const ASTPtr & query, Block & block, const Context & c /// Filter the block. 
String filter_column_name = expression_ast->getColumnName(); - ColumnPtr filter_column = block_with_filter.getByName(filter_column_name).column; - if (ColumnPtr converted = filter_column->convertToFullColumnIfConst()) - filter_column = converted; + ColumnPtr filter_column = block_with_filter.getByName(filter_column_name).column->convertToFullColumnIfConst(); const IColumn::Filter & filter = typeid_cast(*filter_column).getData(); for (size_t i = 0; i < block.columns(); ++i) diff --git a/dbms/tests/integration/test_match_process_uid_against_data_owner/__init__.py b/dbms/tests/integration/test_match_process_uid_against_data_owner/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/dbms/tests/integration/test_match_process_uid_against_data_owner/configs/config.xml b/dbms/tests/integration/test_match_process_uid_against_data_owner/configs/config.xml new file mode 100644 index 00000000000..1bd6dc45549 --- /dev/null +++ b/dbms/tests/integration/test_match_process_uid_against_data_owner/configs/config.xml @@ -0,0 +1,17 @@ + + + + /var/log/clickhouse-server/log.log + /var/log/clickhouse-server/clickhouse-server.err.log + /var/log/clickhouse-server/stderr.log + /var/log/clickhouse-server/stdout.log + + + 9000 + 127.0.0.1 + + /var/lib/clickhouse/ + + 5368709120 + users.xml + diff --git a/dbms/tests/integration/test_match_process_uid_against_data_owner/configs/users.xml b/dbms/tests/integration/test_match_process_uid_against_data_owner/configs/users.xml new file mode 100644 index 00000000000..2a54396feca --- /dev/null +++ b/dbms/tests/integration/test_match_process_uid_against_data_owner/configs/users.xml @@ -0,0 +1,7 @@ + + + + + + + diff --git a/dbms/tests/integration/test_match_process_uid_against_data_owner/test.py b/dbms/tests/integration/test_match_process_uid_against_data_owner/test.py new file mode 100644 index 00000000000..77cbe25ff80 --- /dev/null +++ b/dbms/tests/integration/test_match_process_uid_against_data_owner/test.py @@ -0,0 +1,37 @@ 
+import docker +import os +import pwd +import pytest +import re + +from helpers.cluster import ClickHouseCluster, CLICKHOUSE_START_COMMAND + + +def test_different_user(): + current_user_id = os.getuid() + + if current_user_id != 0: + return + + other_user_id = pwd.getpwnam('nobody').pw_uid + + cluster = ClickHouseCluster(__file__) + node = cluster.add_instance('node') + + cluster.start() + + docker_api = docker.from_env().api + container = node.get_docker_handle() + container.stop() + container.start() + container.exec_run('chown {} /var/lib/clickhouse'.format(other_user_id), privileged=True) + container.exec_run(CLICKHOUSE_START_COMMAND) + + cluster.shutdown() # cleanup + + with open(os.path.join(node.path, 'logs/clickhouse-server.err.log')) as log: + expected_message = "Effective user of the process \(.*\) does not match the owner of the data \(.*\)\. Run under 'sudo -u .*'\." + last_message = log.readlines()[-1].strip() + + if re.search(expected_message, last_message) is None: + pytest.fail('Expected the server to fail with a message "{}", but the last message is "{}"'.format(expected_message, last_message)) diff --git a/dbms/tests/queries/0_stateless/00030_alter_table.sql b/dbms/tests/queries/0_stateless/00030_alter_table.sql index 231840818cf..cc5789b4040 100644 --- a/dbms/tests/queries/0_stateless/00030_alter_table.sql +++ b/dbms/tests/queries/0_stateless/00030_alter_table.sql @@ -23,6 +23,13 @@ ALTER TABLE test.alter_test DROP COLUMN NestedColumn.S; ALTER TABLE test.alter_test DROP COLUMN AddedNested1.B; +ALTER TABLE test.alter_test ADD COLUMN IF NOT EXISTS Added0 UInt32; +ALTER TABLE test.alter_test ADD COLUMN IF NOT EXISTS AddedNested1 Nested(A UInt32, B UInt64); +ALTER TABLE test.alter_test ADD COLUMN IF NOT EXISTS AddedNested1.C Array(String); +ALTER TABLE test.alter_test MODIFY COLUMN IF EXISTS ToDrop UInt64; +ALTER TABLE test.alter_test DROP COLUMN IF EXISTS ToDrop; +ALTER TABLE test.alter_test COMMENT COLUMN IF EXISTS ToDrop 'new comment'; + DESC 
TABLE test.alter_test; SELECT * FROM test.alter_test; diff --git a/dbms/tests/queries/0_stateless/00803_xxhash.reference b/dbms/tests/queries/0_stateless/00803_xxhash.reference new file mode 100644 index 00000000000..7432b657191 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00803_xxhash.reference @@ -0,0 +1,54 @@ +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 diff --git a/dbms/tests/queries/0_stateless/00803_xxhash.sql b/dbms/tests/queries/0_stateless/00803_xxhash.sql new file mode 100644 index 00000000000..2e6a1710e3c --- /dev/null +++ b/dbms/tests/queries/0_stateless/00803_xxhash.sql @@ -0,0 +1,77 @@ +SELECT hex(xxHash64('')) = upper('ef46db3751d8e999'); +SELECT hex(xxHash32('')) = upper('02cc5d05'); + +SELECT hex(xxHash64('ABC')) = upper('e66ae7354fcfee98'); +SELECT hex(xxHash32('ABC')) = upper('80712ed5'); + +SELECT hex(xxHash64('xxhash')) = upper('32dd38952c4bc720'); + +-- + +SELECT xxHash64(NULL) is NULL; +SELECT xxHash64() = toUInt64(16324913028386710556); + +SELECT xxHash64(0) = toUInt64(16804241149081757544); +SELECT xxHash64(123456) = toUInt64(9049736899514479480); + +select xxHash64(toUInt8(0)) = xxHash64('\0'); +select xxHash64(toUInt16(0)) = xxHash64('\0\0'); +select xxHash64(toUInt32(0)) = xxHash64('\0\0\0\0'); +select xxHash64(toUInt64(0)) = xxHash64('\0\0\0\0\0\0\0\0'); + +SELECT xxHash64(CAST(3 AS UInt8)) = toUInt64(2244420788148980662); +SELECT xxHash64(CAST(1.2684 AS Float32)) = toUInt64(6662491266811474554); +SELECT xxHash64(CAST(-154477 AS Int64)) = toUInt64(1162348840373071858); + +SELECT xxHash64('') = toUInt64(17241709254077376921); +SELECT xxHash64('foo') = toUInt64(3728699739546630719); +SELECT xxHash64(CAST('foo' AS FixedString(3))) = xxHash64('foo'); +SELECT xxHash64(CAST('bar' AS FixedString(3))) = toUInt64(5234164152756840025); +SELECT xxHash64(x) = toUInt64(9962287286179718960) FROM (SELECT CAST(1 AS Enum8('a' = 1, 
'b' = 2)) as x); + +SELECT xxHash64('\x01') = toUInt64(9962287286179718960); +SELECT xxHash64('\x02\0') = toUInt64(6482051057365497128); +SELECT xxHash64('\x03\0\0\0') = toUInt64(13361037350151369407); + +SELECT xxHash64(1) = toUInt64(9962287286179718960); +SELECT xxHash64(toUInt16(2)) = toUInt64(6482051057365497128); +SELECT xxHash64(toUInt32(3)) = toUInt64(13361037350151369407); + +SELECT xxHash64(1, 2, 3) = toUInt64(13728743482242651702); +SELECT xxHash64(1, 3, 2) = toUInt64(10226792638577471533); +SELECT xxHash64(('a', [1, 2, 3], 4, (4, ['foo', 'bar'], 1, (1, 2)))) = toUInt64(3521288460171939489); + +-- + +SELECT xxHash32(NULL) is NULL; +SELECT xxHash32() = toUInt32(4263699484); + +SELECT xxHash32(0) = toUInt32(3479547966); +SELECT xxHash32(123456) = toUInt32(1434661961); + +select xxHash32(toUInt8(0)) = xxHash32('\0'); +select xxHash32(toUInt16(0)) = xxHash32('\0\0'); +select xxHash32(toUInt32(0)) = xxHash32('\0\0\0\0'); + +SELECT xxHash32(CAST(3 AS UInt8)) = toUInt32(565077562); +SELECT xxHash32(CAST(1.2684 AS Float32)) = toUInt32(3120514536); +SELECT xxHash32(CAST(-154477 AS Int32)) = toUInt32(3279223048); + +SELECT xxHash32('') = toUInt32(46947589); +SELECT xxHash32('foo') = toUInt32(3792637401); +SELECT xxHash32(CAST('foo' AS FixedString(3))) = xxHash32('foo'); +SELECT xxHash32(CAST('bar' AS FixedString(3))) = toUInt32(1101146924); +SELECT xxHash32(x) = toUInt32(949155633) FROM (SELECT CAST(1 AS Enum8('a' = 1, 'b' = 2)) as x); + +SELECT xxHash32('\x01') = toUInt32(949155633); +SELECT xxHash32('\x02\0') = toUInt32(332955956); +SELECT xxHash32('\x03\0\0\0') = toUInt32(2158931063); + +SELECT xxHash32(1) = toUInt32(949155633); +SELECT xxHash32(toUInt16(2)) = toUInt32(332955956); +SELECT xxHash32(toUInt32(3)) = toUInt32(2158931063); + +SELECT xxHash32(1, 2, 3) = toUInt32(441104368); +SELECT xxHash32(1, 3, 2) = toUInt32(912264289); +SELECT xxHash32(('a', [1, 2, 3], 4, (4, ['foo', 'bar'], 1, (1, 2)))) = toUInt32(1930126291); + diff --git 
a/dbms/tests/queries/0_stateless/00808_array_enumerate_segfault.reference b/dbms/tests/queries/0_stateless/00808_array_enumerate_segfault.reference new file mode 100644 index 00000000000..ad1021a5788 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00808_array_enumerate_segfault.reference @@ -0,0 +1,2 @@ +[] +[1] diff --git a/dbms/tests/queries/0_stateless/00808_array_enumerate_segfault.sql b/dbms/tests/queries/0_stateless/00808_array_enumerate_segfault.sql new file mode 100644 index 00000000000..b492d3114f8 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00808_array_enumerate_segfault.sql @@ -0,0 +1,4 @@ +SET send_logs_level = 'none'; +SELECT arrayEnumerateUniq(anyHeavy([]), []); +SELECT arrayEnumerateDense([], [sequenceCount(NULL)]); -- { serverError 190 } +SELECT arrayEnumerateDense([STDDEV_SAMP(NULL, 910947.571364)], [NULL]); diff --git a/dbms/tests/queries/0_stateless/00810_in_operators_segfault.reference b/dbms/tests/queries/0_stateless/00810_in_operators_segfault.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/dbms/tests/queries/0_stateless/00810_in_operators_segfault.sql b/dbms/tests/queries/0_stateless/00810_in_operators_segfault.sql new file mode 100644 index 00000000000..1fa525eaccc --- /dev/null +++ b/dbms/tests/queries/0_stateless/00810_in_operators_segfault.sql @@ -0,0 +1,5 @@ +SET send_logs_level = 'none'; + +SELECT globalNotIn(['"wh'], [NULL]); -- { serverError 53 } +SELECT globalIn([''], [NULL]); -- { serverError 53 } +SELECT notIn([['']], [[NULL]]); -- { serverError 53 } diff --git a/dbms/tests/queries/0_stateless/00812_prewhere_alias_array.reference b/dbms/tests/queries/0_stateless/00812_prewhere_alias_array.reference new file mode 100644 index 00000000000..573541ac970 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00812_prewhere_alias_array.reference @@ -0,0 +1 @@ +0 diff --git a/dbms/tests/queries/bugs/prewhere_alias_array.sql b/dbms/tests/queries/0_stateless/00812_prewhere_alias_array.sql similarity index 
76% rename from dbms/tests/queries/bugs/prewhere_alias_array.sql rename to dbms/tests/queries/0_stateless/00812_prewhere_alias_array.sql index 3281c6ac0c4..0679623194f 100644 --- a/dbms/tests/queries/bugs/prewhere_alias_array.sql +++ b/dbms/tests/queries/0_stateless/00812_prewhere_alias_array.sql @@ -1,4 +1,4 @@ DROP TABLE IF EXISTS test.prewhere; -CREATE TABLE test.prewhere (x Array(UInt64), y ALIAS x, s String) ENGINE = MergeTree ORDER BY tuple() -SELECT count() FROM test.prewhere PREWHERE (length(s) >= 1) = 0 WHERE NOT ignore(y) +CREATE TABLE test.prewhere (x Array(UInt64), y ALIAS x, s String) ENGINE = MergeTree ORDER BY tuple(); +SELECT count() FROM test.prewhere PREWHERE (length(s) >= 1) = 0 WHERE NOT ignore(y); DROP TABLE test.prewhere; diff --git a/dbms/tests/queries/bugs/fuzzy.sql b/dbms/tests/queries/bugs/fuzzy.sql index f81140ba8c9..1468ed648b2 100644 --- a/dbms/tests/queries/bugs/fuzzy.sql +++ b/dbms/tests/queries/bugs/fuzzy.sql @@ -1,11 +1,6 @@ SELECT sequenceCount((CAST((( SELECT NULL ) AS rg, ( SELECT ( SELECT [], 'A') AS String))]]); SELECT truncate(895, -16); -SELECT arrayEnumerateUniq(anyHeavy([]), []); -SELECT notIn([['']], [[NULL]]); SELECT subtractDays((CAST((-5263074.47) AS DateTime)), -737895); SELECT quantileDeterministic([], findClusterIndex(( SELECT subtractDays((CAST((566450.398706) AS DateTime)), 54) ) )), '\0', []; SELECT addDays((CAST((96.338) AS DateTime)), -3); -SELECT arrayEnumerateDense([], [sequenceCount(NULL)]); diff --git a/docs/en/development/tests.md b/docs/en/development/tests.md index 5455a234ae3..d9a44f78ea3 100644 --- a/docs/en/development/tests.md +++ b/docs/en/development/tests.md @@ -9,13 +9,13 @@ Each functional test sends one or multiple queries to the running ClickHouse ser Tests are located in `dbms/src/tests/queries` directory. There are two subdirectories: `stateless` and `stateful`. 
Stateless tests run queries without any preloaded test data - they often create small synthetic datasets on the fly, within the test itself. Stateful tests require preloaded test data from Yandex.Metrica and not available to general public. We tend to use only `stateless` tests and avoid adding new `stateful` tests. -Each test can be one of two types: `.sql` and `.sh`. `.sql` test is the simple SQL script that is piped to `clickhouse-client --multiquery`. `.sh` test is a script that is run by itself. +Each test can be one of two types: `.sql` and `.sh`. `.sql` test is the simple SQL script that is piped to `clickhouse-client --multiquery --testmode`. `.sh` test is a script that is run by itself. To run all tests, use `dbms/tests/clickhouse-test` tool. Look `--help` for the list of possible options. You can simply run all tests or run subset of tests filtered by substring in test name: `./clickhouse-test substring`. The most simple way to invoke functional tests is to copy `clickhouse-client` to `/usr/bin/`, run `clickhouse-server` and then run `./clickhouse-test` from its own directory. -To add new test, create a `.sql` or `.sh` file in `dbms/src/tests/queries/0_stateless` directory, check it manually and then generate `.reference` file in the following way: `clickhouse-client -n < 00000_test.sql > 00000_test.reference` or `./00000_test.sh > ./00000_test.reference`. +To add new test, create a `.sql` or `.sh` file in `dbms/src/tests/queries/0_stateless` directory, check it manually and then generate `.reference` file in the following way: `clickhouse-client -n --testmode < 00000_test.sql > 00000_test.reference` or `./00000_test.sh > ./00000_test.reference`. Tests should use (create, drop, etc) only tables in `test` database that is assumed to be created beforehand; also tests can use temporary tables. 
@@ -24,6 +24,11 @@ If you want to use distributed queries in functional tests, you can leverage `re Some tests are marked with `zookeeper`, `shard` or `long` in their names. `zookeeper` is for tests that are using ZooKeeper; `shard` is for tests that requires server to listen `127.0.0.*`; `long` is for tests that run slightly longer that one second. +## Known bugs + +If we know of some bugs that can be easily reproduced by functional tests, we place the prepared functional tests in the `dbms/src/tests/queries/bugs` directory. These tests will be moved to `dbms/src/tests/queries/0_stateless` when the bugs are fixed. + + ## Integration Tests Integration tests allow to test ClickHouse in clustered configuration and ClickHouse interaction with other servers like MySQL, Postgres, MongoDB. They are useful to emulate network splits, packet drops, etc. These tests are run under Docker and create multiple containers with various software. @@ -55,7 +60,7 @@ Performance tests are not run on per-commit basis. Results of performance tests Some programs in `tests` directory are not prepared tests, but are test tools. For example, for `Lexer` there is a tool `dbms/src/Parsers/tests/lexer` that just do tokenization of stdin and writes colorized result to stdout. You can use these kind of tools as a code examples and for exploration and manual testing. -You can also place pair of files `.sh` and `.reference` along with the tool to run it on some predefined input - then script result can be compared to `.reference` file. There kind of tests are not automated. +You can also place a pair of files `.sh` and `.reference` along with the tool to run it on some predefined input - then the script result can be compared to the `.reference` file. These kinds of tests are not automated. ## Miscellanous Tests @@ -173,7 +178,7 @@ For production builds, gcc is used (it still generates slightly more efficient c ## Sanitizers **Address sanitizer**. -We run functional tests under ASan on per-commit basis. 
+We run functional and integration tests under ASan on per-commit basis. **Valgrind (Memcheck)**. We run functional tests under Valgrind overnight. It takes multiple hours. Currently there is one known false positive in `re2` library, see [this article](https://research.swtch.com/sparse). @@ -185,7 +190,7 @@ We run functional tests under TSan. ClickHouse must pass all tests. Run under TS Currently we still don't use MSan. **Undefined behaviour sanitizer.** -We still don't use UBSan. The only thing to fix is unaligned placement of structs in Arena during aggregation. This is totally fine, we only have to force alignment under UBSan. +We still don't use UBSan on a per-commit basis. There are some places to fix. **Debug allocator.** You can enable debug version of `tcmalloc` with `DEBUG_TCMALLOC` CMake option. We run tests with debug allocator on per-commit basis. @@ -195,7 +200,9 @@ You will find some additional details in `dbms/tests/instructions/sanitizers.txt ## Fuzzing -As of July 2018 we don't use fuzzing. +We use a simple fuzz test to generate random SQL queries and to check that the server doesn't die. Fuzz testing is performed with Address sanitizer. You can find it in `00746_sql_fuzzy.pl`. This test should be run continuously (overnight and longer). + +As of December 2018, we still don't use isolated fuzz testing of library code. ## Security Audit @@ -242,12 +249,12 @@ As of July 2018 we don't track test coverage. ## Test Automation -We run tests with Travis CI (available for general public) and Jenkins (available inside Yandex). +We run tests with the Yandex internal CI and job automation system named "Sandbox". We also continue to use Jenkins (available inside Yandex). -In Travis CI due to limit on time and computational power we can afford only subset of functional tests that are run with limited build of ClickHouse (debug version with cut off most of libraries). In about half of runs it still fails to finish in 50 minutes timeout. 
The only advantage - test results are visible for all external contributors. +Build jobs and tests are run in Sandbox on a per-commit basis. Resulting packages and test results are published in GitHub and can be downloaded by direct links. Artifacts are stored eternally. When you send a pull request on GitHub, we tag it as "can be tested" and our CI system will build ClickHouse packages (release, debug, with address sanitizer, etc) for you. -In Jenkins we run functional tests for each commit and for each pull request from trusted users; the same under ASan; we also run quorum tests, dictionary tests, Metrica B2B tests. We use Jenkins to prepare and publish releases. Worth to note that we are not happy with Jenkins at all. +We don't use Travis CI due to the limit on time and computational power. -One of our goals is to provide reliable testing infrastructure that will be available to community. +In Jenkins we run dictionary tests and Metrica B2B tests. We use Jenkins to prepare and publish releases. Jenkins is a legacy technology and all jobs will be moved to Sandbox. [Original article](https://clickhouse.yandex/docs/en/development/tests/) diff --git a/utils/check-style/check-include-stat b/utils/check-style/check-include-stat index 9dc9b7e7e9a..13588feb44d 100755 --- a/utils/check-style/check-include-stat +++ b/utils/check-style/check-include-stat @@ -16,10 +16,12 @@ sh ${CUR_DIR}check-include > $RESULT_FILE 2>&1 echo Results: echo Top by memory: -cat $RESULT_FILE | sort -rk4 | head -n20 +cat $RESULT_FILE | sort -nrk4 | head -n20 echo Top by time: -cat $RESULT_FILE | sort -rk3 | head -n20 +cat $RESULT_FILE | sort -nrk3 | head -n20 echo Top by includes: -cat $RESULT_FILE | sort -rk2 | head -n20 +cat $RESULT_FILE | sort -nrk2 | head -n20 + +trap "" EXIT