Merge branch 'master' of github.com:yandex/ClickHouse

Ivan Blinkov 2018-12-24 11:15:58 +03:00
commit 198cdac7d6
59 changed files with 735 additions and 585 deletions


@ -25,17 +25,10 @@ endif ()
# Write compile_commands.json
set(CMAKE_EXPORT_COMPILE_COMMANDS 1)
set(PARALLEL_COMPILE_JOBS "" CACHE STRING "Define the maximum number of concurrent compilation jobs")
if (PARALLEL_COMPILE_JOBS)
set_property(GLOBAL APPEND PROPERTY JOB_POOLS compile_job_pool="${PARALLEL_COMPILE_JOBS}")
set(CMAKE_JOB_POOL_COMPILE compile_job_pool)
endif ()
set(PARALLEL_LINK_JOBS "" CACHE STRING "Define the maximum number of concurrent link jobs")
if (LLVM_PARALLEL_LINK_JOBS)
set_property(GLOBAL APPEND PROPERTY JOB_POOLS link_job_pool=${PARALLEL_LINK_JOBS})
set(CMAKE_JOB_POOL_LINK link_job_pool)
endif ()
set (MAX_COMPILER_MEMORY 2000 CACHE INTERNAL "")
set (MAX_LINKER_MEMORY 3500 CACHE INTERNAL "")
include (cmake/limit_jobs.cmake)
include (cmake/find_ccache.cmake)
@ -162,51 +155,8 @@ set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${COMPILER_FLAGS} -fn
set (CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -O3 ${CMAKE_C_FLAGS_ADD}")
set (CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -O0 -g3 -ggdb3 -fno-inline ${CMAKE_C_FLAGS_ADD}")
set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package (Threads)
include (cmake/test_compiler.cmake)
if (OS_LINUX AND COMPILER_CLANG)
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS}")
option (USE_LIBCXX "Use libc++ and libc++abi instead of libstdc++ (only makes sense on Linux with Clang)" ${HAVE_LIBCXX})
set (LIBCXX_PATH "" CACHE STRING "Use custom path for libc++. It should be used for MSan.")
if (USE_LIBCXX)
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++") # OK for clang 6; older versions may emit an 'unused option' warning
set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -D_LIBCPP_DEBUG=0") # More checks in debug build.
if (MAKE_STATIC_LIBRARIES)
execute_process (COMMAND ${CMAKE_CXX_COMPILER} --print-file-name=libclang_rt.builtins-${CMAKE_SYSTEM_PROCESSOR}.a OUTPUT_VARIABLE BUILTINS_LIB_PATH OUTPUT_STRIP_TRAILING_WHITESPACE)
link_libraries (-nodefaultlibs -Wl,-Bstatic -stdlib=libc++ c++ c++abi gcc_eh ${BUILTINS_LIB_PATH} rt -Wl,-Bdynamic dl pthread m c)
else ()
link_libraries (-stdlib=libc++ c++ c++abi)
endif ()
if (LIBCXX_PATH)
# include_directories (SYSTEM BEFORE "${LIBCXX_PATH}/include" "${LIBCXX_PATH}/include/c++/v1")
link_directories ("${LIBCXX_PATH}/lib")
endif ()
endif ()
endif ()
if (USE_LIBCXX)
set (STATIC_STDLIB_FLAGS "")
else ()
set (STATIC_STDLIB_FLAGS "-static-libgcc -static-libstdc++")
endif ()
if (MAKE_STATIC_LIBRARIES AND NOT APPLE AND NOT (COMPILER_CLANG AND OS_FREEBSD))
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${STATIC_STDLIB_FLAGS}")
# Along with executables, we also build an example of a shared library for the "library dictionary source"; it should also be self-contained.
set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${STATIC_STDLIB_FLAGS}")
endif ()
if (USE_STATIC_LIBRARIES AND HAVE_NO_PIE)
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAG_NO_PIE}")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${FLAG_NO_PIE}")
endif ()
include (cmake/use_libcxx.cmake)
if (NOT MAKE_STATIC_LIBRARIES)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
@ -268,6 +218,7 @@ include (cmake/find_odbc.cmake)
# openssl, zlib, odbc before poco
include (cmake/find_poco.cmake)
include (cmake/find_lz4.cmake)
include (cmake/find_xxhash.cmake)
include (cmake/find_sparsehash.cmake)
include (cmake/find_rt.cmake)
include (cmake/find_execinfo.cmake)

cmake/find_xxhash.cmake Normal file

@ -0,0 +1,10 @@
if (LZ4_INCLUDE_DIR)
if (NOT EXISTS "${LZ4_INCLUDE_DIR}/xxhash.h")
message (WARNING "LZ4 library does not have XXHash. Support for XXHash will be disabled.")
set (USE_XXHASH 0)
else ()
set (USE_XXHASH 1)
endif ()
endif ()
message (STATUS "Using xxhash=${USE_XXHASH}")

cmake/limit_jobs.cmake Normal file

@ -0,0 +1,35 @@
# Usage:
# set (MAX_COMPILER_MEMORY 2000 CACHE INTERNAL "") # In megabytes
# set (MAX_LINKER_MEMORY 3500 CACHE INTERNAL "")
# include (cmake/limit_jobs.cmake)
cmake_host_system_information(RESULT AVAILABLE_PHYSICAL_MEMORY QUERY AVAILABLE_PHYSICAL_MEMORY) # Not available under FreeBSD
option(PARALLEL_COMPILE_JOBS "Define the maximum number of concurrent compilation jobs" "")
if (NOT PARALLEL_COMPILE_JOBS AND AVAILABLE_PHYSICAL_MEMORY)
math(EXPR PARALLEL_COMPILE_JOBS ${AVAILABLE_PHYSICAL_MEMORY}/2500) # ~2.5 GB max per compiler job
if (NOT PARALLEL_COMPILE_JOBS)
set (PARALLEL_COMPILE_JOBS 1)
endif ()
endif ()
if (PARALLEL_COMPILE_JOBS)
set_property(GLOBAL APPEND PROPERTY JOB_POOLS compile_job_pool=${PARALLEL_COMPILE_JOBS})
set(CMAKE_JOB_POOL_COMPILE compile_job_pool)
endif ()
option(PARALLEL_LINK_JOBS "Define the maximum number of concurrent link jobs" "")
if (NOT PARALLEL_LINK_JOBS AND AVAILABLE_PHYSICAL_MEMORY)
math(EXPR PARALLEL_LINK_JOBS ${AVAILABLE_PHYSICAL_MEMORY}/4000) # ~4 GB max per link job
if (NOT PARALLEL_LINK_JOBS)
set (PARALLEL_LINK_JOBS 1)
endif ()
endif ()
if (PARALLEL_COMPILE_JOBS OR PARALLEL_LINK_JOBS)
message(STATUS "Have ${AVAILABLE_PHYSICAL_MEMORY} megabytes of memory. Limiting concurrent linkers jobs to ${PARALLEL_LINK_JOBS} and compiler jobs to ${PARALLEL_COMPILE_JOBS}")
endif ()
if (PARALLEL_LINK_JOBS)
set_property(GLOBAL APPEND PROPERTY JOB_POOLS link_job_pool=${PARALLEL_LINK_JOBS})
set(CMAKE_JOB_POOL_LINK link_job_pool)
endif ()
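# Worked example (hypothetical numbers): with AVAILABLE_PHYSICAL_MEMORY = 16000 MB,
# the math(EXPR ...) calls above yield PARALLEL_COMPILE_JOBS = 16000 / 2500 = 6 and
# PARALLEL_LINK_JOBS = 16000 / 4000 = 4 (integer division), so the Ninja generator
# would run at most 6 concurrent compile jobs and 4 concurrent link jobs via the job pools.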

cmake/use_libcxx.cmake Normal file

@ -0,0 +1,49 @@
# Uses MAKE_STATIC_LIBRARIES
set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package (Threads)
include (cmake/test_compiler.cmake)
include (cmake/arch.cmake)
if (OS_LINUX AND COMPILER_CLANG)
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS}")
option (USE_LIBCXX "Use libc++ and libc++abi instead of libstdc++ (only makes sense on Linux with Clang)" ${HAVE_LIBCXX})
set (LIBCXX_PATH "" CACHE STRING "Use custom path for libc++. It should be used for MSan.")
if (USE_LIBCXX)
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++") # OK for clang 6; older versions may emit an 'unused option' warning
set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -D_LIBCPP_DEBUG=0") # More checks in debug build.
if (MAKE_STATIC_LIBRARIES)
execute_process (COMMAND ${CMAKE_CXX_COMPILER} --print-file-name=libclang_rt.builtins-${CMAKE_SYSTEM_PROCESSOR}.a OUTPUT_VARIABLE BUILTINS_LIB_PATH OUTPUT_STRIP_TRAILING_WHITESPACE)
link_libraries (-nodefaultlibs -Wl,-Bstatic -stdlib=libc++ c++ c++abi gcc_eh ${BUILTINS_LIB_PATH} rt -Wl,-Bdynamic dl pthread m c)
else ()
link_libraries (-stdlib=libc++ c++ c++abi)
endif ()
if (LIBCXX_PATH)
# include_directories (SYSTEM BEFORE "${LIBCXX_PATH}/include" "${LIBCXX_PATH}/include/c++/v1")
link_directories ("${LIBCXX_PATH}/lib")
endif ()
endif ()
endif ()
if (USE_LIBCXX)
set (STATIC_STDLIB_FLAGS "")
else ()
set (STATIC_STDLIB_FLAGS "-static-libgcc -static-libstdc++")
endif ()
if (MAKE_STATIC_LIBRARIES AND NOT APPLE AND NOT (COMPILER_CLANG AND OS_FREEBSD))
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${STATIC_STDLIB_FLAGS}")
# Along with executables, we also build an example of a shared library for the "library dictionary source"; it should also be self-contained.
set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${STATIC_STDLIB_FLAGS}")
endif ()
if (USE_STATIC_LIBRARIES AND HAVE_NO_PIE)
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAG_NO_PIE}")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${FLAG_NO_PIE}")
endif ()
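# Example invocation (hypothetical path), e.g. to build against a custom libc++ for MSan:
#   cmake -DUSE_LIBCXX=1 -DLIBCXX_PATH=/opt/libcxx-msan ..
# The link step then picks up ${LIBCXX_PATH}/lib via the link_directories call above.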


@ -27,12 +27,12 @@ elseif (EXISTS ${INTERNAL_COMPILER_BIN_ROOT}${INTERNAL_COMPILER_EXECUTABLE})
endif ()
if (COPY_HEADERS_COMPILER AND OS_LINUX)
add_custom_target (copy-headers ALL env CLANG=${COPY_HEADERS_COMPILER} BUILD_PATH=${ClickHouse_BINARY_DIR} DESTDIR=${ClickHouse_SOURCE_DIR} ${ClickHouse_SOURCE_DIR}/copy_headers.sh ${ClickHouse_SOURCE_DIR} ${TMP_HEADERS_DIR} DEPENDS ${COPY_HEADERS_DEPENDS} WORKING_DIRECTORY ${ClickHouse_SOURCE_DIR} SOURCES ${ClickHouse_SOURCE_DIR}/copy_headers.sh)
add_custom_target (copy-headers env CLANG=${COPY_HEADERS_COMPILER} BUILD_PATH=${ClickHouse_BINARY_DIR} DESTDIR=${ClickHouse_SOURCE_DIR} ${ClickHouse_SOURCE_DIR}/copy_headers.sh ${ClickHouse_SOURCE_DIR} ${TMP_HEADERS_DIR} DEPENDS ${COPY_HEADERS_DEPENDS} WORKING_DIRECTORY ${ClickHouse_SOURCE_DIR} SOURCES ${ClickHouse_SOURCE_DIR}/copy_headers.sh)
if (USE_INTERNAL_LLVM_LIBRARY)
set (CLANG_HEADERS_DIR "${ClickHouse_SOURCE_DIR}/contrib/llvm/clang/lib/Headers")
set (CLANG_HEADERS_DEST "${TMP_HEADERS_DIR}/usr/local/lib/clang/${LLVM_VERSION}/include") # original: ${LLVM_LIBRARY_OUTPUT_INTDIR}/clang/${CLANG_VERSION}/include
add_custom_target (copy-headers-clang ALL ${CMAKE_COMMAND} -E make_directory ${CLANG_HEADERS_DEST} && ${CMAKE_COMMAND} -E copy_if_different ${CLANG_HEADERS_DIR}/* ${CLANG_HEADERS_DEST} )
add_custom_target (copy-headers-clang ${CMAKE_COMMAND} -E make_directory ${CLANG_HEADERS_DEST} && ${CMAKE_COMMAND} -E copy_if_different ${CLANG_HEADERS_DIR}/* ${CLANG_HEADERS_DEST} )
add_dependencies (copy-headers copy-headers-clang)
endif ()
endif ()


@ -2,7 +2,11 @@
#include <memory>
#include <sys/resource.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <errno.h>
#include <pwd.h>
#include <unistd.h>
#include <Poco/Version.h>
#include <Poco/DirectoryIterator.h>
#include <Poco/Net/HTTPServer.h>
@ -70,6 +74,8 @@ namespace ErrorCodes
extern const int EXCESSIVE_ELEMENT_IN_CONFIG;
extern const int INVALID_CONFIG_PARAMETER;
extern const int SYSTEM_ERROR;
extern const int FAILED_TO_GETPWUID;
extern const int MISMATCHING_USERS_FOR_PROCESS_AND_DATA;
}
@ -83,6 +89,26 @@ static std::string getCanonicalPath(std::string && path)
return std::move(path);
}
static std::string getUserName(uid_t user_id)
{
/// Try to convert user id into user name.
auto buffer_size = sysconf(_SC_GETPW_R_SIZE_MAX);
if (buffer_size <= 0)
buffer_size = 1024;
std::string buffer;
buffer.resize(buffer_size); /// getpwuid_r writes into the buffer, so it must have size, not just capacity.
struct passwd passwd_entry;
struct passwd * result = nullptr;
const auto error = getpwuid_r(user_id, &passwd_entry, buffer.data(), buffer_size, &result);
if (error)
throwFromErrno("Failed to find user name for " + toString(user_id), ErrorCodes::FAILED_TO_GETPWUID, error);
else if (result)
return result->pw_name;
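/// No error, but no entry found: fall back to the numeric id rendered as a string.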
return toString(user_id);
}
void Server::uninitialize()
{
logger().information("shutting down");
@ -166,6 +192,26 @@ int Server::main(const std::vector<std::string> & /*args*/)
std::string path = getCanonicalPath(config().getString("path", DBMS_DEFAULT_PATH));
std::string default_database = config().getString("default_database", "default");
/// Check that the process' user id matches the owner of the data.
const auto effective_user_id = geteuid();
struct stat statbuf;
if (stat(path.c_str(), &statbuf) == 0 && effective_user_id != statbuf.st_uid)
{
const auto effective_user = getUserName(effective_user_id);
const auto data_owner = getUserName(statbuf.st_uid);
std::string message = "Effective user of the process (" + effective_user +
") does not match the owner of the data (" + data_owner + ").";
if (effective_user_id == 0)
{
message += " Run under 'sudo -u " + data_owner + "'.";
throw Exception(message, ErrorCodes::MISMATCHING_USERS_FOR_PROCESS_AND_DATA);
}
else
{
LOG_WARNING(log, message);
}
}
global_context->setPath(path);
/// Create directories for 'path' and for default database, if not exist.


@ -322,17 +322,11 @@ bool ColumnArray::hasEqualOffsets(const ColumnArray & other) const
ColumnPtr ColumnArray::convertToFullColumnIfConst() const
{
ColumnPtr new_data;
if (ColumnPtr full_column = getData().convertToFullColumnIfConst())
new_data = full_column;
else
new_data = data;
return ColumnArray::create(new_data, offsets);
/// It is possible to have an array with constant data and non-constant offsets.
/// Example: the result of the expression replicate('hello', [1]).
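/// (There the nested data column is a constant 'hello', while the offsets come from the array argument.)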
return ColumnArray::create(data->convertToFullColumnIfConst(), offsets);
}
void ColumnArray::getExtremes(Field & min, Field & max) const
{
min = Array();


@ -22,8 +22,7 @@ ColumnNullable::ColumnNullable(MutableColumnPtr && nested_column_, MutableColumn
: nested_column(std::move(nested_column_)), null_map(std::move(null_map_))
{
/// ColumnNullable cannot have constant nested column. But constant argument could be passed. Materialize it.
if (ColumnPtr nested_column_materialized = getNestedColumn().convertToFullColumnIfConst())
nested_column = nested_column_materialized;
nested_column = getNestedColumn().convertToFullColumnIfConst();
if (!getNestedColumn().canBeInsideNullable())
throw Exception{getNestedColumn().getName() + " cannot be inside Nullable column", ErrorCodes::ILLEGAL_COLUMN};


@ -45,7 +45,7 @@ public:
/** If the column isn't constant, returns itself.
* If the column is constant, transforms it to a full column (if the column type allows such a transform) and returns it.
*/
virtual Ptr convertToFullColumnIfConst() const { return {}; }
virtual Ptr convertToFullColumnIfConst() const { return getPtr(); }
/// If the column isn't a ColumnLowCardinality, returns itself.
/// If the column is a ColumnLowCardinality, transforms it to a full column.
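/// A minimal sketch (not part of the interface) of the call-site simplification this
/// default enables: the old pattern
///     if (ColumnPtr converted = column->convertToFullColumnIfConst())
///         column = converted;
/// collapses to the unconditional form used throughout this commit:
///     column = column->convertToFullColumnIfConst();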


@ -403,6 +403,8 @@ namespace ErrorCodes
extern const int NULL_POINTER_DEREFERENCE = 426;
extern const int CANNOT_COMPILE_REGEXP = 427;
extern const int UNKNOWN_LOG_LEVEL = 428;
extern const int FAILED_TO_GETPWUID = 429;
extern const int MISMATCHING_USERS_FOR_PROCESS_AND_DATA = 430;
extern const int KEEPER_EXCEPTION = 999;
extern const int POCO_EXCEPTION = 1000;


@ -9,11 +9,13 @@
#cmakedefine01 USE_RDKAFKA
#cmakedefine01 USE_CAPNP
#cmakedefine01 USE_EMBEDDED_COMPILER
#cmakedefine01 LLVM_HAS_RTTI
#cmakedefine01 USE_POCO_SQLODBC
#cmakedefine01 USE_POCO_DATAODBC
#cmakedefine01 USE_POCO_MONGODB
#cmakedefine01 USE_POCO_NETSSL
#cmakedefine01 CLICKHOUSE_SPLIT_BINARY
#cmakedefine01 USE_BASE64
#cmakedefine01 USE_HDFS
#cmakedefine01 USE_XXHASH
#cmakedefine01 CLICKHOUSE_SPLIT_BINARY
#cmakedefine01 LLVM_HAS_RTTI


@ -46,12 +46,7 @@ void NativeBlockOutputStream::writeData(const IDataType & type, const ColumnPtr
/** If there are constant columns, materialize them.
* (The data type does not know how to serialize / deserialize constants.)
*/
ColumnPtr full_column;
if (ColumnPtr converted = column->convertToFullColumnIfConst())
full_column = converted;
else
full_column = column;
ColumnPtr full_column = column->convertToFullColumnIfConst();
IDataType::SerializeBinaryBulkSettings settings;
settings.getter = [&ostr](IDataType::SubstreamPath) -> WriteBuffer * { return &ostr; };


@ -127,10 +127,7 @@ Block TotalsHavingBlockInputStream::readImpl()
expression->execute(finalized);
size_t filter_column_pos = finalized.getPositionByName(filter_column_name);
ColumnPtr filter_column_ptr = finalized.safeGetByPosition(filter_column_pos).column;
if (ColumnPtr materialized = filter_column_ptr->convertToFullColumnIfConst())
filter_column_ptr = materialized;
ColumnPtr filter_column_ptr = finalized.safeGetByPosition(filter_column_pos).column->convertToFullColumnIfConst();
FilterDescription filter_description(*filter_column_ptr);


@ -14,9 +14,7 @@ Block materializeBlock(const Block & block)
for (size_t i = 0; i < columns; ++i)
{
auto & element = res.getByPosition(i);
auto & src = element.column;
if (ColumnPtr converted = src->convertToFullColumnIfConst())
src = converted;
element.column = element.column->convertToFullColumnIfConst();
}
return res;


@ -1,8 +1,7 @@
include(${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake)
add_headers_and_sources(clickhouse_functions .)
add_headers_and_sources(clickhouse_functions ./GatherUtils)
add_headers_and_sources(clickhouse_functions ./Conditional)
add_headers_and_sources(clickhouse_functions .)
list(REMOVE_ITEM clickhouse_functions_sources IFunction.cpp FunctionFactory.cpp FunctionHelpers.cpp)
@ -21,7 +20,8 @@ target_link_libraries(clickhouse_functions
${METROHASH_LIBRARIES}
murmurhash
${BASE64_LIBRARY}
${OPENSSL_CRYPTO_LIBRARY})
${OPENSSL_CRYPTO_LIBRARY}
${LZ4_LIBRARY})
target_include_directories (clickhouse_functions SYSTEM BEFORE PUBLIC ${DIVIDE_INCLUDE_DIR})


@ -11,13 +11,14 @@
#include <Columns/ColumnDecimal.h>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnAggregateFunction.h>
#include <Functions/IFunction.h>
#include <Functions/FunctionHelpers.h>
#include "IFunction.h"
#include "FunctionHelpers.h"
#include "intDiv.h"
#include "castTypeToEither.h"
#include "FunctionFactory.h"
#include <DataTypes/NumberTraits.h>
#include <Common/typeid_cast.h>
#include <Common/Arena.h>
#include <Functions/intDiv.h>
#include <Functions/castTypeToEither.h>
#include <Common/config.h>
#if USE_EMBEDDED_COMPILER


@ -346,11 +346,8 @@ private:
String attr_name = attr_name_col->getValue<String>();
const ColumnWithTypeAndName & key_col_with_type = block.getByPosition(arguments[2]);
ColumnPtr key_col = key_col_with_type.column;
/// Functions in external dictionaries only support full-value (not constant) columns with keys.
if (ColumnPtr key_col_materialized = key_col_with_type.column->convertToFullColumnIfConst())
key_col = key_col_materialized;
ColumnPtr key_col = key_col_with_type.column->convertToFullColumnIfConst();
if (checkColumn<ColumnTuple>(key_col.get()))
{
@ -578,11 +575,8 @@ private:
String attr_name = attr_name_col->getValue<String>();
const ColumnWithTypeAndName & key_col_with_type = block.getByPosition(arguments[2]);
ColumnPtr key_col = key_col_with_type.column;
/// Functions in external dictionaries only support full-value (not constant) columns with keys.
if (ColumnPtr key_col_materialized = key_col_with_type.column->convertToFullColumnIfConst())
key_col = key_col_materialized;
ColumnPtr key_col = key_col_with_type.column->convertToFullColumnIfConst();
const auto & key_columns = typeid_cast<const ColumnTuple &>(*key_col).getColumns();
const auto & key_types = static_cast<const DataTypeTuple &>(*key_col_with_type.type).getElements();
@ -813,11 +807,9 @@ private:
String attr_name = attr_name_col->getValue<String>();
const ColumnWithTypeAndName & key_col_with_type = block.getByPosition(arguments[2]);
ColumnPtr key_col = key_col_with_type.column;
/// Functions in external dictionaries only support full-value (not constant) columns with keys.
if (ColumnPtr key_col_materialized = key_col_with_type.column->convertToFullColumnIfConst())
key_col = key_col_materialized;
ColumnPtr key_col = key_col_with_type.column->convertToFullColumnIfConst();
if (checkColumn<ColumnTuple>(key_col.get()))
{
@ -1079,11 +1071,9 @@ private:
String attr_name = attr_name_col->getValue<String>();
const ColumnWithTypeAndName & key_col_with_type = block.getByPosition(arguments[2]);
ColumnPtr key_col = key_col_with_type.column;
/// Functions in external dictionaries only support full-value (not constant) columns with keys.
if (ColumnPtr key_col_materialized = key_col_with_type.column->convertToFullColumnIfConst())
key_col = key_col_materialized;
ColumnPtr key_col = key_col_with_type.column->convertToFullColumnIfConst();
const auto & key_columns = typeid_cast<const ColumnTuple &>(*key_col).getColumns();
const auto & key_types = static_cast<const DataTypeTuple &>(*key_col_with_type.type).getElements();
@ -1691,7 +1681,7 @@ static const PaddedPODArray<T> & getColumnDataAsPaddedPODArray(const IColumn & c
}
}
const auto full_column = column.isColumnConst() ? column.convertToFullColumnIfConst() : column.getPtr();
const auto full_column = column.convertToFullColumnIfConst();
// With type conversion and const columns we need to use backup storage here
const auto size = full_column->size();


@ -227,7 +227,6 @@ protected:
bool executeOperationTyped(const IColumn * in_untyped, PaddedPODArray<OutputType> & dst, const IColumn * centroids_array_untyped)
{
const auto maybe_const = in_untyped->convertToFullColumnIfConst();
if (maybe_const)
in_untyped = maybe_const.get();
const auto in_vector = checkAndGetColumn<ColumnVector<InputType>>(in_untyped);


@ -1,5 +1,6 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionsHashing.h>
#include <Common/config.h>
namespace DB
@ -27,5 +28,10 @@ void registerFunctionsHashing(FunctionFactory & factory)
factory.registerFunction<FunctionMurmurHash3_32>();
factory.registerFunction<FunctionMurmurHash3_64>();
factory.registerFunction<FunctionMurmurHash3_128>();
#if USE_XXHASH
factory.registerFunction<FunctionXxHash32>();
factory.registerFunction<FunctionXxHash64>();
#endif
}
}
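/// Once registered (and when the build enables xxHash), the functions become callable
/// from SQL, e.g. the hypothetical queries SELECT xxHash32('abc') or SELECT xxHash64('abc').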


@ -8,6 +8,11 @@
#include <murmurhash2.h>
#include <murmurhash3.h>
#include <Common/config.h>
#if USE_XXHASH
#include <xxhash.h>
#endif
#include <Poco/ByteOrder.h>
#include <Common/SipHash.h>
@ -117,6 +122,7 @@ struct HalfMD5Impl
/// If true, it will use intHash32 or intHash64 to hash POD types. This behaviour is intended for better performance of some functions.
/// Otherwise it will hash bytes in memory as a string, using the corresponding hash function.
static constexpr bool use_int_hash_for_pods = false;
};
@ -402,6 +408,49 @@ struct ImplMetroHash64
};
#if USE_XXHASH
struct ImplXxHash32
{
static constexpr auto name = "xxHash32";
using ReturnType = UInt32;
static auto apply(const char * s, const size_t len) { return XXH32(s, len, 0); }
/**
* With the current implementation, hashing more than one argument gives results
* that are not reproducible outside of ClickHouse.
*
* The proper way to combine several inputs is to use the streaming mode of the hash function:
* https://github.com/Cyan4973/xxHash/issues/114#issuecomment-334908566
*
* In the common case this is doable via init_state / update_state / finalize_state.
*/
static auto combineHashes(UInt32 h1, UInt32 h2) { return IntHash32Impl::apply(h1) ^ h2; }
static constexpr bool use_int_hash_for_pods = false;
};
struct ImplXxHash64
{
static constexpr auto name = "xxHash64";
using ReturnType = UInt64;
using uint128_t = CityHash_v1_0_2::uint128;
static auto apply(const char * s, const size_t len) { return XXH64(s, len, 0); }
/*
With the current implementation, hashing more than one argument gives results
that are not reproducible outside of ClickHouse (see the comment on ImplXxHash32).
*/
static auto combineHashes(UInt64 h1, UInt64 h2) { return CityHash_v1_0_2::Hash128to64(uint128_t(h1, h2)); }
static constexpr bool use_int_hash_for_pods = false;
};
#endif
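#if USE_XXHASH
/// A sketch only, not part of this commit: reproducible multi-input hashing via the
/// xxHash streaming API recommended in the comments above. The function name is
/// hypothetical; the XXH32_* calls are the xxhash.h streaming interface.
inline UInt32 xxHash32OfTwoInputs(const char * a, size_t a_len, const char * b, size_t b_len)
{
    XXH32_state_t * state = XXH32_createState();
    XXH32_reset(state, 0);                     /// init_state
    XXH32_update(state, a, a_len);             /// update_state, once per input
    XXH32_update(state, b, b_len);
    const UInt32 result = XXH32_digest(state); /// finalize_state
    XXH32_freeState(state);
    return result;
}
#endif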
template <typename Impl>
class FunctionStringHashFixedString : public IFunction
{
@ -1027,4 +1076,9 @@ using FunctionMurmurHash3_128 = FunctionStringHashFixedString<MurmurHash3Impl128
using FunctionJavaHash = FunctionAnyHash<JavaHashImpl>;
using FunctionHiveHash = FunctionAnyHash<HiveHashImpl>;
#if USE_XXHASH
using FunctionXxHash32 = FunctionAnyHash<ImplXxHash32>;
using FunctionXxHash64 = FunctionAnyHash<ImplXxHash64>;
#endif
}


@ -157,10 +157,7 @@ ColumnPtr wrapInNullable(const ColumnPtr & src, const Block & block, const Colum
if (!result_null_map_column)
return makeNullable(src);
if (src_not_nullable->isColumnConst())
return ColumnNullable::create(src_not_nullable->convertToFullColumnIfConst(), result_null_map_column);
else
return ColumnNullable::create(src_not_nullable, result_null_map_column);
}
@ -431,9 +428,7 @@ void PreparedFunctionImpl::execute(Block & block, const ColumnNumbers & args, si
executeWithoutLowCardinalityColumns(block_without_low_cardinality, args, result, block_without_low_cardinality.rows(), dry_run);
auto & keys = block_without_low_cardinality.safeGetByPosition(result).column;
if (auto full_column = keys->convertToFullColumnIfConst())
keys = full_column;
auto keys = block_without_low_cardinality.safeGetByPosition(result).column->convertToFullColumnIfConst();
auto res_mut_dictionary = DataTypeLowCardinality::createColumnUnique(*res_low_cardinality_type->getDictionaryType());
ColumnPtr res_indexes = res_mut_dictionary->uniqueInsertRangeFrom(*keys, 0, keys->size());


@ -69,8 +69,7 @@ public:
if (!arg.type->equals(*elem_type))
preprocessed_column = castColumn(arg, elem_type, context);
if (ColumnPtr materialized_column = preprocessed_column->convertToFullColumnIfConst())
preprocessed_column = materialized_column;
preprocessed_column = preprocessed_column->convertToFullColumnIfConst();
columns_holder[i] = std::move(preprocessed_column);
columns[i] = columns_holder[i].get();


@ -61,21 +61,10 @@ private:
static constexpr size_t INITIAL_SIZE_DEGREE = 9;
template <typename T>
bool executeNumber(const ColumnArray * array, const IColumn * null_map, ColumnUInt32::Container & res_values);
bool executeString(const ColumnArray * array, const IColumn * null_map, ColumnUInt32::Container & res_values);
bool execute128bit(
const ColumnArray::Offsets & offsets,
const ColumnRawPtrs & columns,
const ColumnRawPtrs & null_maps,
ColumnUInt32::Container & res_values,
bool has_nullable_columns);
void executeHashed(
const ColumnArray::Offsets & offsets,
const ColumnRawPtrs & columns,
ColumnUInt32::Container & res_values);
bool executeNumber(const ColumnArray::Offsets & offsets, const IColumn & data, const NullMap * null_map, ColumnUInt32::Container & res_values);
bool executeString(const ColumnArray::Offsets & offsets, const IColumn & data, const NullMap * null_map, ColumnUInt32::Container & res_values);
bool execute128bit(const ColumnArray::Offsets & offsets, const ColumnRawPtrs & columns, ColumnUInt32::Container & res_values);
bool executeHashed(const ColumnArray::Offsets & offsets, const ColumnRawPtrs & columns, ColumnUInt32::Container & res_values);
};
@ -83,14 +72,14 @@ template <typename Derived>
void FunctionArrayEnumerateExtended<Derived>::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/)
{
const ColumnArray::Offsets * offsets = nullptr;
ColumnRawPtrs data_columns;
data_columns.reserve(arguments.size());
size_t num_arguments = arguments.size();
ColumnRawPtrs data_columns(num_arguments);
bool has_nullable_columns = false;
for (size_t i = 0; i < arguments.size(); ++i)
Columns array_holders;
ColumnPtr offsets_column;
for (size_t i = 0; i < num_arguments; ++i)
{
ColumnPtr array_ptr = block.getByPosition(arguments[i]).column;
const ColumnPtr & array_ptr = block.getByPosition(arguments[i]).column;
const ColumnArray * array = checkAndGetColumn<ColumnArray>(array_ptr.get());
if (!array)
{
@ -100,101 +89,84 @@ void FunctionArrayEnumerateExtended<Derived>::executeImpl(Block & block, const C
throw Exception("Illegal column " + block.getByPosition(arguments[i]).column->getName()
+ " of " + toString(i + 1) + "-th argument of function " + getName(),
ErrorCodes::ILLEGAL_COLUMN);
array_ptr = const_array->convertToFullColumn();
array = checkAndGetColumn<ColumnArray>(array_ptr.get());
array_holders.emplace_back(const_array->convertToFullColumn());
array = checkAndGetColumn<ColumnArray>(array_holders.back().get());
}
const ColumnArray::Offsets & offsets_i = array->getOffsets();
if (i == 0)
{
offsets = &offsets_i;
offsets_column = array->getOffsetsPtr();
}
else if (offsets_i != *offsets)
throw Exception("Lengths of all arrays passed to " + getName() + " must be equal.",
ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH);
auto * array_data = &array->getData();
data_columns.push_back(array_data);
data_columns[i] = array_data;
}
size_t num_columns = data_columns.size();
ColumnRawPtrs original_data_columns(num_columns);
ColumnRawPtrs null_maps(num_columns);
const NullMap * null_map = nullptr;
for (size_t i = 0; i < num_columns; ++i)
for (size_t i = 0; i < num_arguments; ++i)
{
original_data_columns[i] = data_columns[i];
if (data_columns[i]->isColumnNullable())
{
has_nullable_columns = true;
const auto & nullable_col = static_cast<const ColumnNullable &>(*data_columns[i]);
if (num_arguments == 1)
data_columns[i] = &nullable_col.getNestedColumn();
null_maps[i] = &nullable_col.getNullMapColumn();
null_map = &nullable_col.getNullMapData();
break;
}
else
null_maps[i] = nullptr;
}
const ColumnArray * first_array = checkAndGetColumn<ColumnArray>(block.getByPosition(arguments.at(0)).column.get());
const IColumn * first_null_map = null_maps[0];
auto res_nested = ColumnUInt32::create();
ColumnUInt32::Container & res_values = res_nested->getData();
if (!offsets->empty())
res_values.resize(offsets->back());
if (num_columns == 1)
if (num_arguments == 1)
{
if (!(executeNumber<UInt8>(first_array, first_null_map, res_values)
|| executeNumber<UInt16>(first_array, first_null_map, res_values)
|| executeNumber<UInt32>(first_array, first_null_map, res_values)
|| executeNumber<UInt64>(first_array, first_null_map, res_values)
|| executeNumber<Int8>(first_array, first_null_map, res_values)
|| executeNumber<Int16>(first_array, first_null_map, res_values)
|| executeNumber<Int32>(first_array, first_null_map, res_values)
|| executeNumber<Int64>(first_array, first_null_map, res_values)
|| executeNumber<Float32>(first_array, first_null_map, res_values)
|| executeNumber<Float64>(first_array, first_null_map, res_values)
|| executeString (first_array, first_null_map, res_values)))
executeHashed(*offsets, original_data_columns, res_values);
executeNumber<UInt8>(*offsets, *data_columns[0], null_map, res_values)
|| executeNumber<UInt16>(*offsets, *data_columns[0], null_map, res_values)
|| executeNumber<UInt32>(*offsets, *data_columns[0], null_map, res_values)
|| executeNumber<UInt64>(*offsets, *data_columns[0], null_map, res_values)
|| executeNumber<Int8>(*offsets, *data_columns[0], null_map, res_values)
|| executeNumber<Int16>(*offsets, *data_columns[0], null_map, res_values)
|| executeNumber<Int32>(*offsets, *data_columns[0], null_map, res_values)
|| executeNumber<Int64>(*offsets, *data_columns[0], null_map, res_values)
|| executeNumber<Float32>(*offsets, *data_columns[0], null_map, res_values)
|| executeNumber<Float64>(*offsets, *data_columns[0], null_map, res_values)
|| executeString(*offsets, *data_columns[0], null_map, res_values)
|| executeHashed(*offsets, data_columns, res_values);
}
else
{
if (!execute128bit(*offsets, data_columns, null_maps, res_values, has_nullable_columns))
executeHashed(*offsets, original_data_columns, res_values);
execute128bit(*offsets, data_columns, res_values)
|| executeHashed(*offsets, data_columns, res_values);
}
block.getByPosition(result).column = ColumnArray::create(std::move(res_nested), first_array->getOffsetsPtr());
block.getByPosition(result).column = ColumnArray::create(std::move(res_nested), offsets_column);
}
template <typename Derived>
template <typename T>
bool FunctionArrayEnumerateExtended<Derived>::executeNumber(const ColumnArray * array, const IColumn * null_map, ColumnUInt32::Container & res_values)
bool FunctionArrayEnumerateExtended<Derived>::executeNumber(
const ColumnArray::Offsets & offsets, const IColumn & data, const NullMap * null_map, ColumnUInt32::Container & res_values)
{
const IColumn * inner_col;
const auto & array_data = array->getData();
if (array_data.isColumnNullable())
{
const auto & nullable_col = static_cast<const ColumnNullable &>(array_data);
inner_col = &nullable_col.getNestedColumn();
}
else
inner_col = &array_data;
const ColumnVector<T> * nested = checkAndGetColumn<ColumnVector<T>>(inner_col);
if (!nested)
const ColumnVector<T> * data_concrete = checkAndGetColumn<ColumnVector<T>>(&data);
if (!data_concrete)
return false;
const ColumnArray::Offsets & offsets = array->getOffsets();
const typename ColumnVector<T>::Container & values = nested->getData();
const auto & values = data_concrete->getData();
using ValuesToIndices = ClearableHashMap<T, UInt32, DefaultHash<T>, HashTableGrower<INITIAL_SIZE_DEGREE>,
HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(T)>>;
const PaddedPODArray<UInt8> * null_map_data = nullptr;
if (null_map)
null_map_data = &static_cast<const ColumnUInt8 *>(null_map)->getData();
ValuesToIndices indices;
size_t prev_off = 0;
if constexpr (std::is_same_v<Derived, FunctionArrayEnumerateUniq>)
@ -207,7 +179,7 @@ bool FunctionArrayEnumerateExtended<Derived>::executeNumber(const ColumnArray *
size_t off = offsets[i];
for (size_t j = prev_off; j < off; ++j)
{
if (null_map_data && ((*null_map_data)[j] == 1))
if (null_map && (*null_map)[j])
res_values[j] = ++null_count;
else
res_values[j] = ++indices[values[j]];
@ -226,7 +198,7 @@ bool FunctionArrayEnumerateExtended<Derived>::executeNumber(const ColumnArray *
size_t off = offsets[i];
for (size_t j = prev_off; j < off; ++j)
{
if (null_map_data && ((*null_map_data)[j] == 1))
if (null_map && (*null_map)[j])
{
if (!null_index)
null_index = ++rank;
@ -247,32 +219,17 @@ bool FunctionArrayEnumerateExtended<Derived>::executeNumber(const ColumnArray *
}
template <typename Derived>
bool FunctionArrayEnumerateExtended<Derived>::executeString(const ColumnArray * array, const IColumn * null_map, ColumnUInt32::Container & res_values)
bool FunctionArrayEnumerateExtended<Derived>::executeString(
const ColumnArray::Offsets & offsets, const IColumn & data, const NullMap * null_map, ColumnUInt32::Container & res_values)
{
const IColumn * inner_col;
const auto & array_data = array->getData();
if (array_data.isColumnNullable())
{
const auto & nullable_col = static_cast<const ColumnNullable &>(array_data);
inner_col = &nullable_col.getNestedColumn();
}
else
inner_col = &array_data;
const ColumnString * nested = checkAndGetColumn<ColumnString>(inner_col);
if (!nested)
const ColumnString * values = checkAndGetColumn<ColumnString>(&data);
if (!values)
return false;
const ColumnArray::Offsets & offsets = array->getOffsets();
size_t prev_off = 0;
using ValuesToIndices = ClearableHashMap<StringRef, UInt32, StringRefHash, HashTableGrower<INITIAL_SIZE_DEGREE>,
HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(StringRef)>>;
const PaddedPODArray<UInt8> * null_map_data = nullptr;
if (null_map)
null_map_data = &static_cast<const ColumnUInt8 *>(null_map)->getData();
ValuesToIndices indices;
if constexpr (std::is_same_v<Derived, FunctionArrayEnumerateUniq>)
{
@ -284,10 +241,10 @@ bool FunctionArrayEnumerateExtended<Derived>::executeString(const ColumnArray *
size_t off = offsets[i];
for (size_t j = prev_off; j < off; ++j)
{
if (null_map_data && ((*null_map_data)[j] == 1))
if (null_map && (*null_map)[j])
res_values[j] = ++null_count;
else
res_values[j] = ++indices[nested->getDataAt(j)];
res_values[j] = ++indices[values->getDataAt(j)];
}
prev_off = off;
}
@ -303,7 +260,7 @@ bool FunctionArrayEnumerateExtended<Derived>::executeString(const ColumnArray *
size_t off = offsets[i];
for (size_t j = prev_off; j < off; ++j)
{
if (null_map_data && ((*null_map_data)[j] == 1))
if (null_map && (*null_map)[j])
{
if (!null_index)
null_index = ++rank;
@ -311,7 +268,7 @@ bool FunctionArrayEnumerateExtended<Derived>::executeString(const ColumnArray *
}
else
{
auto & idx = indices[nested->getDataAt(j)];
auto & idx = indices[values->getDataAt(j)];
if (!idx)
idx = ++rank;
res_values[j] = idx;
@ -327,9 +284,7 @@ template <typename Derived>
bool FunctionArrayEnumerateExtended<Derived>::execute128bit(
const ColumnArray::Offsets & offsets,
const ColumnRawPtrs & columns,
const ColumnRawPtrs & null_maps,
ColumnUInt32::Container & res_values,
bool has_nullable_columns)
ColumnUInt32::Container & res_values)
{
size_t count = columns.size();
size_t keys_bytes = 0;
@ -342,8 +297,6 @@ bool FunctionArrayEnumerateExtended<Derived>::execute128bit(
key_sizes[j] = columns[j]->sizeOfValueIfFixed();
keys_bytes += key_sizes[j];
}
if (has_nullable_columns)
keys_bytes += std::tuple_size<KeysNullMap<UInt128>>::value;
if (keys_bytes > 16)
return false;
@ -361,29 +314,7 @@ bool FunctionArrayEnumerateExtended<Derived>::execute128bit(
indices.clear();
size_t off = offsets[i];
for (size_t j = prev_off; j < off; ++j)
{
if (has_nullable_columns)
{
KeysNullMap<UInt128> bitmap{};
for (size_t i = 0; i < columns.size(); ++i)
{
if (null_maps[i])
{
const auto & null_map = static_cast<const ColumnUInt8 &>(*null_maps[i]).getData();
if (null_map[j] == 1)
{
size_t bucket = i / 8;
size_t offset = i % 8;
bitmap[bucket] |= UInt8(1) << offset;
}
}
}
res_values[j] = ++indices[packFixed<UInt128>(j, count, columns, key_sizes, bitmap)];
}
else
res_values[j] = ++indices[packFixed<UInt128>(j, count, columns, key_sizes)];
}
prev_off = off;
}
}
@ -396,37 +327,12 @@ bool FunctionArrayEnumerateExtended<Derived>::execute128bit(
size_t off = offsets[i];
size_t rank = 0;
for (size_t j = prev_off; j < off; ++j)
{
if (has_nullable_columns)
{
KeysNullMap<UInt128> bitmap{};
for (size_t i = 0; i < columns.size(); ++i)
{
if (null_maps[i])
{
const auto & null_map = static_cast<const ColumnUInt8 &>(*null_maps[i]).getData();
if (null_map[j] == 1)
{
size_t bucket = i / 8;
size_t offset = i % 8;
bitmap[bucket] |= UInt8(1) << offset;
}
}
}
auto &idx = indices[packFixed<UInt128>(j, count, columns, key_sizes, bitmap)];
if (!idx)
idx = ++rank;
res_values[j] = idx;
}
else
{
auto &idx = indices[packFixed<UInt128>(j, count, columns, key_sizes)];;
if (!idx)
idx = ++rank;
res_values[j] = idx;
}
}
prev_off = off;
}
}
@ -435,7 +341,7 @@ bool FunctionArrayEnumerateExtended<Derived>::execute128bit(
}
template <typename Derived>
void FunctionArrayEnumerateExtended<Derived>::executeHashed(
bool FunctionArrayEnumerateExtended<Derived>::executeHashed(
const ColumnArray::Offsets & offsets,
const ColumnRawPtrs & columns,
ColumnUInt32::Container & res_values)
@ -479,6 +385,8 @@ void FunctionArrayEnumerateExtended<Derived>::executeHashed(
prev_off = off;
}
}
return true;
}
}
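/// The dispatch above relies on short-circuit ||: each typed fast path returns false when
/// the concrete column type does not match, and executeHashed now returns bool so it can
/// serve as the always-true fallback, e.g.:
///     executeNumber<UInt8>(*offsets, *data_columns[0], null_map, res_values)
///         || executeString(*offsets, *data_columns[0], null_map, res_values)
///         || executeHashed(*offsets, data_columns, res_values);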


@ -838,14 +838,8 @@ private:
null_map_data, nullptr);
else
{
/// If item_arg is tuple and have constants.
if (ColumnPtr materialized_tuple = item_arg.convertToFullColumnIfConst())
ArrayIndexGenericImpl<IndexConv, false>::vector(
col_nested, col_array->getOffsets(), *materialized_tuple, col_res->getData(),
null_map_data, null_map_item);
else
ArrayIndexGenericImpl<IndexConv, false>::vector(
col_nested, col_array->getOffsets(), item_arg, col_res->getData(),
col_nested, col_array->getOffsets(), *item_arg.convertToFullColumnIfConst(), col_res->getData(),
null_map_data, null_map_item);
}


@ -23,9 +23,7 @@ struct ArrayMapImpl
static ColumnPtr execute(const ColumnArray & array, ColumnPtr mapped)
{
return mapped->isColumnConst()
? ColumnArray::create(mapped->convertToFullColumnIfConst(), array.getOffsetsPtr())
: ColumnArray::create(mapped, array.getOffsetsPtr());
return ColumnArray::create(mapped->convertToFullColumnIfConst(), array.getOffsetsPtr());
}
};


@ -63,37 +63,23 @@ private:
static constexpr size_t INITIAL_SIZE_DEGREE = 9;
template <typename T>
bool executeNumber(const ColumnArray * array, const IColumn * null_map, ColumnUInt32::Container & res_values);
bool executeString(const ColumnArray * array, const IColumn * null_map, ColumnUInt32::Container & res_values);
bool execute128bit(
const ColumnArray::Offsets & offsets,
const ColumnRawPtrs & columns,
const ColumnRawPtrs & null_maps,
ColumnUInt32::Container & res_values,
bool has_nullable_columns);
void executeHashed(
const ColumnArray::Offsets & offsets,
const ColumnRawPtrs & columns,
ColumnUInt32::Container & res_values);
bool executeNumber(const ColumnArray::Offsets & offsets, const IColumn & data, const NullMap * null_map, ColumnUInt32::Container & res_values);
bool executeString(const ColumnArray::Offsets & offsets, const IColumn & data, const NullMap * null_map, ColumnUInt32::Container & res_values);
bool execute128bit(const ColumnArray::Offsets & offsets, const ColumnRawPtrs & columns, ColumnUInt32::Container & res_values);
bool executeHashed(const ColumnArray::Offsets & offsets, const ColumnRawPtrs & columns, ColumnUInt32::Container & res_values);
};
void FunctionArrayUniq::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/)
{
Columns array_columns(arguments.size());
const ColumnArray::Offsets * offsets = nullptr;
ColumnRawPtrs data_columns(arguments.size());
ColumnRawPtrs original_data_columns(arguments.size());
ColumnRawPtrs null_maps(arguments.size());
size_t num_arguments = arguments.size();
ColumnRawPtrs data_columns(num_arguments);
bool has_nullable_columns = false;
for (size_t i = 0; i < arguments.size(); ++i)
Columns array_holders;
for (size_t i = 0; i < num_arguments; ++i)
{
ColumnPtr array_ptr = block.getByPosition(arguments[i]).column;
const ColumnPtr & array_ptr = block.getByPosition(arguments[i]).column;
const ColumnArray * array = checkAndGetColumn<ColumnArray>(array_ptr.get());
if (!array)
{
@ -101,14 +87,12 @@ void FunctionArrayUniq::executeImpl(Block & block, const ColumnNumbers & argumen
block.getByPosition(arguments[i]).column.get());
if (!const_array)
throw Exception("Illegal column " + block.getByPosition(arguments[i]).column->getName()
+ " of " + toString(i + 1) + getOrdinalSuffix(i + 1) + " argument of function " + getName(),
+ " of " + toString(i + 1) + "-th argument of function " + getName(),
ErrorCodes::ILLEGAL_COLUMN);
array_ptr = const_array->convertToFullColumn();
array = static_cast<const ColumnArray *>(array_ptr.get());
array_holders.emplace_back(const_array->convertToFullColumn());
array = checkAndGetColumn<ColumnArray>(array_holders.back().get());
}
array_columns[i] = array_ptr;
const ColumnArray::Offsets & offsets_i = array->getOffsets();
if (i == 0)
offsets = &offsets_i;
@ -116,78 +100,65 @@ void FunctionArrayUniq::executeImpl(Block & block, const ColumnNumbers & argumen
throw Exception("Lengths of all arrays passed to " + getName() + " must be equal.",
ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH);
data_columns[i] = &array->getData();
original_data_columns[i] = data_columns[i];
auto * array_data = &array->getData();
data_columns[i] = array_data;
}
const NullMap * null_map = nullptr;
for (size_t i = 0; i < num_arguments; ++i)
{
if (data_columns[i]->isColumnNullable())
{
has_nullable_columns = true;
const auto & nullable_col = static_cast<const ColumnNullable &>(*data_columns[i]);
if (num_arguments == 1)
data_columns[i] = &nullable_col.getNestedColumn();
null_maps[i] = &nullable_col.getNullMapColumn();
null_map = &nullable_col.getNullMapData();
break;
}
else
null_maps[i] = nullptr;
}
const ColumnArray * first_array = static_cast<const ColumnArray *>(array_columns[0].get());
const IColumn * first_null_map = null_maps[0];
auto res = ColumnUInt32::create();
ColumnUInt32::Container & res_values = res->getData();
res_values.resize(offsets->size());
if (arguments.size() == 1)
if (num_arguments == 1)
{
if (!(executeNumber<UInt8>(first_array, first_null_map, res_values)
|| executeNumber<UInt16>(first_array, first_null_map, res_values)
|| executeNumber<UInt32>(first_array, first_null_map, res_values)
|| executeNumber<UInt64>(first_array, first_null_map, res_values)
|| executeNumber<Int8>(first_array, first_null_map, res_values)
|| executeNumber<Int16>(first_array, first_null_map, res_values)
|| executeNumber<Int32>(first_array, first_null_map, res_values)
|| executeNumber<Int64>(first_array, first_null_map, res_values)
|| executeNumber<Float32>(first_array, first_null_map, res_values)
|| executeNumber<Float64>(first_array, first_null_map, res_values)
|| executeString(first_array, first_null_map, res_values)))
executeHashed(*offsets, original_data_columns, res_values);
executeNumber<UInt8>(*offsets, *data_columns[0], null_map, res_values)
|| executeNumber<UInt16>(*offsets, *data_columns[0], null_map, res_values)
|| executeNumber<UInt32>(*offsets, *data_columns[0], null_map, res_values)
|| executeNumber<UInt64>(*offsets, *data_columns[0], null_map, res_values)
|| executeNumber<Int8>(*offsets, *data_columns[0], null_map, res_values)
|| executeNumber<Int16>(*offsets, *data_columns[0], null_map, res_values)
|| executeNumber<Int32>(*offsets, *data_columns[0], null_map, res_values)
|| executeNumber<Int64>(*offsets, *data_columns[0], null_map, res_values)
|| executeNumber<Float32>(*offsets, *data_columns[0], null_map, res_values)
|| executeNumber<Float64>(*offsets, *data_columns[0], null_map, res_values)
|| executeString(*offsets, *data_columns[0], null_map, res_values)
|| executeHashed(*offsets, data_columns, res_values);
}
else
{
if (!execute128bit(*offsets, data_columns, null_maps, res_values, has_nullable_columns))
executeHashed(*offsets, original_data_columns, res_values);
execute128bit(*offsets, data_columns, res_values)
|| executeHashed(*offsets, data_columns, res_values);
}
block.getByPosition(result).column = std::move(res);
}
template <typename T>
bool FunctionArrayUniq::executeNumber(const ColumnArray * array, const IColumn * null_map, ColumnUInt32::Container & res_values)
bool FunctionArrayUniq::executeNumber(const ColumnArray::Offsets & offsets, const IColumn & data, const NullMap * null_map, ColumnUInt32::Container & res_values)
{
const IColumn * inner_col;
const auto & array_data = array->getData();
if (array_data.isColumnNullable())
{
const auto & nullable_col = static_cast<const ColumnNullable &>(array_data);
inner_col = &nullable_col.getNestedColumn();
}
else
inner_col = &array_data;
const ColumnVector<T> * nested = checkAndGetColumn<ColumnVector<T>>(inner_col);
const ColumnVector<T> * nested = checkAndGetColumn<ColumnVector<T>>(&data);
if (!nested)
return false;
const ColumnArray::Offsets & offsets = array->getOffsets();
const typename ColumnVector<T>::Container & values = nested->getData();
const auto & values = nested->getData();
using Set = ClearableHashSet<T, DefaultHash<T>, HashTableGrower<INITIAL_SIZE_DEGREE>,
HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(T)>>;
const PaddedPODArray<UInt8> * null_map_data = nullptr;
if (null_map)
null_map_data = &static_cast<const ColumnUInt8 *>(null_map)->getData();
Set set;
ColumnArray::Offset prev_off = 0;
for (size_t i = 0; i < offsets.size(); ++i)
@ -197,7 +168,7 @@ bool FunctionArrayUniq::executeNumber(const ColumnArray * array, const IColumn *
ColumnArray::Offset off = offsets[i];
for (ColumnArray::Offset j = prev_off; j < off; ++j)
{
if (null_map_data && ((*null_map_data)[j] == 1))
if (null_map && (*null_map)[j])
found_null = true;
else
set.insert(values[j]);
@ -209,31 +180,15 @@ bool FunctionArrayUniq::executeNumber(const ColumnArray * array, const IColumn *
return true;
}
bool FunctionArrayUniq::executeString(const ColumnArray * array, const IColumn * null_map, ColumnUInt32::Container & res_values)
bool FunctionArrayUniq::executeString(const ColumnArray::Offsets & offsets, const IColumn & data, const NullMap * null_map, ColumnUInt32::Container & res_values)
{
const IColumn * inner_col;
const auto & array_data = array->getData();
if (array_data.isColumnNullable())
{
const auto & nullable_col = static_cast<const ColumnNullable &>(array_data);
inner_col = &nullable_col.getNestedColumn();
}
else
inner_col = &array_data;
const ColumnString * nested = checkAndGetColumn<ColumnString>(inner_col);
const ColumnString * nested = checkAndGetColumn<ColumnString>(&data);
if (!nested)
return false;
const ColumnArray::Offsets & offsets = array->getOffsets();
using Set = ClearableHashSet<StringRef, StringRefHash, HashTableGrower<INITIAL_SIZE_DEGREE>,
HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(StringRef)>>;
const PaddedPODArray<UInt8> * null_map_data = nullptr;
if (null_map)
null_map_data = &static_cast<const ColumnUInt8 *>(null_map)->getData();
Set set;
ColumnArray::Offset prev_off = 0;
for (size_t i = 0; i < offsets.size(); ++i)
@ -243,7 +198,7 @@ bool FunctionArrayUniq::executeString(const ColumnArray * array, const IColumn *
ColumnArray::Offset off = offsets[i];
for (ColumnArray::Offset j = prev_off; j < off; ++j)
{
if (null_map_data && ((*null_map_data)[j] == 1))
if (null_map && (*null_map)[j])
found_null = true;
else
set.insert(nested->getDataAt(j));
@ -259,9 +214,7 @@ bool FunctionArrayUniq::executeString(const ColumnArray * array, const IColumn *
bool FunctionArrayUniq::execute128bit(
const ColumnArray::Offsets & offsets,
const ColumnRawPtrs & columns,
const ColumnRawPtrs & null_maps,
ColumnUInt32::Container & res_values,
bool has_nullable_columns)
ColumnUInt32::Container & res_values)
{
size_t count = columns.size();
size_t keys_bytes = 0;
@ -274,8 +227,6 @@ bool FunctionArrayUniq::execute128bit(
key_sizes[j] = columns[j]->sizeOfValueIfFixed();
keys_bytes += key_sizes[j];
}
if (has_nullable_columns)
keys_bytes += std::tuple_size<KeysNullMap<UInt128>>::value;
if (keys_bytes > 16)
return false;
@ -283,19 +234,6 @@ bool FunctionArrayUniq::execute128bit(
using Set = ClearableHashSet<UInt128, UInt128HashCRC32, HashTableGrower<INITIAL_SIZE_DEGREE>,
HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(UInt128)>>;
/// Suppose that, for a given row, each of the N columns has an array whose length is M.
/// Denote arr_i each of these arrays (1 <= i <= N). Then the following is performed:
///
/// col1 ... colN
///
/// arr_1[1], ..., arr_N[1] -> pack into a binary blob b1
/// .
/// .
/// .
/// arr_1[M], ..., arr_N[M] -> pack into a binary blob bM
///
/// Each binary blob is inserted into a hash table.
///
Set set;
ColumnArray::Offset prev_off = 0;
for (ColumnArray::Offset i = 0; i < offsets.size(); ++i)
@ -303,29 +241,7 @@ bool FunctionArrayUniq::execute128bit(
set.clear();
ColumnArray::Offset off = offsets[i];
for (ColumnArray::Offset j = prev_off; j < off; ++j)
{
if (has_nullable_columns)
{
KeysNullMap<UInt128> bitmap{};
for (ColumnArray::Offset i = 0; i < columns.size(); ++i)
{
if (null_maps[i])
{
const auto & null_map = static_cast<const ColumnUInt8 &>(*null_maps[i]).getData();
if (null_map[j] == 1)
{
ColumnArray::Offset bucket = i / 8;
ColumnArray::Offset offset = i % 8;
bitmap[bucket] |= UInt8(1) << offset;
}
}
}
set.insert(packFixed<UInt128>(j, count, columns, key_sizes, bitmap));
}
else
set.insert(packFixed<UInt128>(j, count, columns, key_sizes));
}
res_values[i] = set.size();
prev_off = off;
@ -334,7 +250,7 @@ bool FunctionArrayUniq::execute128bit(
return true;
}
void FunctionArrayUniq::executeHashed(
bool FunctionArrayUniq::executeHashed(
const ColumnArray::Offsets & offsets,
const ColumnRawPtrs & columns,
ColumnUInt32::Container & res_values)
@ -356,6 +272,8 @@ void FunctionArrayUniq::executeHashed(
res_values[i] = set.size();
prev_off = off;
}
return true;
}


@ -638,9 +638,7 @@ private:
static ColumnPtr materializeColumnIfConst(const ColumnPtr & column)
{
if (ColumnPtr res = column->convertToFullColumnIfConst())
return res;
return column;
return column->convertToFullColumnIfConst();
}
static ColumnPtr makeNullableColumnIfNot(const ColumnPtr & column)


@ -34,11 +34,7 @@ public:
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override
{
const auto & src = block.getByPosition(arguments[0]).column;
if (ColumnPtr converted = src->convertToFullColumnIfConst())
block.getByPosition(result).column = converted;
else
block.getByPosition(result).column = src;
block.getByPosition(result).column = block.getByPosition(arguments[0]).column->convertToFullColumnIfConst();
}
};


@ -81,7 +81,7 @@ public:
while (true)
{
const char * next_src_pos = find_first_symbols<'\0', '\\', '|', '(', ')', '^', '$', '.', '[', '?', '*', '+', '{', ':', '-'>(src_pos, src_end);
const char * next_src_pos = find_first_symbols<'\0', '\\', '|', '(', ')', '^', '$', '.', '[', ']', '?', '*', '+', '{', ':', '-'>(src_pos, src_end);
size_t bytes_to_copy = next_src_pos - src_pos;
size_t old_dst_size = dst_data.size();


@ -92,7 +92,9 @@ private:
{
#if __SSE4_2__
/// skip whitespace from left in blocks of up to 16 characters
constexpr auto left_sse_mode = base_sse_mode | _SIDD_LEAST_SIGNIFICANT;
/// Avoid gcc bug: _mm_cmpistri: error: the third argument must be an 8-bit immediate
enum { left_sse_mode = base_sse_mode | _SIDD_LEAST_SIGNIFICANT };
while (mask == bytes_sse && chars_to_trim_left < size_sse)
{
const auto chars = _mm_loadu_si128(reinterpret_cast<const __m128i *>(data + chars_to_trim_left));
@ -110,7 +112,9 @@ private:
const auto trim_right_size = size - chars_to_trim_left;
#if __SSE4_2__
/// try to skip whitespace from right in blocks of up to 16 characters
constexpr auto right_sse_mode = base_sse_mode | _SIDD_MOST_SIGNIFICANT;
/// Avoid gcc bug: _mm_cmpistri: error: the third argument must be an 8-bit immediate
enum { right_sse_mode = base_sse_mode | _SIDD_MOST_SIGNIFICANT };
const auto trim_right_size_sse = trim_right_size - (trim_right_size % bytes_sse);
while (mask == bytes_sse && chars_to_trim_right < trim_right_size_sse)
{


@ -65,14 +65,11 @@ public:
Columns tuple_columns(tuple_size);
for (size_t i = 0; i < tuple_size; ++i)
{
tuple_columns[i] = block.getByPosition(arguments[i]).column;
/** If tuple is mixed of constant and not constant columns,
* convert all to non-constant columns,
* because many places in code expect all non-constant columns in non-constant tuple.
*/
if (ColumnPtr converted = tuple_columns[i]->convertToFullColumnIfConst())
tuple_columns[i] = converted;
tuple_columns[i] = block.getByPosition(arguments[i]).column->convertToFullColumnIfConst();
}
block.getByPosition(result).column = ColumnTuple::create(tuple_columns);
}


@ -772,13 +772,8 @@ bool Aggregator::executeOnBlock(const Block & block, AggregatedDataVariants & re
/// Remember the columns we will work with
for (size_t i = 0; i < params.keys_size; ++i)
{
key_columns[i] = block.safeGetByPosition(params.keys[i]).column.get();
if (ColumnPtr converted = key_columns[i]->convertToFullColumnIfConst())
{
materialized_columns.push_back(converted);
materialized_columns.push_back(block.safeGetByPosition(params.keys[i]).column->convertToFullColumnIfConst());
key_columns[i] = materialized_columns.back().get();
}
if (const auto * low_cardinality_column = typeid_cast<const ColumnLowCardinality *>(key_columns[i]))
{
@ -797,13 +792,8 @@ bool Aggregator::executeOnBlock(const Block & block, AggregatedDataVariants & re
{
for (size_t j = 0; j < aggregate_columns[i].size(); ++j)
{
aggregate_columns[i][j] = block.safeGetByPosition(params.aggregates[i].arguments[j]).column.get();
if (ColumnPtr converted = aggregate_columns[i][j]->convertToFullColumnIfConst())
{
materialized_columns.push_back(converted);
materialized_columns.push_back(block.safeGetByPosition(params.aggregates[i].arguments[j]).column->convertToFullColumnIfConst());
aggregate_columns[i][j] = materialized_columns.back().get();
}
if (auto * col_low_cardinality = typeid_cast<const ColumnLowCardinality *>(aggregate_columns[i][j]))
{


@ -375,10 +375,7 @@ void ExpressionAction::execute(Block & block, bool dry_run) const
if (array_joined_columns.empty())
throw Exception("No arrays to join", ErrorCodes::LOGICAL_ERROR);
ColumnPtr any_array_ptr = block.getByName(*array_joined_columns.begin()).column;
if (ColumnPtr converted = any_array_ptr->convertToFullColumnIfConst())
any_array_ptr = converted;
ColumnPtr any_array_ptr = block.getByName(*array_joined_columns.begin()).column->convertToFullColumnIfConst();
const ColumnArray * any_array = typeid_cast<const ColumnArray *>(&*any_array_ptr);
if (!any_array)
throw Exception("ARRAY JOIN of not array: " + *array_joined_columns.begin(), ErrorCodes::TYPE_MISMATCH);
@ -416,10 +413,10 @@ void ExpressionAction::execute(Block & block, bool dry_run) const
Block tmp_block{src_col, column_of_max_length, {{}, src_col.type, {}}};
function_arrayResize->build({src_col, column_of_max_length})->execute(tmp_block, {0, 1}, 2, rows);
any_array_ptr = src_col.column = tmp_block.safeGetByPosition(2).column;
src_col.column = tmp_block.safeGetByPosition(2).column;
any_array_ptr = src_col.column->convertToFullColumnIfConst();
}
if (ColumnPtr converted = any_array_ptr->convertToFullColumnIfConst())
any_array_ptr = converted;
any_array = typeid_cast<const ColumnArray *>(&*any_array_ptr);
}
else if (array_join_is_left && !unaligned_array_join)
@ -434,10 +431,7 @@ void ExpressionAction::execute(Block & block, bool dry_run) const
non_empty_array_columns[name] = tmp_block.safeGetByPosition(1).column;
}
any_array_ptr = non_empty_array_columns.begin()->second;
if (ColumnPtr converted = any_array_ptr->convertToFullColumnIfConst())
any_array_ptr = converted;
any_array_ptr = non_empty_array_columns.begin()->second->convertToFullColumnIfConst();
any_array = &typeid_cast<const ColumnArray &>(*any_array_ptr);
}
@ -452,9 +446,7 @@ void ExpressionAction::execute(Block & block, bool dry_run) const
throw Exception("ARRAY JOIN of not array: " + current.name, ErrorCodes::TYPE_MISMATCH);
ColumnPtr array_ptr = (array_join_is_left && !unaligned_array_join) ? non_empty_array_columns[current.name] : current.column;
if (ColumnPtr converted = array_ptr->convertToFullColumnIfConst())
array_ptr = converted;
array_ptr = array_ptr->convertToFullColumnIfConst();
const ColumnArray & array = typeid_cast<const ColumnArray &>(*array_ptr);
if (!unaligned_array_join && !array.hasEqualOffsets(typeid_cast<const ColumnArray &>(*any_array_ptr)))


@ -437,14 +437,8 @@ bool Join::insertFromBlock(const Block & block)
/// Memoize key columns to work with.
for (size_t i = 0; i < keys_size; ++i)
{
materialized_columns.emplace_back(recursiveRemoveLowCardinality(block.getByName(key_names_right[i]).column));
materialized_columns.emplace_back(recursiveRemoveLowCardinality(block.getByName(key_names_right[i]).column->convertToFullColumnIfConst()));
key_columns[i] = materialized_columns.back().get();
if (ColumnPtr converted = key_columns[i]->convertToFullColumnIfConst())
{
materialized_columns.emplace_back(converted);
key_columns[i] = materialized_columns.back().get();
}
}
/// We will insert to the map only keys, where all components are not NULL.
@ -483,11 +477,7 @@ bool Join::insertFromBlock(const Block & block)
/// Rare case, when joined columns are constant. To avoid code bloat, simply materialize them.
for (size_t i = 0; i < size; ++i)
{
ColumnPtr col = stored_block->safeGetByPosition(i).column;
if (ColumnPtr converted = col->convertToFullColumnIfConst())
stored_block->safeGetByPosition(i).column = converted;
}
stored_block->safeGetByPosition(i).column = stored_block->safeGetByPosition(i).column->convertToFullColumnIfConst();
/// In case of LEFT and FULL joins, if use_nulls, convert joined columns to Nullable.
if (use_nulls && (kind == ASTTableJoin::Kind::Left || kind == ASTTableJoin::Kind::Full))
@ -685,14 +675,8 @@ void Join::joinBlockImpl(
/// Memoize key columns to work with.
for (size_t i = 0; i < keys_size; ++i)
{
materialized_columns.emplace_back(recursiveRemoveLowCardinality(block.getByName(key_names_left[i]).column));
materialized_columns.emplace_back(recursiveRemoveLowCardinality(block.getByName(key_names_left[i]).column->convertToFullColumnIfConst()));
key_columns[i] = materialized_columns.back().get();
if (ColumnPtr converted = key_columns[i]->convertToFullColumnIfConst())
{
materialized_columns.emplace_back(converted);
key_columns[i] = materialized_columns.back().get();
}
}
/// Keys with NULL value in any column won't join to anything.
@ -710,10 +694,7 @@ void Join::joinBlockImpl(
{
for (size_t i = 0; i < existing_columns; ++i)
{
auto & col = block.getByPosition(i).column;
if (ColumnPtr converted = col->convertToFullColumnIfConst())
col = converted;
block.getByPosition(i).column = block.getByPosition(i).column->convertToFullColumnIfConst();
/// If use_nulls, convert left columns (except keys) to Nullable.
if (use_nulls)

View File

@ -121,15 +121,10 @@ void Set::setHeader(const Block & block)
/// Remember the columns we will work with
for (size_t i = 0; i < keys_size; ++i)
{
key_columns.emplace_back(block.safeGetByPosition(i).column.get());
materialized_columns.emplace_back(block.safeGetByPosition(i).column->convertToFullColumnIfConst());
key_columns.emplace_back(materialized_columns.back().get());
data_types.emplace_back(block.safeGetByPosition(i).type);
if (ColumnPtr converted = key_columns.back()->convertToFullColumnIfConst())
{
materialized_columns.emplace_back(converted);
key_columns.back() = materialized_columns.back().get();
}
/// Convert low cardinality column to full.
if (auto * low_cardinality_type = typeid_cast<const DataTypeLowCardinality *>(data_types.back().get()))
{
@ -175,20 +170,8 @@ bool Set::insertFromBlock(const Block & block)
/// Remember the columns we will work with
for (size_t i = 0; i < keys_size; ++i)
{
key_columns.emplace_back(block.safeGetByPosition(i).column.get());
if (ColumnPtr converted = key_columns.back()->convertToFullColumnIfConst())
{
materialized_columns.emplace_back(converted);
key_columns.back() = materialized_columns.back().get();
}
/// Convert low cardinality column to full.
if (key_columns.back()->lowCardinality())
{
materialized_columns.emplace_back(key_columns.back()->convertToFullColumnIfLowCardinality());
key_columns.back() = materialized_columns.back().get();
}
materialized_columns.emplace_back(block.safeGetByPosition(i).column->convertToFullColumnIfConst()->convertToFullColumnIfLowCardinality());
key_columns.emplace_back(materialized_columns.back().get());
}
size_t rows = block.rows();
@ -365,18 +348,13 @@ ColumnPtr Set::execute(const Block & block, bool negative) const
for (size_t i = 0; i < num_key_columns; ++i)
{
key_columns.push_back(block.safeGetByPosition(i).column.get());
if (!removeNullable(data_types[i])->equals(*removeNullable(block.safeGetByPosition(i).type)))
throw Exception("Types of column " + toString(i + 1) + " in section IN don't match: "
+ data_types[i]->getName() + " on the right, " + block.safeGetByPosition(i).type->getName() +
" on the left.", ErrorCodes::TYPE_MISMATCH);
if (ColumnPtr converted = key_columns.back()->convertToFullColumnIfConst())
{
materialized_columns.emplace_back(converted);
key_columns.back() = materialized_columns.back().get();
}
materialized_columns.emplace_back(block.safeGetByPosition(i).column->convertToFullColumnIfConst());
key_columns.emplace_back(materialized_columns.back().get());
}
/// We will check existence in Set only for keys, where all components are not NULL.
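
A corollary visible in the Set hunks just above: because each conversion is now total, the const and low-cardinality materializations compose in one chained expression with no null check in between. A toy sketch under the same assumed contract (the low_cardinality flag is a hypothetical stand-in for ColumnLowCardinality):

#include <iostream>
#include <memory>

struct Column;
using ColumnPtr = std::shared_ptr<const Column>;

struct Column : std::enable_shared_from_this<Column>
{
    bool is_const = false;
    bool low_cardinality = false; // hypothetical stand-in for ColumnLowCardinality

    ColumnPtr convertToFullColumnIfConst() const
    {
        if (!is_const)
            return shared_from_this();
        auto full = std::make_shared<Column>(*this);
        full->is_const = false;
        return full;
    }

    ColumnPtr convertToFullColumnIfLowCardinality() const
    {
        if (!low_cardinality)
            return shared_from_this();
        auto full = std::make_shared<Column>(*this);
        full->low_cardinality = false;
        return full;
    }
};

int main()
{
    auto col = std::make_shared<Column>();
    col->is_const = true;
    col->low_cardinality = true;

    // Both calls always yield a usable column, so the chain is safe:
    ColumnPtr key = col->convertToFullColumnIfConst()->convertToFullColumnIfLowCardinality();
    std::cout << std::boolalpha << key->is_const << ' ' << key->low_cardinality << std::endl; // false false
}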

View File

@ -67,8 +67,7 @@ void evaluateMissingDefaults(Block & block,
if (copy_block.has(col->name))
{
auto evaluated_col = copy_block.getByName(col->name);
if (ColumnPtr converted = evaluated_col.column->convertToFullColumnIfConst())
evaluated_col.column = converted;
evaluated_col.column = evaluated_col.column->convertToFullColumnIfConst();
block.insert(pos, std::move(evaluated_col));
}

View File

@ -51,7 +51,7 @@ void ASTAlterCommand::formatImpl(
if (type == ASTAlterCommand::ADD_COLUMN)
{
settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "ADD COLUMN " << (settings.hilite ? hilite_none : "");
settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "ADD COLUMN " << (if_not_exists ? "IF NOT EXISTS " : "") << (settings.hilite ? hilite_none : "");
col_decl->formatImpl(settings, state, frame);
/// AFTER
@ -64,7 +64,7 @@ void ASTAlterCommand::formatImpl(
else if (type == ASTAlterCommand::DROP_COLUMN)
{
settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str
<< (clear_column ? "CLEAR " : "DROP ") << "COLUMN " << (settings.hilite ? hilite_none : "");
<< (clear_column ? "CLEAR " : "DROP ") << "COLUMN " << (if_exists ? "IF EXISTS " : "") << (settings.hilite ? hilite_none : "");
column->formatImpl(settings, state, frame);
if (partition)
{
@ -74,7 +74,7 @@ void ASTAlterCommand::formatImpl(
}
else if (type == ASTAlterCommand::MODIFY_COLUMN)
{
settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "MODIFY COLUMN " << (settings.hilite ? hilite_none : "");
settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "MODIFY COLUMN " << (if_exists ? "IF EXISTS " : "") << (settings.hilite ? hilite_none : "");
col_decl->formatImpl(settings, state, frame);
}
else if (type == ASTAlterCommand::MODIFY_ORDER_BY)

View File

@ -78,6 +78,10 @@ public:
bool clear_column = false; /// for CLEAR COLUMN (do not drop column from metadata)
bool if_not_exists = false; /// option for ADD_COLUMN
bool if_exists = false; /// option for DROP_COLUMN, MODIFY_COLUMN, COMMENT_COLUMN
/** For FETCH PARTITION - the path in ZK to the shard, from which to download the partition.
*/
String from;

View File

@ -36,6 +36,8 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
ParserKeyword s_partition("PARTITION");
ParserKeyword s_after("AFTER");
ParserKeyword s_if_not_exists("IF NOT EXISTS");
ParserKeyword s_if_exists("IF EXISTS");
ParserKeyword s_from("FROM");
ParserKeyword s_in_partition("IN PARTITION");
ParserKeyword s_with("WITH");
@ -57,6 +59,9 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
if (s_add_column.ignore(pos, expected))
{
if (s_if_not_exists.ignore(pos, expected))
command->if_not_exists = true;
if (!parser_col_decl.parse(pos, command->col_decl, expected))
return false;
@ -77,6 +82,9 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
}
else if (s_drop_column.ignore(pos, expected))
{
if (s_if_exists.ignore(pos, expected))
command->if_exists = true;
if (!parser_name.parse(pos, command->column, expected))
return false;
@ -85,6 +93,9 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
}
else if (s_clear_column.ignore(pos, expected))
{
if (s_if_exists.ignore(pos, expected))
command->if_exists = true;
if (!parser_name.parse(pos, command->column, expected))
return false;
@ -190,6 +201,9 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
}
else if (s_modify_column.ignore(pos, expected))
{
if (s_if_exists.ignore(pos, expected))
command->if_exists = true;
if (!parser_modify_col_decl.parse(pos, command->col_decl, expected))
return false;
@ -224,6 +238,9 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
}
else if (s_comment_column.ignore(pos, expected))
{
if (s_if_exists.ignore(pos, expected))
command->if_exists = true;
if (!parser_name.parse(pos, command->column, expected))
return false;
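
Every sub-command above follows the same parsing pattern: optionally consume the keyword (rewinding when it is absent) and record it as a flag on the command. A rough self-contained sketch of that optional-keyword parsing - a toy tokenizer; the real code relies on ParserKeyword::ignore(pos, expected), which backtracks on mismatch:

#include <iostream>
#include <sstream>
#include <string>

struct Command
{
    bool if_exists = false;
    std::string column;
};

// Toy counterpart of: if (s_if_exists.ignore(pos, expected)) command->if_exists = true;
bool parseDropColumn(std::istringstream & pos, Command & command)
{
    auto save = pos.tellg();
    std::string tok;
    if (pos >> tok && tok == "IF")
    {
        if (!(pos >> tok) || tok != "EXISTS")
            return false;
        command.if_exists = true;
    }
    else
    {
        pos.clear();
        pos.seekg(save); // keyword absent: rewind, like ignore() backtracking
    }
    return static_cast<bool>(pos >> command.column);
}

int main()
{
    std::istringstream pos("IF EXISTS ToDrop");
    Command command;
    bool ok = parseDropColumn(pos, command);
    std::cout << std::boolalpha << ok << ' ' << command.if_exists << ' ' << command.column << std::endl;
    // true true ToDrop
}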

View File

@ -8,12 +8,12 @@ namespace DB
/** Query like this:
* ALTER TABLE [db.]name [ON CLUSTER cluster]
* [ADD COLUMN col_name type [AFTER col_after],]
* [DROP COLUMN col_to_drop, ...]
* [CLEAR COLUMN col_to_clear [IN PARTITION partition],]
* [MODIFY COLUMN col_to_modify type, ...]
* [ADD COLUMN [IF NOT EXISTS] col_name type [AFTER col_after],]
* [DROP COLUMN [IF EXISTS] col_to_drop, ...]
* [CLEAR COLUMN [IF EXISTS] col_to_clear [IN PARTITION partition],]
* [MODIFY COLUMN [IF EXISTS] col_to_modify type, ...]
* [MODIFY PRIMARY KEY (a, b, c...)]
* [COMMENT COLUMN col_name string]
* [COMMENT COLUMN [IF EXISTS] col_name string]
* [DROP|DETACH|ATTACH PARTITION|PART partition, ...]
* [FETCH PARTITION partition FROM ...]
* [FREEZE [PARTITION] [WITH NAME name]]

View File

@ -52,6 +52,8 @@ std::optional<AlterCommand> AlterCommand::parse(const ASTAlterCommand * command_
if (command_ast->column)
command.after_column = typeid_cast<const ASTIdentifier &>(*command_ast->column).name;
command.if_not_exists = command_ast->if_not_exists;
return command;
}
else if (command_ast->type == ASTAlterCommand::DROP_COLUMN && !command_ast->partition)
@ -62,6 +64,7 @@ std::optional<AlterCommand> AlterCommand::parse(const ASTAlterCommand * command_
AlterCommand command;
command.type = AlterCommand::DROP_COLUMN;
command.column_name = typeid_cast<const ASTIdentifier &>(*(command_ast->column)).name;
command.if_exists = command_ast->if_exists;
return command;
}
else if (command_ast->type == ASTAlterCommand::MODIFY_COLUMN)
@ -88,6 +91,7 @@ std::optional<AlterCommand> AlterCommand::parse(const ASTAlterCommand * command_
const auto & ast_comment = typeid_cast<ASTLiteral &>(*ast_col_decl.comment);
command.comment = ast_comment.value.get<String>();
}
command.if_exists = command_ast->if_exists;
return command;
}
@ -99,6 +103,7 @@ std::optional<AlterCommand> AlterCommand::parse(const ASTAlterCommand * command_
command.column_name = ast_identifier.name;
const auto & ast_comment = typeid_cast<ASTLiteral &>(*command_ast->comment);
command.comment = ast_comment.value.get<String>();
command.if_exists = command_ast->if_exists;
return command;
}
else if (command_ast->type == ASTAlterCommand::MODIFY_ORDER_BY)
@ -300,6 +305,7 @@ void AlterCommands::apply(ColumnsDescription & columns_description, ASTPtr & ord
auto new_primary_key_ast = primary_key_ast;
for (const AlterCommand & command : *this)
if (!command.ignore)
command.apply(new_columns_description, new_order_by_ast, new_primary_key_ast);
columns_description = std::move(new_columns_description);
@ -328,18 +334,33 @@ void AlterCommands::validate(const IStorage & table, const Context & context)
if (command.type == AlterCommand::ADD_COLUMN)
{
if (std::end(all_columns) != column_it)
{
if (command.if_not_exists)
command.ignore = true;
else
throw Exception{"Cannot add column " + column_name + ": column with this name already exists", ErrorCodes::ILLEGAL_COLUMN};
}
}
else if (command.type == AlterCommand::MODIFY_COLUMN)
{
if (std::end(all_columns) == column_it)
{
if (command.if_exists)
command.ignore = true;
else
throw Exception{"Wrong column name. Cannot find column " + column_name + " to modify", ErrorCodes::ILLEGAL_COLUMN};
}
if (!command.ignore)
{
all_columns.erase(column_it);
defaults.erase(column_name);
}
}
if (!command.ignore)
{
/// we're creating dummy DataTypeUInt8 in order to prevent the NullPointerException in ExpressionActions
all_columns.emplace_back(column_name, command.data_type ? command.data_type : std::make_shared<DataTypeUInt8>());
@ -347,7 +368,7 @@ void AlterCommands::validate(const IStorage & table, const Context & context)
{
if (command.data_type)
{
const auto & final_column_name = column_name;
const auto &final_column_name = column_name;
const auto tmp_column_name = final_column_name + "_tmp";
const auto column_type_raw_ptr = command.data_type.get();
@ -370,6 +391,7 @@ void AlterCommands::validate(const IStorage & table, const Context & context)
}
}
}
}
else if (command.type == AlterCommand::DROP_COLUMN)
{
for (const auto & default_column : defaults)
@ -407,15 +429,23 @@ void AlterCommands::validate(const IStorage & table, const Context & context)
}
if (!found)
{
if (command.if_exists)
command.ignore = true;
else
throw Exception("Wrong column name. Cannot find column " + command.column_name + " to drop",
ErrorCodes::ILLEGAL_COLUMN);
}
}
else if (command.type == AlterCommand::COMMENT_COLUMN)
{
const auto column_it = std::find_if(std::begin(all_columns), std::end(all_columns),
std::bind(namesEqual, std::cref(command.column_name), std::placeholders::_1));
if (column_it == std::end(all_columns))
{
if (command.if_exists)
command.ignore = true;
else
throw Exception{"Wrong column name. Cannot find column " + command.column_name + " to comment", ErrorCodes::ILLEGAL_COLUMN};
}
}
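
The shape of the change in validate() is uniform: where a command used to throw on a missing (or already existing) column, it now sets ignore = true when IF EXISTS / IF NOT EXISTS was given, and apply() skips ignored commands. A condensed sketch of that flow with simplified types (the real code operates on AlterCommand and the table's column list):

#include <algorithm>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

struct Command
{
    std::string column_name;
    bool if_exists = false;
    bool ignore = false;
};

void validateDrop(Command & command, const std::vector<std::string> & columns)
{
    bool found = std::find(columns.begin(), columns.end(), command.column_name) != columns.end();
    if (!found)
    {
        if (command.if_exists)
            command.ignore = true; // apply() will skip this command
        else
            throw std::runtime_error("Cannot find column " + command.column_name + " to drop");
    }
}

int main()
{
    std::vector<std::string> columns{"x", "y"};
    Command command{"z", /* if_exists = */ true};
    validateDrop(command, columns);
    std::cout << std::boolalpha << command.ignore << std::endl; // true
}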

View File

@ -43,15 +43,26 @@ struct AlterCommand
/// For ADD - after which column to add the new one. If an empty string, add to the end. Adding to the beginning is currently not possible.
String after_column;
/// For DROP_COLUMN, MODIFY_COLUMN, COMMENT_COLUMN
bool if_exists;
/// For ADD_COLUMN
bool if_not_exists;
/// For MODIFY_ORDER_BY
ASTPtr order_by;
/// Indicates that this command should not be applied, for example when if_exists=true and the column doesn't exist.
bool ignore = false;
AlterCommand() = default;
AlterCommand(const Type type, const String & column_name, const DataTypePtr & data_type,
const ColumnDefaultKind default_kind, const ASTPtr & default_expression,
const String & after_column = String{}, const String & comment = "") // TODO: разобраться здесь с параметром по умолчанию
const String & after_column = String{}, const String & comment = "",
const bool if_exists = false, const bool if_not_exists = false) // TODO: разобраться здесь с параметром по умолчанию
: type{type}, column_name{column_name}, data_type{data_type}, default_kind{default_kind},
default_expression{default_expression}, comment(comment), after_column{after_column}
default_expression{default_expression}, comment(comment), after_column{after_column},
if_exists(if_exists), if_not_exists(if_not_exists)
{}
static std::optional<AlterCommand> parse(const ASTAlterCommand * command);

View File

@ -166,9 +166,7 @@ void filterBlockWithQuery(const ASTPtr & query, Block & block, const Context & c
/// Filter the block.
String filter_column_name = expression_ast->getColumnName();
ColumnPtr filter_column = block_with_filter.getByName(filter_column_name).column;
if (ColumnPtr converted = filter_column->convertToFullColumnIfConst())
filter_column = converted;
ColumnPtr filter_column = block_with_filter.getByName(filter_column_name).column->convertToFullColumnIfConst();
const IColumn::Filter & filter = typeid_cast<const ColumnUInt8 &>(*filter_column).getData();
for (size_t i = 0; i < block.columns(); ++i)

View File

@ -0,0 +1,17 @@
<?xml version="1.0"?>
<yandex>
<logger>
<log>/var/log/clickhouse-server/log.log</log>
<errorlog>/var/log/clickhouse-server/clickhouse-server.err.log</errorlog>
<stderr>/var/log/clickhouse-server/stderr.log</stderr>
<stdout>/var/log/clickhouse-server/stdout.log</stdout>
</logger>
<tcp_port>9000</tcp_port>
<listen_host>127.0.0.1</listen_host>
<path>/var/lib/clickhouse/</path>
<mark_cache_size>5368709120</mark_cache_size>
<users_config>users.xml</users_config>
</yandex>

View File

@ -0,0 +1,7 @@
<?xml version="1.0"?>
<yandex>
<profiles>
<default>
</default>
</profiles>
</yandex>

View File

@ -0,0 +1,37 @@
import docker
import os
import pwd
import pytest
import re
from helpers.cluster import ClickHouseCluster, CLICKHOUSE_START_COMMAND
def test_different_user():
current_user_id = os.getuid()
if current_user_id != 0:
return
other_user_id = pwd.getpwnam('nobody').pw_uid
cluster = ClickHouseCluster(__file__)
node = cluster.add_instance('node')
cluster.start()
docker_api = docker.from_env().api
container = node.get_docker_handle()
container.stop()
container.start()
container.exec_run('chown {} /var/lib/clickhouse'.format(other_user_id), privileged=True)
container.exec_run(CLICKHOUSE_START_COMMAND)
cluster.shutdown() # cleanup
with open(os.path.join(node.path, 'logs/clickhouse-server.err.log')) as log:
expected_message = "Effective user of the process \(.*\) does not match the owner of the data \(.*\)\. Run under 'sudo -u .*'\."
last_message = log.readlines()[-1].strip()
if re.search(expected_message, last_message) is None:
pytest.fail('Expected the server to fail with a message "{}", but the last message is "{}"'.format(expected_message, last_message))
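
For context, the behavior this test exercises is a startup check that compares the process' effective uid with the owner of the data directory and refuses to run on mismatch. A minimal sketch of such a check - illustrative only, since the server-side implementation is not part of this diff and the exact message formatting is assumed:

#include <cstdio>
#include <sys/stat.h>
#include <unistd.h>

int main()
{
    struct stat st {};
    // Compare the effective uid with the owner of the data directory.
    if (stat("/var/lib/clickhouse", &st) == 0 && st.st_uid != geteuid())
    {
        std::fprintf(stderr,
            "Effective user of the process (%u) does not match the owner of the data (%u). "
            "Run under 'sudo -u ...'.\n",
            static_cast<unsigned>(geteuid()), static_cast<unsigned>(st.st_uid));
        return 1;
    }
    return 0;
}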

View File

@ -23,6 +23,13 @@ ALTER TABLE test.alter_test DROP COLUMN NestedColumn.S;
ALTER TABLE test.alter_test DROP COLUMN AddedNested1.B;
ALTER TABLE test.alter_test ADD COLUMN IF NOT EXISTS Added0 UInt32;
ALTER TABLE test.alter_test ADD COLUMN IF NOT EXISTS AddedNested1 Nested(A UInt32, B UInt64);
ALTER TABLE test.alter_test ADD COLUMN IF NOT EXISTS AddedNested1.C Array(String);
ALTER TABLE test.alter_test MODIFY COLUMN IF EXISTS ToDrop UInt64;
ALTER TABLE test.alter_test DROP COLUMN IF EXISTS ToDrop;
ALTER TABLE test.alter_test COMMENT COLUMN IF EXISTS ToDrop 'new comment';
DESC TABLE test.alter_test;
SELECT * FROM test.alter_test;

View File

@ -0,0 +1,54 @@
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1

View File

@ -0,0 +1,77 @@
SELECT hex(xxHash64('')) = upper('ef46db3751d8e999');
SELECT hex(xxHash32('')) = upper('02cc5d05');
SELECT hex(xxHash64('ABC')) = upper('e66ae7354fcfee98');
SELECT hex(xxHash32('ABC')) = upper('80712ed5');
SELECT hex(xxHash64('xxhash')) = upper('32dd38952c4bc720');
--
SELECT xxHash64(NULL) is NULL;
SELECT xxHash64() = toUInt64(16324913028386710556);
SELECT xxHash64(0) = toUInt64(16804241149081757544);
SELECT xxHash64(123456) = toUInt64(9049736899514479480);
select xxHash64(toUInt8(0)) = xxHash64('\0');
select xxHash64(toUInt16(0)) = xxHash64('\0\0');
select xxHash64(toUInt32(0)) = xxHash64('\0\0\0\0');
select xxHash64(toUInt64(0)) = xxHash64('\0\0\0\0\0\0\0\0');
SELECT xxHash64(CAST(3 AS UInt8)) = toUInt64(2244420788148980662);
SELECT xxHash64(CAST(1.2684 AS Float32)) = toUInt64(6662491266811474554);
SELECT xxHash64(CAST(-154477 AS Int64)) = toUInt64(1162348840373071858);
SELECT xxHash64('') = toUInt64(17241709254077376921);
SELECT xxHash64('foo') = toUInt64(3728699739546630719);
SELECT xxHash64(CAST('foo' AS FixedString(3))) = xxHash64('foo');
SELECT xxHash64(CAST('bar' AS FixedString(3))) = toUInt64(5234164152756840025);
SELECT xxHash64(x) = toUInt64(9962287286179718960) FROM (SELECT CAST(1 AS Enum8('a' = 1, 'b' = 2)) as x);
SELECT xxHash64('\x01') = toUInt64(9962287286179718960);
SELECT xxHash64('\x02\0') = toUInt64(6482051057365497128);
SELECT xxHash64('\x03\0\0\0') = toUInt64(13361037350151369407);
SELECT xxHash64(1) = toUInt64(9962287286179718960);
SELECT xxHash64(toUInt16(2)) = toUInt64(6482051057365497128);
SELECT xxHash64(toUInt32(3)) = toUInt64(13361037350151369407);
SELECT xxHash64(1, 2, 3) = toUInt64(13728743482242651702);
SELECT xxHash64(1, 3, 2) = toUInt64(10226792638577471533);
SELECT xxHash64(('a', [1, 2, 3], 4, (4, ['foo', 'bar'], 1, (1, 2)))) = toUInt64(3521288460171939489);
--
SELECT xxHash32(NULL) is NULL;
SELECT xxHash32() = toUInt32(4263699484);
SELECT xxHash32(0) = toUInt32(3479547966);
SELECT xxHash32(123456) = toUInt32(1434661961);
select xxHash32(toUInt8(0)) = xxHash32('\0');
select xxHash32(toUInt16(0)) = xxHash32('\0\0');
select xxHash32(toUInt32(0)) = xxHash32('\0\0\0\0');
SELECT xxHash32(CAST(3 AS UInt8)) = toUInt32(565077562);
SELECT xxHash32(CAST(1.2684 AS Float32)) = toUInt32(3120514536);
SELECT xxHash32(CAST(-154477 AS Int32)) = toUInt32(3279223048);
SELECT xxHash32('') = toUInt32(46947589);
SELECT xxHash32('foo') = toUInt32(3792637401);
SELECT xxHash32(CAST('foo' AS FixedString(3))) = xxHash32('foo');
SELECT xxHash32(CAST('bar' AS FixedString(3))) = toUInt32(1101146924);
SELECT xxHash32(x) = toUInt32(949155633) FROM (SELECT CAST(1 AS Enum8('a' = 1, 'b' = 2)) as x);
SELECT xxHash32('\x01') = toUInt32(949155633);
SELECT xxHash32('\x02\0') = toUInt32(332955956);
SELECT xxHash32('\x03\0\0\0') = toUInt32(2158931063);
SELECT xxHash32(1) = toUInt32(949155633);
SELECT xxHash32(toUInt16(2)) = toUInt32(332955956);
SELECT xxHash32(toUInt32(3)) = toUInt32(2158931063);
SELECT xxHash32(1, 2, 3) = toUInt32(441104368);
SELECT xxHash32(1, 3, 2) = toUInt32(912264289);
SELECT xxHash32(('a', [1, 2, 3], 4, (4, ['foo', 'bar'], 1, (1, 2)))) = toUInt32(1930126291);
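
The constants above appear to be plain xxHash digests computed with seed 0; assuming that, a few of them can be cross-checked against the reference xxHash library (hypothetical snippet - requires xxhash.h from github.com/Cyan4973/xxHash):

#include <cstdio>
#include <xxhash.h>

int main()
{
    // Expected per the test above: ef46db3751d8e999 and 02cc5d05.
    std::printf("%016llx\n", static_cast<unsigned long long>(XXH64("", 0, 0)));
    std::printf("%08x\n", static_cast<unsigned>(XXH32("", 0, 0)));
    // Expected per the test above: e66ae7354fcfee98.
    std::printf("%016llx\n", static_cast<unsigned long long>(XXH64("ABC", 3, 0)));
}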

View File

@ -0,0 +1,2 @@
[]
[1]

View File

@ -0,0 +1,4 @@
SET send_logs_level = 'none';
SELECT arrayEnumerateUniq(anyHeavy([]), []);
SELECT arrayEnumerateDense([], [sequenceCount(NULL)]); -- { serverError 190 }
SELECT arrayEnumerateDense([STDDEV_SAMP(NULL, 910947.571364)], [NULL]);

View File

@ -0,0 +1,5 @@
SET send_logs_level = 'none';
SELECT globalNotIn(['"wh'], [NULL]); -- { serverError 53 }
SELECT globalIn([''], [NULL]); -- { serverError 53 }
SELECT notIn([['']], [[NULL]]); -- { serverError 53 }

View File

@ -1,4 +1,4 @@
DROP TABLE IF EXISTS test.prewhere;
CREATE TABLE test.prewhere (x Array(UInt64), y ALIAS x, s String) ENGINE = MergeTree ORDER BY tuple()
SELECT count() FROM test.prewhere PREWHERE (length(s) >= 1) = 0 WHERE NOT ignore(y)
CREATE TABLE test.prewhere (x Array(UInt64), y ALIAS x, s String) ENGINE = MergeTree ORDER BY tuple();
SELECT count() FROM test.prewhere PREWHERE (length(s) >= 1) = 0 WHERE NOT ignore(y);
DROP TABLE test.prewhere;

View File

@ -1,11 +1,6 @@
SELECT sequenceCount((CAST((( SELECT NULL ) AS rg, ( SELECT ( SELECT [], '<e', caseWithExpr([NULL], -588755.149, []), retention(addWeeks((CAST((-7644612.39732) AS DateTime)), -23578040.02833), (CAST(([]) AS DateTime)), (CAST(([010977.08]) AS String))), emptyArrayToSingle('') ) , '\0', toUInt64([], 't3hw@'), '\0', toStartOfQuarter(-4230.1872, []) ) ) AS Date)));
SELECT globalNotIn(['"wh'], [NULL]);
SELECT globalIn([''], [NULL])
SELECT ( SELECT toDecimal128([], rowNumberInBlock()) ) , lcm('', [[(CAST(('>A') AS String))]]);
SELECT truncate(895, -16);
SELECT arrayEnumerateUniq(anyHeavy([]), []);
SELECT notIn([['']], [[NULL]]);
SELECT subtractDays((CAST((-5263074.47) AS DateTime)), -737895);
SELECT quantileDeterministic([], findClusterIndex(( SELECT subtractDays((CAST((566450.398706) AS DateTime)), 54) ) )), '\0', [];
SELECT addDays((CAST((96.338) AS DateTime)), -3);
SELECT arrayEnumerateDense([], [sequenceCount(NULL)]);

View File

@ -9,13 +9,13 @@ Each functional test sends one or multiple queries to the running ClickHouse ser
Tests are located in the `dbms/src/tests/queries` directory. There are two subdirectories: `stateless` and `stateful`. Stateless tests run queries without any preloaded test data - they often create small synthetic datasets on the fly, within the test itself. Stateful tests require preloaded test data from Yandex.Metrica and are not available to the general public. We tend to use only `stateless` tests and avoid adding new `stateful` tests.
Each test can be one of two types: `.sql` and `.sh`. A `.sql` test is a simple SQL script that is piped to `clickhouse-client --multiquery`. A `.sh` test is a script that is run by itself.
Each test can be one of two types: `.sql` and `.sh`. A `.sql` test is a simple SQL script that is piped to `clickhouse-client --multiquery --testmode`. A `.sh` test is a script that is run by itself.
To run all tests, use the `dbms/tests/clickhouse-test` tool. See `--help` for the list of possible options. You can simply run all tests or run a subset of tests filtered by a substring of the test name: `./clickhouse-test substring`.
The simplest way to invoke functional tests is to copy `clickhouse-client` to `/usr/bin/`, run `clickhouse-server`, and then run `./clickhouse-test` from its own directory.
To add a new test, create a `.sql` or `.sh` file in the `dbms/src/tests/queries/0_stateless` directory, check it manually, and then generate the `.reference` file in the following way: `clickhouse-client -n < 00000_test.sql > 00000_test.reference` or `./00000_test.sh > ./00000_test.reference`.
To add a new test, create a `.sql` or `.sh` file in the `dbms/src/tests/queries/0_stateless` directory, check it manually, and then generate the `.reference` file in the following way: `clickhouse-client -n --testmode < 00000_test.sql > 00000_test.reference` or `./00000_test.sh > ./00000_test.reference`.
Tests should use (create, drop, etc) only tables in the `test` database, which is assumed to be created beforehand; tests can also use temporary tables.
@ -24,6 +24,11 @@ If you want to use distributed queries in functional tests, you can leverage `re
Some tests are marked with `zookeeper`, `shard` or `long` in their names. `zookeeper` is for tests that use ZooKeeper; `shard` is for tests that require the server to listen on `127.0.0.*`; `long` is for tests that run slightly longer than one second.
## Known bugs
If we know some bugs that can be easily reproduced by functional tests, we place prepared functional tests in the `dbms/src/tests/queries/bugs` directory. These tests will be moved to `dbms/src/tests/queries/0_stateless` when the bugs are fixed.
## Integration Tests
Integration tests allow testing ClickHouse in a clustered configuration, as well as ClickHouse interaction with other servers such as MySQL, Postgres, and MongoDB. They are useful for emulating network splits, packet drops, etc. These tests are run under Docker and create multiple containers with various software.
@ -55,7 +60,7 @@ Performance tests are not run on per-commit basis. Results of performance tests
Some programs in the `tests` directory are not prepared tests, but test tools. For example, for `Lexer` there is a tool `dbms/src/Parsers/tests/lexer` that just does tokenization of stdin and writes the colorized result to stdout. You can use these kinds of tools as code examples and for exploration and manual testing.
You can also place a pair of `.sh` and `.reference` files along with the tool to run it on some predefined input - then the script result can be compared to the `.reference` file. There kind of tests are not automated.
You can also place a pair of `.sh` and `.reference` files along with the tool to run it on some predefined input - then the script result can be compared to the `.reference` file. These kinds of tests are not automated.
## Miscellaneous Tests
@ -173,7 +178,7 @@ For production builds, gcc is used (it still generates slightly more efficient c
## Sanitizers
**Address sanitizer**.
We run functional tests under ASan on a per-commit basis.
We run functional and integration tests under ASan on a per-commit basis.
**Valgrind (Memcheck)**.
We run functional tests under Valgrind overnight. It takes multiple hours. Currently there is one known false positive in the `re2` library, see [this article](https://research.swtch.com/sparse).
@ -185,7 +190,7 @@ We run functional tests under TSan. ClickHouse must pass all tests. Run under TS
Currently we still don't use MSan.
**Undefined behaviour sanitizer.**
We still don't use UBSan. The only thing to fix is unaligned placement of structs in Arena during aggregation. This is totally fine, we only have to force alignment under UBSan.
We still don't use UBSan on a per-commit basis. There are some places to fix.
**Debug allocator.**
You can enable the debug version of `tcmalloc` with the `DEBUG_TCMALLOC` CMake option. We run tests with the debug allocator on a per-commit basis.
@ -195,7 +200,9 @@ You will find some additional details in `dbms/tests/instructions/sanitizers.txt
## Fuzzing
As of July 2018 we don't use fuzzing.
We use a simple fuzz test to generate random SQL queries and to check that the server doesn't die. Fuzz testing is performed with the Address sanitizer. You can find it in `00746_sql_fuzzy.pl`. This test should be run continuously (overnight and longer).
As of December 2018, we still don't use isolated fuzz testing of library code.
## Security Audit
@ -242,12 +249,12 @@ As of July 2018 we don't track test coverage.
## Test Automation
We run tests with Travis CI (available for general public) and Jenkins (available inside Yandex).
We run tests with the Yandex internal CI and job automation system named "Sandbox". We also continue to use Jenkins (available inside Yandex).
In Travis CI due to limit on time and computational power we can afford only subset of functional tests that are run with limited build of ClickHouse (debug version with cut off most of libraries). In about half of runs it still fails to finish in 50 minutes timeout. The only advantage - test results are visible for all external contributors.
Build jobs and tests are run in Sandbox on a per-commit basis. The resulting packages and test results are published on GitHub and can be downloaded by direct links. Artifacts are stored eternally. When you send a pull request on GitHub, we tag it as "can be tested" and our CI system will build ClickHouse packages (release, debug, with address sanitizer, etc) for you.
In Jenkins we run functional tests for each commit and for each pull request from trusted users; the same under ASan; we also run quorum tests, dictionary tests, Metrica B2B tests. We use Jenkins to prepare and publish releases. Worth to note that we are not happy with Jenkins at all.
We don't use Travis CI due to the limit on time and computational power.
One of our goals is to provide reliable testing infrastructure that will be available to community.
In Jenkins we run dictionary tests and Metrica B2B tests. We use Jenkins to prepare and publish releases. Jenkins is a legacy technology and all jobs will be moved to Sandbox.
[Original article](https://clickhouse.yandex/docs/en/development/tests/) <!--hide-->

View File

@ -16,10 +16,12 @@ sh ${CUR_DIR}check-include > $RESULT_FILE 2>&1
echo Results:
echo Top by memory:
cat $RESULT_FILE | sort -rk4 | head -n20
cat $RESULT_FILE | sort -nrk4 | head -n20
echo Top by time:
cat $RESULT_FILE | sort -rk3 | head -n20
cat $RESULT_FILE | sort -nrk3 | head -n20
echo Top by includes:
cat $RESULT_FILE | sort -rk2 | head -n20
cat $RESULT_FILE | sort -nrk2 | head -n20
trap "" EXIT
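
The fix above adds `-n` so that `sort` compares the size columns numerically instead of byte-wise; without it, "9" sorts after "100". A small illustration of the two orderings (C++ for consistency with the other sketches here):

#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

int main()
{
    std::vector<std::string> v{"9", "100", "20"};

    std::sort(v.begin(), v.end()); // byte-wise, like plain `sort`
    for (const auto & s : v) std::cout << s << ' '; // 100 20 9
    std::cout << '\n';

    std::sort(v.begin(), v.end(), [](const std::string & a, const std::string & b)
    {
        return std::stoi(a) < std::stoi(b); // numeric, like `sort -n`
    });
    for (const auto & s : v) std::cout << s << ' '; // 9 20 100
    std::cout << '\n';
}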