Merge branch 'master' of github.com:yandex/ClickHouse

This commit is contained in:
BayoNet 2019-03-15 18:22:18 +03:00
commit 06f9e89280
255 changed files with 3326 additions and 1392 deletions

View File

@ -293,7 +293,7 @@
### New features: ### New features:
* `DEFAULT` expressions are evaluated for missing fields when loading data in semi-structured input formats (`JSONEachRow`, `TSKV`). [#3555](https://github.com/yandex/ClickHouse/pull/3555) * `DEFAULT` expressions are evaluated for missing fields when loading data in semi-structured input formats (`JSONEachRow`, `TSKV`). The feature is enabled with the `insert_sample_with_metadata` setting. [#3555](https://github.com/yandex/ClickHouse/pull/3555)
* The `ALTER TABLE` query now has the `MODIFY ORDER BY` action for changing the sorting key when adding or removing a table column. This is useful for tables in the `MergeTree` family that perform additional tasks when merging based on this sorting key, such as `SummingMergeTree`, `AggregatingMergeTree`, and so on. [#3581](https://github.com/yandex/ClickHouse/pull/3581) [#3755](https://github.com/yandex/ClickHouse/pull/3755) * The `ALTER TABLE` query now has the `MODIFY ORDER BY` action for changing the sorting key when adding or removing a table column. This is useful for tables in the `MergeTree` family that perform additional tasks when merging based on this sorting key, such as `SummingMergeTree`, `AggregatingMergeTree`, and so on. [#3581](https://github.com/yandex/ClickHouse/pull/3581) [#3755](https://github.com/yandex/ClickHouse/pull/3755)
* For tables in the `MergeTree` family, now you can specify a different sorting key (`ORDER BY`) and index (`PRIMARY KEY`). The sorting key can be longer than the index. [#3581](https://github.com/yandex/ClickHouse/pull/3581) * For tables in the `MergeTree` family, now you can specify a different sorting key (`ORDER BY`) and index (`PRIMARY KEY`). The sorting key can be longer than the index. [#3581](https://github.com/yandex/ClickHouse/pull/3581)
* Added the `hdfs` table function and the `HDFS` table engine for importing and exporting data to HDFS. [chenxing-xc](https://github.com/yandex/ClickHouse/pull/3617) * Added the `hdfs` table function and the `HDFS` table engine for importing and exporting data to HDFS. [chenxing-xc](https://github.com/yandex/ClickHouse/pull/3617)

View File

@ -302,7 +302,7 @@
### Новые возможности: ### Новые возможности:
* Вычисление `DEFAULT` выражений для отсутствующих полей при загрузке данных в полуструктурированных форматах (`JSONEachRow`, `TSKV`). [#3555](https://github.com/yandex/ClickHouse/pull/3555) * Вычисление `DEFAULT` выражений для отсутствующих полей при загрузке данных в полуструктурированных форматах (`JSONEachRow`, `TSKV`) (требуется включить настройку запроса `insert_sample_with_metadata`). [#3555](https://github.com/yandex/ClickHouse/pull/3555)
* Для запроса `ALTER TABLE` добавлено действие `MODIFY ORDER BY` для изменения ключа сортировки при одновременном добавлении или удалении столбца таблицы. Это полезно для таблиц семейства `MergeTree`, выполняющих дополнительную работу при слияниях, согласно этому ключу сортировки, как например, `SummingMergeTree`, `AggregatingMergeTree` и т. п. [#3581](https://github.com/yandex/ClickHouse/pull/3581) [#3755](https://github.com/yandex/ClickHouse/pull/3755) * Для запроса `ALTER TABLE` добавлено действие `MODIFY ORDER BY` для изменения ключа сортировки при одновременном добавлении или удалении столбца таблицы. Это полезно для таблиц семейства `MergeTree`, выполняющих дополнительную работу при слияниях, согласно этому ключу сортировки, как например, `SummingMergeTree`, `AggregatingMergeTree` и т. п. [#3581](https://github.com/yandex/ClickHouse/pull/3581) [#3755](https://github.com/yandex/ClickHouse/pull/3755)
* Для таблиц семейства `MergeTree` появилась возможность указать различный ключ сортировки (`ORDER BY`) и индекс (`PRIMARY KEY`). Ключ сортировки может быть длиннее, чем индекс. [#3581](https://github.com/yandex/ClickHouse/pull/3581) * Для таблиц семейства `MergeTree` появилась возможность указать различный ключ сортировки (`ORDER BY`) и индекс (`PRIMARY KEY`). Ключ сортировки может быть длиннее, чем индекс. [#3581](https://github.com/yandex/ClickHouse/pull/3581)
* Добавлена табличная функция `hdfs` и движок таблиц `HDFS` для импорта и экспорта данных в HDFS. [chenxing-xc](https://github.com/yandex/ClickHouse/pull/3617) * Добавлена табличная функция `hdfs` и движок таблиц `HDFS` для импорта и экспорта данных в HDFS. [chenxing-xc](https://github.com/yandex/ClickHouse/pull/3617)

View File

@ -50,6 +50,7 @@ string(TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_UC)
message (STATUS "CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}") message (STATUS "CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}")
set (CMAKE_CONFIGURATION_TYPES "RelWithDebInfo;Debug;Release;MinSizeRel" CACHE STRING "" FORCE) set (CMAKE_CONFIGURATION_TYPES "RelWithDebInfo;Debug;Release;MinSizeRel" CACHE STRING "" FORCE)
set (CMAKE_DEBUG_POSTFIX "d" CACHE STRING "Generate debug library name with a postfix.") # To be consistent with CMakeLists from contrib libs.
option (USE_STATIC_LIBRARIES "Set to FALSE to use shared libraries" ON) option (USE_STATIC_LIBRARIES "Set to FALSE to use shared libraries" ON)
option (MAKE_STATIC_LIBRARIES "Set to FALSE to make shared libraries" ${USE_STATIC_LIBRARIES}) option (MAKE_STATIC_LIBRARIES "Set to FALSE to make shared libraries" ${USE_STATIC_LIBRARIES})
@ -98,10 +99,6 @@ if (CMAKE_SYSTEM_PROCESSOR MATCHES "amd64|x86_64")
if (OS_LINUX AND NOT UNBUNDLED AND MAKE_STATIC_LIBRARIES AND CMAKE_VERSION VERSION_GREATER "3.9.0") if (OS_LINUX AND NOT UNBUNDLED AND MAKE_STATIC_LIBRARIES AND CMAKE_VERSION VERSION_GREATER "3.9.0")
option (GLIBC_COMPATIBILITY "Set to TRUE to enable compatibility with older glibc libraries. Only for x86_64, Linux. Implies USE_INTERNAL_MEMCPY." ON) option (GLIBC_COMPATIBILITY "Set to TRUE to enable compatibility with older glibc libraries. Only for x86_64, Linux. Implies USE_INTERNAL_MEMCPY." ON)
if (GLIBC_COMPATIBILITY)
message (STATUS "Some symbols from glibc will be replaced for compatibility")
link_libraries(glibc-compatibility)
endif ()
endif () endif ()
endif () endif ()
@ -177,6 +174,60 @@ set (CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -O0 -g3 -ggdb3
include (cmake/use_libcxx.cmake) include (cmake/use_libcxx.cmake)
# Set standard, system and compiler libraries explicitly.
# This is intended for more control of what we are linking.
set (DEFAULT_LIBS "")
if (OS_LINUX AND NOT UNBUNDLED)
# Note: this probably has no effict, but I'm not an expert in CMake.
set (CMAKE_C_IMPLICIT_LINK_LIBRARIES "")
set (CMAKE_CXX_IMPLICIT_LINK_LIBRARIES "")
# Disable default linked libraries.
set (DEFAULT_LIBS "-nodefaultlibs")
# Add C++ libraries.
#
# This consist of:
# - C++ standard library (like implementation of std::string);
# - C++ ABI implementation (functions for exceptions like __cxa_throw, RTTI, etc);
# - functions for internal implementation of exception handling (stack unwinding based on DWARF info; TODO replace with bundled libunwind);
# - compiler builtins (example: functions for implementation of __int128 operations);
#
# There are two variants of C++ library: libc++ (from LLVM compiler infrastructure) and libstdc++ (from GCC).
if (USE_LIBCXX)
set (BUILTINS_LIB_PATH "")
if (COMPILER_CLANG)
execute_process (COMMAND ${CMAKE_CXX_COMPILER} --print-file-name=libclang_rt.builtins-${CMAKE_SYSTEM_PROCESSOR}.a OUTPUT_VARIABLE BUILTINS_LIB_PATH OUTPUT_STRIP_TRAILING_WHITESPACE)
endif ()
set (DEFAULT_LIBS "${DEFAULT_LIBS} -Wl,-Bstatic -lc++ -lc++abi -lgcc_eh ${BUILTINS_LIB_PATH} -Wl,-Bdynamic")
else ()
set (DEFAULT_LIBS "${DEFAULT_LIBS} -Wl,-Bstatic -lstdc++ -lgcc_eh -lgcc -Wl,-Bdynamic")
endif ()
# Linking with GLIBC prevents portability of binaries to older systems.
# We overcome this behaviour by statically linking with our own implementation of all new symbols (that don't exist in older Libc or have infamous "symbol versioning").
# The order of linking is important: 'glibc-compatibility' must be before libc but after all other libraries.
if (GLIBC_COMPATIBILITY)
message (STATUS "Some symbols from glibc will be replaced for compatibility")
string (TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_UC)
set (CMAKE_POSTFIX_VARIABLE "CMAKE_${CMAKE_BUILD_TYPE_UC}_POSTFIX")
set (DEFAULT_LIBS "${DEFAULT_LIBS} libs/libglibc-compatibility/libglibc-compatibility${${CMAKE_POSTFIX_VARIABLE}}.a")
endif ()
# Add Libc. GLIBC is actually a collection of interdependent libraries.
set (DEFAULT_LIBS "${DEFAULT_LIBS} -lrt -ldl -lpthread -lm -lc")
# Note: we'd rather use Musl libc library, but it's little bit more difficult to use.
message(STATUS "Default libraries: ${DEFAULT_LIBS}")
endif ()
if (NOT MAKE_STATIC_LIBRARIES) if (NOT MAKE_STATIC_LIBRARIES)
set(CMAKE_POSITION_INDEPENDENT_CODE ON) set(CMAKE_POSITION_INDEPENDENT_CODE ON)
endif () endif ()
@ -284,3 +335,36 @@ add_subdirectory (utils)
add_subdirectory (dbms) add_subdirectory (dbms)
include (cmake/print_include_directories.cmake) include (cmake/print_include_directories.cmake)
if (DEFAULT_LIBS)
# Add default libs to all targets as the last dependency.
# I have found no better way to specify default libs in CMake that will appear single time in specific order at the end of linker arguments.
function(add_default_libs target_name)
if (TARGET ${target_name})
# message(STATUS "Has target ${target_name}")
set_property(TARGET ${target_name} APPEND PROPERTY LINK_LIBRARIES "${DEFAULT_LIBS}")
set_property(TARGET ${target_name} APPEND PROPERTY INTERFACE_LINK_LIBRARIES "${DEFAULT_LIBS}")
if (GLIBC_COMPATIBILITY)
add_dependencies(${target_name} glibc-compatibility)
endif ()
endif ()
endfunction ()
add_default_libs(ltdl)
add_default_libs(zlibstatic)
add_default_libs(jemalloc)
add_default_libs(unwind)
add_default_libs(memcpy)
add_default_libs(Foundation)
add_default_libs(common)
add_default_libs(gtest)
add_default_libs(lz4)
add_default_libs(zstd)
add_default_libs(snappy)
add_default_libs(arrow)
add_default_libs(protoc)
add_default_libs(thrift_static)
add_default_libs(boost_regex_internal)
endif ()

View File

@ -13,5 +13,4 @@ ClickHouse is an open-source column-oriented database management system that all
## Upcoming Events ## Upcoming Events
* [ClickHouse Community Meetup](https://www.eventbrite.com/e/meetup-clickhouse-in-the-wild-deployment-success-stories-registration-55305051899) in San Francisco on February 19.
* [ClickHouse Community Meetup](https://www.eventbrite.com/e/clickhouse-meetup-in-madrid-registration-55376746339) in Madrid on April 2. * [ClickHouse Community Meetup](https://www.eventbrite.com/e/clickhouse-meetup-in-madrid-registration-55376746339) in Madrid on April 2.

View File

@ -1,3 +1,7 @@
option (ENABLE_BROTLI "Enable brotli" ON)
if (ENABLE_BROTLI)
option (USE_INTERNAL_BROTLI_LIBRARY "Set to FALSE to use system libbrotli library instead of bundled" ${NOT_UNBUNDLED}) option (USE_INTERNAL_BROTLI_LIBRARY "Set to FALSE to use system libbrotli library instead of bundled" ${NOT_UNBUNDLED})
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/brotli/c/include/brotli/decode.h") if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/brotli/c/include/brotli/decode.h")
@ -27,4 +31,6 @@ elseif (NOT MISSING_INTERNAL_BROTLI_LIBRARY)
set (USE_BROTLI 1) set (USE_BROTLI 1)
endif () endif ()
endif()
message (STATUS "Using brotli=${USE_BROTLI}: ${BROTLI_INCLUDE_DIR} : ${BROTLI_LIBRARY}") message (STATUS "Using brotli=${USE_BROTLI}: ${BROTLI_INCLUDE_DIR} : ${BROTLI_LIBRARY}")

View File

@ -20,11 +20,12 @@ if (NOT GTEST_SRC_DIR AND NOT GTEST_INCLUDE_DIRS AND NOT MISSING_INTERNAL_GTEST_
set (USE_INTERNAL_GTEST_LIBRARY 1) set (USE_INTERNAL_GTEST_LIBRARY 1)
set (GTEST_MAIN_LIBRARIES gtest_main) set (GTEST_MAIN_LIBRARIES gtest_main)
set (GTEST_LIBRARIES gtest) set (GTEST_LIBRARIES gtest)
set (GTEST_BOTH_LIBRARIES ${GTEST_MAIN_LIBRARIES} ${GTEST_LIBRARIES})
set (GTEST_INCLUDE_DIRS ${ClickHouse_SOURCE_DIR}/contrib/googletest/googletest) set (GTEST_INCLUDE_DIRS ${ClickHouse_SOURCE_DIR}/contrib/googletest/googletest)
endif () endif ()
if((GTEST_INCLUDE_DIRS AND GTEST_MAIN_LIBRARIES) OR GTEST_SRC_DIR) if((GTEST_INCLUDE_DIRS AND GTEST_BOTH_LIBRARIES) OR GTEST_SRC_DIR)
set(USE_GTEST 1) set(USE_GTEST 1)
endif() endif()
message (STATUS "Using gtest=${USE_GTEST}: ${GTEST_INCLUDE_DIRS} : ${GTEST_LIBRARIES}, ${GTEST_MAIN_LIBRARIES} : ${GTEST_SRC_DIR}") message (STATUS "Using gtest=${USE_GTEST}: ${GTEST_INCLUDE_DIRS} : ${GTEST_BOTH_LIBRARIES} : ${GTEST_SRC_DIR}")

View File

@ -1,3 +1,7 @@
option (ENABLE_PARQUET "Enable parquet" ON)
if (ENABLE_PARQUET)
if (NOT OS_FREEBSD) # Freebsd: ../contrib/arrow/cpp/src/arrow/util/bit-util.h:27:10: fatal error: endian.h: No such file or directory if (NOT OS_FREEBSD) # Freebsd: ../contrib/arrow/cpp/src/arrow/util/bit-util.h:27:10: fatal error: endian.h: No such file or directory
option(USE_INTERNAL_PARQUET_LIBRARY "Set to FALSE to use system parquet library instead of bundled" ${NOT_UNBUNDLED}) option(USE_INTERNAL_PARQUET_LIBRARY "Set to FALSE to use system parquet library instead of bundled" ${NOT_UNBUNDLED})
endif() endif()
@ -61,6 +65,8 @@ elseif(NOT MISSING_INTERNAL_PARQUET_LIBRARY AND NOT OS_FREEBSD)
endif() endif()
endif() endif()
endif()
if(USE_PARQUET) if(USE_PARQUET)
message(STATUS "Using Parquet: ${ARROW_LIBRARY}:${ARROW_INCLUDE_DIR} ; ${PARQUET_LIBRARY}:${PARQUET_INCLUDE_DIR} ; ${THRIFT_LIBRARY}") message(STATUS "Using Parquet: ${ARROW_LIBRARY}:${ARROW_INCLUDE_DIR} ; ${PARQUET_LIBRARY}:${PARQUET_INCLUDE_DIR} ; ${THRIFT_LIBRARY}")
else() else()

View File

@ -1,10 +1,8 @@
option(USE_INTERNAL_PROTOBUF_LIBRARY "Set to FALSE to use system protobuf instead of bundled" ${NOT_UNBUNDLED}) option (ENABLE_PROTOBUF "Enable protobuf" ON)
if(OS_FREEBSD AND SANITIZE STREQUAL "address") if (ENABLE_PROTOBUF)
# ../contrib/protobuf/src/google/protobuf/arena_impl.h:45:10: fatal error: 'sanitizer/asan_interface.h' file not found
set(MISSING_INTERNAL_PROTOBUF_LIBRARY 1) option(USE_INTERNAL_PROTOBUF_LIBRARY "Set to FALSE to use system protobuf instead of bundled" ${NOT_UNBUNDLED})
set(USE_INTERNAL_PROTOBUF_LIBRARY 0)
endif()
if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/protobuf/cmake/CMakeLists.txt") if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/protobuf/cmake/CMakeLists.txt")
if(USE_INTERNAL_PROTOBUF_LIBRARY) if(USE_INTERNAL_PROTOBUF_LIBRARY)
@ -94,4 +92,16 @@ elseif(NOT MISSING_INTERNAL_PROTOBUF_LIBRARY)
endfunction() endfunction()
endif() endif()
if(OS_FREEBSD AND SANITIZE STREQUAL "address")
# ../contrib/protobuf/src/google/protobuf/arena_impl.h:45:10: fatal error: 'sanitizer/asan_interface.h' file not found
# #include <sanitizer/asan_interface.h>
if(LLVM_INCLUDE_DIRS)
set(Protobuf_INCLUDE_DIR ${Protobuf_INCLUDE_DIR} ${LLVM_INCLUDE_DIRS})
else()
set(USE_PROTOBUF 0)
endif()
endif()
endif()
message(STATUS "Using protobuf=${USE_PROTOBUF}: ${Protobuf_INCLUDE_DIR} : ${Protobuf_LIBRARY}") message(STATUS "Using protobuf=${USE_PROTOBUF}: ${Protobuf_INCLUDE_DIR} : ${Protobuf_LIBRARY}")

View File

@ -11,38 +11,13 @@ if (OS_LINUX AND COMPILER_CLANG)
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS}") set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS}")
option (USE_LIBCXX "Use libc++ and libc++abi instead of libstdc++ (only make sense on Linux with Clang)" ${HAVE_LIBCXX}) option (USE_LIBCXX "Use libc++ and libc++abi instead of libstdc++ (only make sense on Linux with Clang)" ${HAVE_LIBCXX})
set (LIBCXX_PATH "" CACHE STRING "Use custom path for libc++. It should be used for MSan.")
if (USE_LIBCXX) if (USE_LIBCXX)
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++") # Ok for clang6, for older can cause 'not used option' warning set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++") # Ok for clang6, for older can cause 'not used option' warning
set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -D_LIBCPP_DEBUG=0") # More checks in debug build. set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -D_LIBCPP_DEBUG=0") # More checks in debug build.
if (MAKE_STATIC_LIBRARIES)
execute_process (COMMAND ${CMAKE_CXX_COMPILER} --print-file-name=libclang_rt.builtins-${CMAKE_SYSTEM_PROCESSOR}.a OUTPUT_VARIABLE BUILTINS_LIB_PATH OUTPUT_STRIP_TRAILING_WHITESPACE)
link_libraries (-nodefaultlibs -Wl,-Bstatic -stdlib=libc++ c++ c++abi gcc_eh ${BUILTINS_LIB_PATH} rt -Wl,-Bdynamic dl pthread m c)
else ()
link_libraries (-stdlib=libc++ c++ c++abi)
endif ()
if (LIBCXX_PATH)
include_directories (SYSTEM BEFORE "${LIBCXX_PATH}/include" "${LIBCXX_PATH}/include/c++/v1")
link_directories ("${LIBCXX_PATH}/lib")
endif ()
endif () endif ()
endif () endif ()
if (USE_LIBCXX)
set (STATIC_STDLIB_FLAGS "")
else ()
set (STATIC_STDLIB_FLAGS "-static-libgcc -static-libstdc++")
endif ()
if (MAKE_STATIC_LIBRARIES AND NOT APPLE AND NOT (COMPILER_CLANG AND OS_FREEBSD))
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${STATIC_STDLIB_FLAGS}")
# Along with executables, we also build example of shared library for "library dictionary source"; and it also should be self-contained.
set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${STATIC_STDLIB_FLAGS}")
endif ()
if (USE_STATIC_LIBRARIES AND HAVE_NO_PIE) if (USE_STATIC_LIBRARIES AND HAVE_NO_PIE)
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAG_NO_PIE}") set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAG_NO_PIE}")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${FLAG_NO_PIE}") set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${FLAG_NO_PIE}")

2
contrib/cppkafka vendored

@ -1 +1 @@
Subproject commit 860c90e92eee6690aa74a2ca7b7c5c6930dffecd Subproject commit 9b184d881c15cc50784b28688c7c99d3d764db24

2
contrib/libhdfs3 vendored

@ -1 +1 @@
Subproject commit bd6505cbb0c130b0db695305b9a38546fa880e5a Subproject commit e2131aa752d7e95441e08f9a18304c1445f2576a

2
contrib/librdkafka vendored

@ -1 +1 @@
Subproject commit 363dcad5a23dc29381cc626620e68ae418b3af19 Subproject commit 73295a702cd1c85c11749ade500d713db7099cca

View File

@ -2,6 +2,8 @@
#ifndef _CONFIG_H_ #ifndef _CONFIG_H_
#define _CONFIG_H_ #define _CONFIG_H_
#define ARCH "x86_64" #define ARCH "x86_64"
#define BUILT_WITH "GCC GXX PKGCONFIG OSXLD LIBDL PLUGINS ZLIB SSL SASL_CYRUS ZSTD HDRHISTOGRAM LZ4_EXT SNAPPY SOCKEM SASL_SCRAM CRC32C_HW"
#define CPU "generic" #define CPU "generic"
#define WITHOUT_OPTIMIZATION 0 #define WITHOUT_OPTIMIZATION 0
#define ENABLE_DEVEL 0 #define ENABLE_DEVEL 0

View File

@ -184,7 +184,9 @@ target_link_libraries (clickhouse_common_io
string_utils string_utils
widechar_width widechar_width
${LINK_LIBRARIES_ONLY_ON_X86_64} ${LINK_LIBRARIES_ONLY_ON_X86_64}
PUBLIC
${DOUBLE_CONVERSION_LIBRARIES} ${DOUBLE_CONVERSION_LIBRARIES}
PRIVATE
pocoext pocoext
PUBLIC PUBLIC
${Poco_Net_LIBRARY} ${Poco_Net_LIBRARY}
@ -351,6 +353,6 @@ if (ENABLE_TESTS AND USE_GTEST)
# attach all dbms gtest sources # attach all dbms gtest sources
grep_gtest_sources(${ClickHouse_SOURCE_DIR}/dbms dbms_gtest_sources) grep_gtest_sources(${ClickHouse_SOURCE_DIR}/dbms dbms_gtest_sources)
add_executable(unit_tests_dbms ${dbms_gtest_sources}) add_executable(unit_tests_dbms ${dbms_gtest_sources})
target_link_libraries(unit_tests_dbms PRIVATE gtest_main dbms clickhouse_common_zookeeper) target_link_libraries(unit_tests_dbms PRIVATE ${GTEST_BOTH_LIBRARIES} dbms clickhouse_common_zookeeper)
add_check(unit_tests_dbms) add_check(unit_tests_dbms)
endif () endif ()

View File

@ -1,11 +1,11 @@
# This strings autochanged from release_lib.sh: # This strings autochanged from release_lib.sh:
set(VERSION_REVISION 54415) set(VERSION_REVISION 54417)
set(VERSION_MAJOR 19) set(VERSION_MAJOR 19)
set(VERSION_MINOR 3) set(VERSION_MINOR 5)
set(VERSION_PATCH 4) set(VERSION_PATCH 1)
set(VERSION_GITHASH 263e69e861b769eae7e2bcc79d87673e3a08d376) set(VERSION_GITHASH 628ed349c335b79a441a1bd6e4bc791d61dfe62c)
set(VERSION_DESCRIBE v19.3.4-testing) set(VERSION_DESCRIBE v19.5.1.1-testing)
set(VERSION_STRING 19.3.4) set(VERSION_STRING 19.5.1.1)
# end of autochange # end of autochange
set(VERSION_EXTRA "" CACHE STRING "") set(VERSION_EXTRA "" CACHE STRING "")

View File

@ -22,7 +22,6 @@ endif()
configure_file (config_tools.h.in ${CMAKE_CURRENT_BINARY_DIR}/config_tools.h) configure_file (config_tools.h.in ${CMAKE_CURRENT_BINARY_DIR}/config_tools.h)
macro(clickhouse_target_link_split_lib target name) macro(clickhouse_target_link_split_lib target name)
if(NOT CLICKHOUSE_ONE_SHARED) if(NOT CLICKHOUSE_ONE_SHARED)
target_link_libraries(${target} PRIVATE clickhouse-${name}-lib) target_link_libraries(${target} PRIVATE clickhouse-${name}-lib)
@ -91,9 +90,9 @@ endif ()
if (CLICKHOUSE_ONE_SHARED) if (CLICKHOUSE_ONE_SHARED)
add_library(clickhouse-lib SHARED ${CLICKHOUSE_SERVER_SOURCES} ${CLICKHOUSE_CLIENT_SOURCES} ${CLICKHOUSE_LOCAL_SOURCES} ${CLICKHOUSE_BENCHMARK_SOURCES} ${CLICKHOUSE_PERFORMANCE_TEST_SOURCES} ${CLICKHOUSE_COPIER_SOURCES} ${CLICKHOUSE_EXTRACT_FROM_CONFIG_SOURCES} ${CLICKHOUSE_COMPRESSOR_SOURCES} ${CLICKHOUSE_FORMAT_SOURCES} ${CLICKHOUSE_OBFUSCATOR_SOURCES} ${CLICKHOUSE_COMPILER_SOURCES} ${CLICKHOUSE_ODBC_BRIDGE_SOURCES}) add_library(clickhouse-lib SHARED ${CLICKHOUSE_SERVER_SOURCES} ${CLICKHOUSE_CLIENT_SOURCES} ${CLICKHOUSE_LOCAL_SOURCES} ${CLICKHOUSE_BENCHMARK_SOURCES} ${CLICKHOUSE_PERFORMANCE_TEST_SOURCES} ${CLICKHOUSE_COPIER_SOURCES} ${CLICKHOUSE_EXTRACT_FROM_CONFIG_SOURCES} ${CLICKHOUSE_COMPRESSOR_SOURCES} ${CLICKHOUSE_FORMAT_SOURCES} ${CLICKHOUSE_OBFUSCATOR_SOURCES} ${CLICKHOUSE_COMPILER_SOURCES} ${CLICKHOUSE_ODBC_BRIDGE_SOURCES})
target_link_libraries(clickhouse-lib PUBLIC ${CLICKHOUSE_SERVER_LINK} ${CLICKHOUSE_CLIENT_LINK} ${CLICKHOUSE_LOCAL_LINK} ${CLICKHOUSE_BENCHMARK_LINK} ${CLICKHOUSE_PERFORMANCE_TEST_LINK} ${CLICKHOUSE_COPIER_LINK} ${CLICKHOUSE_EXTRACT_FROM_CONFIG_LINK} ${CLICKHOUSE_COMPRESSOR_LINK} ${CLICKHOUSE_FORMAT_LINK} ${CLICKHOUSE_OBFUSCATOR_LINK} ${CLICKHOUSE_COMPILER_LINK} ${CLICKHOUSE_ODBC_BRIDGE_LINK}) target_link_libraries(clickhouse-lib ${CLICKHOUSE_SERVER_LINK} ${CLICKHOUSE_CLIENT_LINK} ${CLICKHOUSE_LOCAL_LINK} ${CLICKHOUSE_BENCHMARK_LINK} ${CLICKHOUSE_PERFORMANCE_TEST_LINK} ${CLICKHOUSE_COPIER_LINK} ${CLICKHOUSE_EXTRACT_FROM_CONFIG_LINK} ${CLICKHOUSE_COMPRESSOR_LINK} ${CLICKHOUSE_FORMAT_LINK} ${CLICKHOUSE_OBFUSCATOR_LINK} ${CLICKHOUSE_COMPILER_LINK} ${CLICKHOUSE_ODBC_BRIDGE_LINK})
set_target_properties(clickhouse-lib PROPERTIES SOVERSION ${VERSION_MAJOR}.${VERSION_MINOR} VERSION ${VERSION_SO} OUTPUT_NAME clickhouse)
target_include_directories(clickhouse-lib ${CLICKHOUSE_SERVER_INCLUDE} ${CLICKHOUSE_CLIENT_INCLUDE} ${CLICKHOUSE_LOCAL_INCLUDE} ${CLICKHOUSE_BENCHMARK_INCLUDE} ${CLICKHOUSE_PERFORMANCE_TEST_INCLUDE} ${CLICKHOUSE_COPIER_INCLUDE} ${CLICKHOUSE_EXTRACT_FROM_CONFIG_INCLUDE} ${CLICKHOUSE_COMPRESSOR_INCLUDE} ${CLICKHOUSE_FORMAT_INCLUDE} ${CLICKHOUSE_OBFUSCATOR_INCLUDE} ${CLICKHOUSE_COMPILER_INCLUDE} ${CLICKHOUSE_ODBC_BRIDGE_INCLUDE}) target_include_directories(clickhouse-lib ${CLICKHOUSE_SERVER_INCLUDE} ${CLICKHOUSE_CLIENT_INCLUDE} ${CLICKHOUSE_LOCAL_INCLUDE} ${CLICKHOUSE_BENCHMARK_INCLUDE} ${CLICKHOUSE_PERFORMANCE_TEST_INCLUDE} ${CLICKHOUSE_COPIER_INCLUDE} ${CLICKHOUSE_EXTRACT_FROM_CONFIG_INCLUDE} ${CLICKHOUSE_COMPRESSOR_INCLUDE} ${CLICKHOUSE_FORMAT_INCLUDE} ${CLICKHOUSE_OBFUSCATOR_INCLUDE} ${CLICKHOUSE_COMPILER_INCLUDE} ${CLICKHOUSE_ODBC_BRIDGE_INCLUDE})
set_target_properties(clickhouse-lib PROPERTIES SOVERSION ${VERSION_MAJOR}.${VERSION_MINOR} VERSION ${VERSION_SO} OUTPUT_NAME clickhouse DEBUG_POSTFIX "")
endif() endif()
if (CLICKHOUSE_SPLIT_BINARY) if (CLICKHOUSE_SPLIT_BINARY)
@ -112,6 +111,8 @@ if (CLICKHOUSE_SPLIT_BINARY)
add_custom_target (clickhouse-bundle ALL DEPENDS ${CLICKHOUSE_ALL_TARGETS}) add_custom_target (clickhouse-bundle ALL DEPENDS ${CLICKHOUSE_ALL_TARGETS})
add_custom_target (clickhouse ALL DEPENDS clickhouse-bundle) add_custom_target (clickhouse ALL DEPENDS clickhouse-bundle)
install(PROGRAMS clickhouse-split-helper DESTINATION ${CMAKE_INSTALL_BINDIR} RENAME clickhouse COMPONENT clickhouse)
else () else ()
if (USE_EMBEDDED_COMPILER) if (USE_EMBEDDED_COMPILER)
# before add_executable ! # before add_executable !
@ -123,37 +124,37 @@ else ()
target_include_directories (clickhouse PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) target_include_directories (clickhouse PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
if (ENABLE_CLICKHOUSE_SERVER) if (ENABLE_CLICKHOUSE_SERVER)
target_link_libraries (clickhouse PRIVATE clickhouse-server-lib) clickhouse_target_link_split_lib(clickhouse server)
endif () endif ()
if (ENABLE_CLICKHOUSE_CLIENT) if (ENABLE_CLICKHOUSE_CLIENT)
target_link_libraries (clickhouse PRIVATE clickhouse-client-lib) clickhouse_target_link_split_lib(clickhouse client)
endif () endif ()
if (ENABLE_CLICKHOUSE_LOCAL) if (ENABLE_CLICKHOUSE_LOCAL)
target_link_libraries (clickhouse PRIVATE clickhouse-local-lib) clickhouse_target_link_split_lib(clickhouse local)
endif () endif ()
if (ENABLE_CLICKHOUSE_BENCHMARK) if (ENABLE_CLICKHOUSE_BENCHMARK)
target_link_libraries (clickhouse PRIVATE clickhouse-benchmark-lib) clickhouse_target_link_split_lib(clickhouse benchmark)
endif () endif ()
if (ENABLE_CLICKHOUSE_PERFORMANCE_TEST) if (ENABLE_CLICKHOUSE_PERFORMANCE_TEST)
target_link_libraries (clickhouse PRIVATE clickhouse-performance-test-lib) clickhouse_target_link_split_lib(clickhouse performance-test)
endif () endif ()
if (ENABLE_CLICKHOUSE_COPIER) if (ENABLE_CLICKHOUSE_COPIER)
target_link_libraries (clickhouse PRIVATE clickhouse-copier-lib) clickhouse_target_link_split_lib(clickhouse copier)
endif () endif ()
if (ENABLE_CLICKHOUSE_EXTRACT_FROM_CONFIG) if (ENABLE_CLICKHOUSE_EXTRACT_FROM_CONFIG)
target_link_libraries (clickhouse PRIVATE clickhouse-extract-from-config-lib) clickhouse_target_link_split_lib(clickhouse extract-from-config)
endif () endif ()
if (ENABLE_CLICKHOUSE_COMPRESSOR) if (ENABLE_CLICKHOUSE_COMPRESSOR)
target_link_libraries (clickhouse PRIVATE clickhouse-compressor-lib) clickhouse_target_link_split_lib(clickhouse compressor)
endif () endif ()
if (ENABLE_CLICKHOUSE_FORMAT) if (ENABLE_CLICKHOUSE_FORMAT)
target_link_libraries (clickhouse PRIVATE clickhouse-format-lib) clickhouse_target_link_split_lib(clickhouse format)
endif () endif ()
if (ENABLE_CLICKHOUSE_OBFUSCATOR) if (ENABLE_CLICKHOUSE_OBFUSCATOR)
target_link_libraries (clickhouse PRIVATE clickhouse-obfuscator-lib) clickhouse_target_link_split_lib(clickhouse obfuscator)
endif () endif ()
if (USE_EMBEDDED_COMPILER) if (USE_EMBEDDED_COMPILER)
target_link_libraries (clickhouse PRIVATE clickhouse-compiler-lib) clickhouse_target_link_split_lib(clickhouse compiler)
endif () endif ()
set (CLICKHOUSE_BUNDLE) set (CLICKHOUSE_BUNDLE)

View File

@ -0,0 +1,6 @@
#!/bin/sh
set -e
CMD=$1
shift
clickhouse-$CMD $*

View File

@ -1,5 +1,5 @@
set(CLICKHOUSE_COPIER_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/ClusterCopier.cpp) set(CLICKHOUSE_COPIER_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/ClusterCopier.cpp)
set(CLICKHOUSE_COPIER_LINK PRIVATE clickhouse_functions clickhouse_table_functions clickhouse_aggregate_functions daemon) set(CLICKHOUSE_COPIER_LINK PRIVATE clickhouse_functions clickhouse_table_functions clickhouse_aggregate_functions PUBLIC daemon)
#set(CLICKHOUSE_COPIER_INCLUDE SYSTEM PRIVATE ...) set(CLICKHOUSE_COPIER_INCLUDE SYSTEM PRIVATE ${PCG_RANDOM_INCLUDE_DIR})
clickhouse_program_add(copier) clickhouse_program_add(copier)

View File

@ -1,3 +1,4 @@
#include <new>
#include <iostream> #include <iostream>
#include <vector> #include <vector>
#include <string> #include <string>
@ -17,12 +18,6 @@
#include <gperftools/malloc_extension.h> // Y_IGNORE #include <gperftools/malloc_extension.h> // Y_IGNORE
#endif #endif
#if ENABLE_CLICKHOUSE_SERVER
#include "server/Server.h"
#endif
#if ENABLE_CLICKHOUSE_LOCAL
#include "local/LocalServer.h"
#endif
#include <Common/StringUtils/StringUtils.h> #include <Common/StringUtils/StringUtils.h>
/// Universal executable for various clickhouse applications /// Universal executable for various clickhouse applications
@ -145,6 +140,10 @@ bool isClickhouseApp(const std::string & app_suffix, std::vector<char *> & argv)
int main(int argc_, char ** argv_) int main(int argc_, char ** argv_)
{ {
/// Reset new handler to default (that throws std::bad_alloc)
/// It is needed because LLVM library clobbers it.
std::set_new_handler(nullptr);
#if USE_EMBEDDED_COMPILER #if USE_EMBEDDED_COMPILER
if (argc_ >= 2 && 0 == strcmp(argv_[1], "-cc1")) if (argc_ >= 2 && 0 == strcmp(argv_[1], "-cc1"))
return mainEntryClickHouseClang(argc_, argv_); return mainEntryClickHouseClang(argc_, argv_);

View File

@ -11,7 +11,7 @@ set(CLICKHOUSE_ODBC_BRIDGE_SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/validateODBCConnectionString.cpp ${CMAKE_CURRENT_SOURCE_DIR}/validateODBCConnectionString.cpp
) )
set(CLICKHOUSE_ODBC_BRIDGE_LINK PRIVATE daemon dbms clickhouse_common_io) set(CLICKHOUSE_ODBC_BRIDGE_LINK PRIVATE dbms clickhouse_common_io PUBLIC daemon)
set(CLICKHOUSE_ODBC_BRIDGE_INCLUDE PUBLIC ${ClickHouse_SOURCE_DIR}/libs/libdaemon/include) set(CLICKHOUSE_ODBC_BRIDGE_INCLUDE PUBLIC ${ClickHouse_SOURCE_DIR}/libs/libdaemon/include)
if (USE_POCO_SQLODBC) if (USE_POCO_SQLODBC)

View File

@ -10,7 +10,7 @@ set(CLICKHOUSE_SERVER_SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/TCPHandler.cpp ${CMAKE_CURRENT_SOURCE_DIR}/TCPHandler.cpp
) )
set(CLICKHOUSE_SERVER_LINK PRIVATE clickhouse_dictionaries clickhouse_common_io daemon clickhouse_storages_system clickhouse_functions clickhouse_aggregate_functions clickhouse_table_functions ${Poco_Net_LIBRARY}) set(CLICKHOUSE_SERVER_LINK PRIVATE clickhouse_dictionaries clickhouse_common_io PUBLIC daemon PRIVATE clickhouse_storages_system clickhouse_functions clickhouse_aggregate_functions clickhouse_table_functions ${Poco_Net_LIBRARY})
if (USE_POCO_NETSSL) if (USE_POCO_NETSSL)
set(CLICKHOUSE_SERVER_LINK ${CLICKHOUSE_SERVER_LINK} PRIVATE ${Poco_NetSSL_LIBRARY} ${Poco_Crypto_LIBRARY}) set(CLICKHOUSE_SERVER_LINK ${CLICKHOUSE_SERVER_LINK} PRIVATE ${Poco_NetSSL_LIBRARY} ${Poco_Crypto_LIBRARY})
endif () endif ()

View File

@ -260,6 +260,15 @@ int Server::main(const std::vector<std::string> & /*args*/)
StatusFile status{path + "status"}; StatusFile status{path + "status"};
SCOPE_EXIT({ SCOPE_EXIT({
/** Ask to cancel background jobs all table engines,
* and also query_log.
* It is important to do early, not in destructor of Context, because
* table engines could use Context on destroy.
*/
LOG_INFO(log, "Shutting down storages.");
global_context->shutdown();
LOG_DEBUG(log, "Shutted down storages.");
/** Explicitly destroy Context. It is more convenient than in destructor of Server, because logger is still available. /** Explicitly destroy Context. It is more convenient than in destructor of Server, because logger is still available.
* At this moment, no one could own shared part of Context. * At this moment, no one could own shared part of Context.
*/ */
@ -498,17 +507,6 @@ int Server::main(const std::vector<std::string> & /*args*/)
global_context->setCurrentDatabase(default_database); global_context->setCurrentDatabase(default_database);
SCOPE_EXIT({
/** Ask to cancel background jobs all table engines,
* and also query_log.
* It is important to do early, not in destructor of Context, because
* table engines could use Context on destroy.
*/
LOG_INFO(log, "Shutting down storages.");
global_context->shutdown();
LOG_DEBUG(log, "Shutted down storages.");
});
if (has_zookeeper && config().has("distributed_ddl")) if (has_zookeeper && config().has("distributed_ddl"))
{ {
/// DDL worker should be started after all tables were loaded /// DDL worker should be started after all tables were loaded

View File

@ -12,6 +12,7 @@ namespace DB
namespace ErrorCodes namespace ErrorCodes
{ {
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int BAD_ARGUMENTS;
} }
namespace namespace

View File

@ -6,6 +6,7 @@
#include <boost/noncopyable.hpp> #include <boost/noncopyable.hpp>
#include <roaring.hh> #include <roaring.hh>
#include <Common/HashTable/SmallTable.h> #include <Common/HashTable/SmallTable.h>
#include <Common/PODArray.h>
namespace DB namespace DB
{ {

View File

@ -163,7 +163,7 @@ public:
size_t old_size = data_to.size(); size_t old_size = data_to.size();
data_to.resize(data_to.size() + size); data_to.resize(data_to.size() + size);
data.getManyFloat(levels.levels.data(), levels.permutation.data(), size, &data_to[old_size]); data.getManyFloat(levels.levels.data(), levels.permutation.data(), size, data_to.data() + old_size);
} }
else else
{ {
@ -171,7 +171,7 @@ public:
size_t old_size = data_to.size(); size_t old_size = data_to.size();
data_to.resize(data_to.size() + size); data_to.resize(data_to.size() + size);
data.getMany(levels.levels.data(), levels.permutation.data(), size, &data_to[old_size]); data.getMany(levels.levels.data(), levels.permutation.data(), size, data_to.data() + old_size);
} }
} }
else else

View File

@ -39,19 +39,19 @@ class AggregateFunctionTopKDateTime : public AggregateFunctionTopK<DataTypeDateT
template <bool is_weighted> template <bool is_weighted>
static IAggregateFunction * createWithExtraTypes(const DataTypePtr & argument_type, UInt64 threshold, const Array & params) static IAggregateFunction * createWithExtraTypes(const DataTypePtr & argument_type, UInt64 threshold, UInt64 load_factor, const Array & params)
{ {
WhichDataType which(argument_type); WhichDataType which(argument_type);
if (which.idx == TypeIndex::Date) if (which.idx == TypeIndex::Date)
return new AggregateFunctionTopKDate<is_weighted>(threshold, {argument_type}, params); return new AggregateFunctionTopKDate<is_weighted>(threshold, load_factor, {argument_type}, params);
if (which.idx == TypeIndex::DateTime) if (which.idx == TypeIndex::DateTime)
return new AggregateFunctionTopKDateTime<is_weighted>(threshold, {argument_type}, params); return new AggregateFunctionTopKDateTime<is_weighted>(threshold, load_factor, {argument_type}, params);
/// Check that we can use plain version of AggregateFunctionTopKGeneric /// Check that we can use plain version of AggregateFunctionTopKGeneric
if (argument_type->isValueUnambiguouslyRepresentedInContiguousMemoryRegion()) if (argument_type->isValueUnambiguouslyRepresentedInContiguousMemoryRegion())
return new AggregateFunctionTopKGeneric<true, is_weighted>(threshold, argument_type, params); return new AggregateFunctionTopKGeneric<true, is_weighted>(threshold, load_factor, argument_type, params);
else else
return new AggregateFunctionTopKGeneric<false, is_weighted>(threshold, argument_type, params); return new AggregateFunctionTopKGeneric<false, is_weighted>(threshold, load_factor, argument_type, params);
} }
@ -65,19 +65,28 @@ AggregateFunctionPtr createAggregateFunctionTopK(const std::string & name, const
else else
{ {
assertBinary(name, argument_types); assertBinary(name, argument_types);
if (!isNumber(argument_types[1])) if (!isInteger(argument_types[1]))
throw Exception("The second argument for aggregate function 'topKWeighted' must have numeric type", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); throw Exception("The second argument for aggregate function 'topKWeighted' must have integer type", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
} }
UInt64 threshold = 10; /// default value UInt64 threshold = 10; /// default values
UInt64 load_factor = 3;
if (!params.empty()) if (!params.empty())
{ {
if (params.size() != 1) if (params.size() > 2)
throw Exception("Aggregate function " + name + " requires one parameter or less.", throw Exception("Aggregate function " + name + " requires two parameters or less.",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
UInt64 k = applyVisitor(FieldVisitorConvertToNumber<UInt64>(), params[0]); UInt64 k = applyVisitor(FieldVisitorConvertToNumber<UInt64>(), params[0]);
if (params.size() == 2)
{
load_factor = applyVisitor(FieldVisitorConvertToNumber<UInt64>(), params[1]);
if (load_factor < 1)
throw Exception("Too small parameter for aggregate function " + name + ". Minimum: 1",
ErrorCodes::ARGUMENT_OUT_OF_BOUND);
}
if (k > TOP_K_MAX_SIZE) if (k > TOP_K_MAX_SIZE)
throw Exception("Too large parameter for aggregate function " + name + ". Maximum: " + toString(TOP_K_MAX_SIZE), throw Exception("Too large parameter for aggregate function " + name + ". Maximum: " + toString(TOP_K_MAX_SIZE),
@ -90,10 +99,10 @@ AggregateFunctionPtr createAggregateFunctionTopK(const std::string & name, const
threshold = k; threshold = k;
} }
AggregateFunctionPtr res(createWithNumericType<AggregateFunctionTopK, is_weighted>(*argument_types[0], threshold, argument_types, params)); AggregateFunctionPtr res(createWithNumericType<AggregateFunctionTopK, is_weighted>(*argument_types[0], threshold, load_factor, argument_types, params));
if (!res) if (!res)
res = AggregateFunctionPtr(createWithExtraTypes<is_weighted>(argument_types[0], threshold, params)); res = AggregateFunctionPtr(createWithExtraTypes<is_weighted>(argument_types[0], threshold, load_factor, params));
if (!res) if (!res)
throw Exception("Illegal type " + argument_types[0]->getName() + throw Exception("Illegal type " + argument_types[0]->getName() +

View File

@ -20,10 +20,6 @@ namespace DB
{ {
// Allow NxK more space before calculating top K to increase accuracy
#define TOP_K_LOAD_FACTOR 3
template <typename T> template <typename T>
struct AggregateFunctionTopKData struct AggregateFunctionTopKData
{ {
@ -48,9 +44,9 @@ protected:
UInt64 reserved; UInt64 reserved;
public: public:
AggregateFunctionTopK(UInt64 threshold, const DataTypes & argument_types_, const Array & params) AggregateFunctionTopK(UInt64 threshold, UInt64 load_factor, const DataTypes & argument_types_, const Array & params)
: IAggregateFunctionDataHelper<AggregateFunctionTopKData<T>, AggregateFunctionTopK<T, is_weighted>>(argument_types_, params) : IAggregateFunctionDataHelper<AggregateFunctionTopKData<T>, AggregateFunctionTopK<T, is_weighted>>(argument_types_, params)
, threshold(threshold), reserved(TOP_K_LOAD_FACTOR * threshold) {} , threshold(threshold), reserved(load_factor * threshold) {}
String getName() const override { return is_weighted ? "topKWeighted" : "topK"; } String getName() const override { return is_weighted ? "topKWeighted" : "topK"; }
@ -143,9 +139,9 @@ private:
public: public:
AggregateFunctionTopKGeneric( AggregateFunctionTopKGeneric(
UInt64 threshold, const DataTypePtr & input_data_type, const Array & params) UInt64 threshold, UInt64 load_factor, const DataTypePtr & input_data_type, const Array & params)
: IAggregateFunctionDataHelper<AggregateFunctionTopKGenericData, AggregateFunctionTopKGeneric<is_plain_column, is_weighted>>({input_data_type}, params) : IAggregateFunctionDataHelper<AggregateFunctionTopKGenericData, AggregateFunctionTopKGeneric<is_plain_column, is_weighted>>({input_data_type}, params)
, threshold(threshold), reserved(TOP_K_LOAD_FACTOR * threshold), input_data_type(this->argument_types[0]) {} , threshold(threshold), reserved(load_factor * threshold), input_data_type(this->argument_types[0]) {}
String getName() const override { return is_weighted ? "topKWeighted" : "topK"; } String getName() const override { return is_weighted ? "topKWeighted" : "topK"; }
@ -238,6 +234,4 @@ public:
const char * getHeaderFilePath() const override { return __FILE__; } const char * getHeaderFilePath() const override { return __FILE__; }
}; };
#undef TOP_K_LOAD_FACTOR
} }

View File

@ -85,7 +85,7 @@ class QuantileTDigest
Params params; Params params;
/// The memory will be allocated to several elements at once, so that the state occupies 64 bytes. /// The memory will be allocated to several elements at once, so that the state occupies 64 bytes.
static constexpr size_t bytes_in_arena = 64 - sizeof(PODArray<Centroid>) - sizeof(Count) - sizeof(UInt32); static constexpr size_t bytes_in_arena = 128 - sizeof(PODArray<Centroid>) - sizeof(Count) - sizeof(UInt32);
using Summary = PODArray<Centroid, bytes_in_arena / sizeof(Centroid), AllocatorWithStackMemory<Allocator<false>, bytes_in_arena>>; using Summary = PODArray<Centroid, bytes_in_arena / sizeof(Centroid), AllocatorWithStackMemory<Allocator<false>, bytes_in_arena>>;

View File

@ -152,7 +152,7 @@ void ColumnAggregateFunction::insertRangeFrom(const IColumn & from, size_t start
size_t old_size = data.size(); size_t old_size = data.size();
data.resize(old_size + length); data.resize(old_size + length);
memcpy(&data[old_size], &from_concrete.data[start], length * sizeof(data[0])); memcpy(data.data() + old_size, &from_concrete.data[start], length * sizeof(data[0]));
} }
} }
@ -255,6 +255,11 @@ size_t ColumnAggregateFunction::allocatedBytes() const
return res; return res;
} }
void ColumnAggregateFunction::protect()
{
data.protect();
}
MutableColumnPtr ColumnAggregateFunction::cloneEmpty() const MutableColumnPtr ColumnAggregateFunction::cloneEmpty() const
{ {
return create(func, Arenas(1, std::make_shared<Arena>())); return create(func, Arenas(1, std::make_shared<Arena>()));

View File

@ -157,6 +157,8 @@ public:
size_t allocatedBytes() const override; size_t allocatedBytes() const override;
void protect() override;
void insertRangeFrom(const IColumn & from, size_t start, size_t length) override; void insertRangeFrom(const IColumn & from, size_t start, size_t length) override;
void popBack(size_t n) override; void popBack(size_t n) override;

View File

@ -311,6 +311,13 @@ size_t ColumnArray::allocatedBytes() const
} }
void ColumnArray::protect()
{
getData().protect();
getOffsets().protect();
}
bool ColumnArray::hasEqualOffsets(const ColumnArray & other) const bool ColumnArray::hasEqualOffsets(const ColumnArray & other) const
{ {
if (offsets == other.offsets) if (offsets == other.offsets)

View File

@ -3,17 +3,12 @@
#include <Columns/IColumn.h> #include <Columns/IColumn.h>
#include <Columns/ColumnVector.h> #include <Columns/ColumnVector.h>
#include <Core/Defines.h> #include <Core/Defines.h>
#include <Common/typeid_cast.h>
namespace DB namespace DB
{ {
namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
extern const int NOT_IMPLEMENTED;
extern const int BAD_ARGUMENTS;
}
/** A column of array values. /** A column of array values.
* In memory, it is represented as one column of a nested type, whose size is equal to the sum of the sizes of all arrays, * In memory, it is represented as one column of a nested type, whose size is equal to the sum of the sizes of all arrays,
* and as an array of offsets in it, which allows you to get each element. * and as an array of offsets in it, which allows you to get each element.
@ -78,6 +73,7 @@ public:
void reserve(size_t n) override; void reserve(size_t n) override;
size_t byteSize() const override; size_t byteSize() const override;
size_t allocatedBytes() const override; size_t allocatedBytes() const override;
void protect() override;
ColumnPtr replicate(const Offsets & replicate_offsets) const override; ColumnPtr replicate(const Offsets & replicate_offsets) const override;
ColumnPtr convertToFullColumnIfConst() const override; ColumnPtr convertToFullColumnIfConst() const override;
void getExtremes(Field & min, Field & max) const override; void getExtremes(Field & min, Field & max) const override;
@ -120,6 +116,13 @@ public:
callback(data); callback(data);
} }
bool structureEquals(const IColumn & rhs) const override
{
if (auto rhs_concrete = typeid_cast<const ColumnArray *>(&rhs))
return data->structureEquals(*rhs_concrete->data);
return false;
}
private: private:
ColumnPtr data; ColumnPtr data;
ColumnPtr offsets; ColumnPtr offsets;

View File

@ -3,6 +3,7 @@
#include <Core/Field.h> #include <Core/Field.h>
#include <Common/Exception.h> #include <Common/Exception.h>
#include <Columns/IColumn.h> #include <Columns/IColumn.h>
#include <Common/typeid_cast.h>
namespace DB namespace DB
@ -190,6 +191,13 @@ public:
callback(data); callback(data);
} }
bool structureEquals(const IColumn & rhs) const override
{
if (auto rhs_concrete = typeid_cast<const ColumnConst *>(&rhs))
return data->structureEquals(*rhs_concrete->data);
return false;
}
bool onlyNull() const override { return data->isNullAt(0); } bool onlyNull() const override { return data->isNullAt(0); }
bool isColumnConst() const override { return true; } bool isColumnConst() const override { return true; }
bool isNumeric() const override { return data->isNumeric(); } bool isNumeric() const override { return data->isNumeric(); }

View File

@ -140,7 +140,7 @@ void ColumnDecimal<T>::insertRangeFrom(const IColumn & src, size_t start, size_t
size_t old_size = data.size(); size_t old_size = data.size();
data.resize(old_size + length); data.resize(old_size + length);
memcpy(&data[old_size], &src_vec.data[start], length * sizeof(data[0])); memcpy(data.data() + old_size, &src_vec.data[start], length * sizeof(data[0]));
} }
template <typename T> template <typename T>

View File

@ -2,6 +2,7 @@
#include <cmath> #include <cmath>
#include <Common/typeid_cast.h>
#include <Columns/IColumn.h> #include <Columns/IColumn.h>
#include <Columns/ColumnVectorHelper.h> #include <Columns/ColumnVectorHelper.h>
@ -87,6 +88,7 @@ public:
size_t size() const override { return data.size(); } size_t size() const override { return data.size(); }
size_t byteSize() const override { return data.size() * sizeof(data[0]); } size_t byteSize() const override { return data.size() * sizeof(data[0]); }
size_t allocatedBytes() const override { return data.allocated_bytes(); } size_t allocatedBytes() const override { return data.allocated_bytes(); }
void protect() override { data.protect(); }
void reserve(size_t n) override { data.reserve(n); } void reserve(size_t n) override { data.reserve(n); }
void insertFrom(const IColumn & src, size_t n) override { data.push_back(static_cast<const Self &>(src).getData()[n]); } void insertFrom(const IColumn & src, size_t n) override { data.push_back(static_cast<const Self &>(src).getData()[n]); }
@ -132,6 +134,13 @@ public:
void gather(ColumnGathererStream & gatherer_stream) override; void gather(ColumnGathererStream & gatherer_stream) override;
bool structureEquals(const IColumn & rhs) const override
{
if (auto rhs_concrete = typeid_cast<const ColumnDecimal<T> *>(&rhs))
return scale == rhs_concrete->scale;
return false;
}
void insert(const T value) { data.push_back(value); } void insert(const T value) { data.push_back(value); }
Container & getData() { return data; } Container & getData() { return data; }

View File

@ -55,7 +55,7 @@ void ColumnFixedString::insert(const Field & x)
size_t old_size = chars.size(); size_t old_size = chars.size();
chars.resize_fill(old_size + n); chars.resize_fill(old_size + n);
memcpy(&chars[old_size], s.data(), s.size()); memcpy(chars.data() + old_size, s.data(), s.size());
} }
void ColumnFixedString::insertFrom(const IColumn & src_, size_t index) void ColumnFixedString::insertFrom(const IColumn & src_, size_t index)
@ -67,7 +67,7 @@ void ColumnFixedString::insertFrom(const IColumn & src_, size_t index)
size_t old_size = chars.size(); size_t old_size = chars.size();
chars.resize(old_size + n); chars.resize(old_size + n);
memcpySmallAllowReadWriteOverflow15(&chars[old_size], &src.chars[n * index], n); memcpySmallAllowReadWriteOverflow15(chars.data() + old_size, &src.chars[n * index], n);
} }
void ColumnFixedString::insertData(const char * pos, size_t length) void ColumnFixedString::insertData(const char * pos, size_t length)
@ -77,7 +77,7 @@ void ColumnFixedString::insertData(const char * pos, size_t length)
size_t old_size = chars.size(); size_t old_size = chars.size();
chars.resize_fill(old_size + n); chars.resize_fill(old_size + n);
memcpy(&chars[old_size], pos, length); memcpy(chars.data() + old_size, pos, length);
} }
StringRef ColumnFixedString::serializeValueIntoArena(size_t index, Arena & arena, char const *& begin) const StringRef ColumnFixedString::serializeValueIntoArena(size_t index, Arena & arena, char const *& begin) const
@ -91,7 +91,7 @@ const char * ColumnFixedString::deserializeAndInsertFromArena(const char * pos)
{ {
size_t old_size = chars.size(); size_t old_size = chars.size();
chars.resize(old_size + n); chars.resize(old_size + n);
memcpy(&chars[old_size], pos, n); memcpy(chars.data() + old_size, pos, n);
return pos + n; return pos + n;
} }
@ -151,7 +151,7 @@ void ColumnFixedString::insertRangeFrom(const IColumn & src, size_t start, size_
size_t old_size = chars.size(); size_t old_size = chars.size();
chars.resize(old_size + length * n); chars.resize(old_size + length * n);
memcpy(&chars[old_size], &src_concrete.chars[start * n], length * n); memcpy(chars.data() + old_size, &src_concrete.chars[start * n], length * n);
} }
ColumnPtr ColumnFixedString::filter(const IColumn::Filter & filt, ssize_t result_size_hint) const ColumnPtr ColumnFixedString::filter(const IColumn::Filter & filt, ssize_t result_size_hint) const

View File

@ -2,6 +2,7 @@
#include <Common/PODArray.h> #include <Common/PODArray.h>
#include <Common/memcmpSmall.h> #include <Common/memcmpSmall.h>
#include <Common/typeid_cast.h>
#include <Columns/IColumn.h> #include <Columns/IColumn.h>
#include <Columns/ColumnVectorHelper.h> #include <Columns/ColumnVectorHelper.h>
@ -57,6 +58,11 @@ public:
return chars.allocated_bytes() + sizeof(n); return chars.allocated_bytes() + sizeof(n);
} }
void protect() override
{
chars.protect();
}
Field operator[](size_t index) const override Field operator[](size_t index) const override
{ {
return String(reinterpret_cast<const char *>(&chars[n * index]), n); return String(reinterpret_cast<const char *>(&chars[n * index]), n);
@ -129,6 +135,12 @@ public:
void getExtremes(Field & min, Field & max) const override; void getExtremes(Field & min, Field & max) const override;
bool structureEquals(const IColumn & rhs) const override
{
if (auto rhs_concrete = typeid_cast<const ColumnFixedString *>(&rhs))
return n == rhs_concrete->n;
return false;
}
bool canBeInsideNullable() const override { return true; } bool canBeInsideNullable() const override { return true; }

View File

@ -363,7 +363,6 @@ ColumnPtr ColumnLowCardinality::countKeys() const
} }
ColumnLowCardinality::Index::Index() : positions(ColumnUInt8::create()), size_of_type(sizeof(UInt8)) {} ColumnLowCardinality::Index::Index() : positions(ColumnUInt8::create()), size_of_type(sizeof(UInt8)) {}
ColumnLowCardinality::Index::Index(MutableColumnPtr && positions) : positions(std::move(positions)) ColumnLowCardinality::Index::Index(MutableColumnPtr && positions) : positions(std::move(positions))

View File

@ -5,6 +5,7 @@
#include <AggregateFunctions/AggregateFunctionCount.h> #include <AggregateFunctions/AggregateFunctionCount.h>
#include "ColumnsNumber.h" #include "ColumnsNumber.h"
namespace DB namespace DB
{ {
@ -132,6 +133,14 @@ public:
callback(dictionary.getColumnUniquePtr()); callback(dictionary.getColumnUniquePtr());
} }
bool structureEquals(const IColumn & rhs) const override
{
if (auto rhs_low_cardinality = typeid_cast<const ColumnLowCardinality *>(&rhs))
return idx.getPositions()->structureEquals(*rhs_low_cardinality->idx.getPositions())
&& dictionary.getColumnUnique().structureEquals(rhs_low_cardinality->dictionary.getColumnUnique());
return false;
}
bool valuesHaveFixedSize() const override { return getDictionary().valuesHaveFixedSize(); } bool valuesHaveFixedSize() const override { return getDictionary().valuesHaveFixedSize(); }
bool isFixedAndContiguous() const override { return false; } bool isFixedAndContiguous() const override { return false; }
size_t sizeOfValueIfFixed() const override { return getDictionary().sizeOfValueIfFixed(); } size_t sizeOfValueIfFixed() const override { return getDictionary().sizeOfValueIfFixed(); }

View File

@ -23,6 +23,11 @@ public:
MutableColumnPtr cloneDummy(size_t s_) const override { return ColumnNothing::create(s_); } MutableColumnPtr cloneDummy(size_t s_) const override { return ColumnNothing::create(s_); }
bool canBeInsideNullable() const override { return true; } bool canBeInsideNullable() const override { return true; }
bool structureEquals(const IColumn & rhs) const override
{
return typeid(rhs) == typeid(ColumnNothing);
}
}; };
} }

View File

@ -291,6 +291,12 @@ size_t ColumnNullable::allocatedBytes() const
return getNestedColumn().allocatedBytes() + getNullMapColumn().allocatedBytes(); return getNestedColumn().allocatedBytes() + getNullMapColumn().allocatedBytes();
} }
void ColumnNullable::protect()
{
getNestedColumn().protect();
getNullMapColumn().protect();
}
namespace namespace
{ {

View File

@ -2,6 +2,8 @@
#include <Columns/IColumn.h> #include <Columns/IColumn.h>
#include <Columns/ColumnsNumber.h> #include <Columns/ColumnsNumber.h>
#include <Common/typeid_cast.h>
namespace DB namespace DB
{ {
@ -71,6 +73,7 @@ public:
void reserve(size_t n) override; void reserve(size_t n) override;
size_t byteSize() const override; size_t byteSize() const override;
size_t allocatedBytes() const override; size_t allocatedBytes() const override;
void protect() override;
ColumnPtr replicate(const Offsets & replicate_offsets) const override; ColumnPtr replicate(const Offsets & replicate_offsets) const override;
void updateHashWithValue(size_t n, SipHash & hash) const override; void updateHashWithValue(size_t n, SipHash & hash) const override;
void getExtremes(Field & min, Field & max) const override; void getExtremes(Field & min, Field & max) const override;
@ -88,6 +91,13 @@ public:
callback(null_map); callback(null_map);
} }
bool structureEquals(const IColumn & rhs) const override
{
if (auto rhs_nullable = typeid_cast<const ColumnNullable *>(&rhs))
return nested_column->structureEquals(*rhs_nullable->nested_column);
return false;
}
bool isColumnNullable() const override { return true; } bool isColumnNullable() const override { return true; }
bool isFixedAndContiguous() const override { return false; } bool isFixedAndContiguous() const override { return false; }
bool valuesHaveFixedSize() const override { return nested_column->valuesHaveFixedSize(); } bool valuesHaveFixedSize() const override { return nested_column->valuesHaveFixedSize(); }

View File

@ -185,7 +185,7 @@ const char * ColumnString::deserializeAndInsertFromArena(const char * pos)
const size_t old_size = chars.size(); const size_t old_size = chars.size();
const size_t new_size = old_size + string_size; const size_t new_size = old_size + string_size;
chars.resize(new_size); chars.resize(new_size);
memcpy(&chars[old_size], pos, string_size); memcpy(chars.data() + old_size, pos, string_size);
offsets.push_back(new_size); offsets.push_back(new_size);
return pos + string_size; return pos + string_size;
@ -412,4 +412,11 @@ void ColumnString::getPermutationWithCollation(const Collator & collator, bool r
} }
} }
void ColumnString::protect()
{
getChars().protect();
getOffsets().protect();
}
} }

View File

@ -1,6 +1,7 @@
#pragma once #pragma once
#include <string.h> #include <cstring>
#include <cassert>
#include <Columns/IColumn.h> #include <Columns/IColumn.h>
#include <Common/PODArray.h> #include <Common/PODArray.h>
@ -67,25 +68,31 @@ public:
return chars.allocated_bytes() + offsets.allocated_bytes(); return chars.allocated_bytes() + offsets.allocated_bytes();
} }
void protect() override;
MutableColumnPtr cloneResized(size_t to_size) const override; MutableColumnPtr cloneResized(size_t to_size) const override;
Field operator[](size_t n) const override Field operator[](size_t n) const override
{ {
assert(n < size());
return Field(&chars[offsetAt(n)], sizeAt(n) - 1); return Field(&chars[offsetAt(n)], sizeAt(n) - 1);
} }
void get(size_t n, Field & res) const override void get(size_t n, Field & res) const override
{ {
assert(n < size());
res.assignString(&chars[offsetAt(n)], sizeAt(n) - 1); res.assignString(&chars[offsetAt(n)], sizeAt(n) - 1);
} }
StringRef getDataAt(size_t n) const override StringRef getDataAt(size_t n) const override
{ {
assert(n < size());
return StringRef(&chars[offsetAt(n)], sizeAt(n) - 1); return StringRef(&chars[offsetAt(n)], sizeAt(n) - 1);
} }
StringRef getDataAtWithTerminatingZero(size_t n) const override StringRef getDataAtWithTerminatingZero(size_t n) const override
{ {
assert(n < size());
return StringRef(&chars[offsetAt(n)], sizeAt(n)); return StringRef(&chars[offsetAt(n)], sizeAt(n));
} }
@ -103,7 +110,7 @@ public:
const size_t new_size = old_size + size_to_append; const size_t new_size = old_size + size_to_append;
chars.resize(new_size); chars.resize(new_size);
memcpy(&chars[old_size], s.c_str(), size_to_append); memcpy(chars.data() + old_size, s.c_str(), size_to_append);
offsets.push_back(new_size); offsets.push_back(new_size);
} }
@ -114,36 +121,22 @@ public:
void insertFrom(const IColumn & src_, size_t n) override void insertFrom(const IColumn & src_, size_t n) override
{ {
const ColumnString & src = static_cast<const ColumnString &>(src_); const ColumnString & src = static_cast<const ColumnString &>(src_);
const size_t size_to_append = src.offsets[n] - src.offsets[n - 1]; /// -1th index is Ok, see PaddedPODArray.
if (n != 0) if (size_to_append == 1)
{ {
const size_t size_to_append = src.offsets[n] - src.offsets[n - 1]; /// shortcut for empty string
chars.push_back(0);
if (size_to_append == 1) offsets.push_back(chars.size());
{
/// shortcut for empty string
chars.push_back(0);
offsets.push_back(chars.size());
}
else
{
const size_t old_size = chars.size();
const size_t offset = src.offsets[n - 1];
const size_t new_size = old_size + size_to_append;
chars.resize(new_size);
memcpySmallAllowReadWriteOverflow15(&chars[old_size], &src.chars[offset], size_to_append);
offsets.push_back(new_size);
}
} }
else else
{ {
const size_t old_size = chars.size(); const size_t old_size = chars.size();
const size_t size_to_append = src.offsets[0]; const size_t offset = src.offsets[n - 1];
const size_t new_size = old_size + size_to_append; const size_t new_size = old_size + size_to_append;
chars.resize(new_size); chars.resize(new_size);
memcpySmallAllowReadWriteOverflow15(&chars[old_size], &src.chars[0], size_to_append); memcpySmallAllowReadWriteOverflow15(chars.data() + old_size, &src.chars[offset], size_to_append);
offsets.push_back(new_size); offsets.push_back(new_size);
} }
} }
@ -155,7 +148,7 @@ public:
chars.resize(new_size); chars.resize(new_size);
if (length) if (length)
memcpy(&chars[old_size], pos, length); memcpy(chars.data() + old_size, pos, length);
chars[old_size + length] = 0; chars[old_size + length] = 0;
offsets.push_back(new_size); offsets.push_back(new_size);
} }
@ -167,7 +160,7 @@ public:
const size_t new_size = old_size + length; const size_t new_size = old_size + length;
chars.resize(new_size); chars.resize(new_size);
memcpy(&chars[old_size], pos, length); memcpy(chars.data() + old_size, pos, length);
offsets.push_back(new_size); offsets.push_back(new_size);
} }
@ -238,6 +231,11 @@ public:
bool canBeInsideNullable() const override { return true; } bool canBeInsideNullable() const override { return true; }
bool structureEquals(const IColumn & rhs) const override
{
return typeid(rhs) == typeid(ColumnString);
}
Chars & getChars() { return chars; } Chars & getChars() { return chars; }
const Chars & getChars() const { return chars; } const Chars & getChars() const { return chars; }

View File

@ -4,6 +4,7 @@
#include <IO/Operators.h> #include <IO/Operators.h>
#include <ext/map.h> #include <ext/map.h>
#include <ext/range.h> #include <ext/range.h>
#include <Common/typeid_cast.h>
namespace DB namespace DB
@ -315,6 +316,12 @@ size_t ColumnTuple::allocatedBytes() const
return res; return res;
} }
void ColumnTuple::protect()
{
for (auto & column : columns)
column->assumeMutableRef().protect();
}
void ColumnTuple::getExtremes(Field & min, Field & max) const void ColumnTuple::getExtremes(Field & min, Field & max) const
{ {
const size_t tuple_size = columns.size(); const size_t tuple_size = columns.size();
@ -335,6 +342,23 @@ void ColumnTuple::forEachSubcolumn(ColumnCallback callback)
callback(column); callback(column);
} }
bool ColumnTuple::structureEquals(const IColumn & rhs) const
{
if (auto rhs_tuple = typeid_cast<const ColumnTuple *>(&rhs))
{
const size_t tuple_size = columns.size();
if (tuple_size != rhs_tuple->columns.size())
return false;
for (const auto i : ext::range(0, tuple_size))
if (!columns[i]->structureEquals(*rhs_tuple->columns[i]))
return false;
return true;
}
else
return false;
}
} }

View File

@ -71,7 +71,9 @@ public:
void reserve(size_t n) override; void reserve(size_t n) override;
size_t byteSize() const override; size_t byteSize() const override;
size_t allocatedBytes() const override; size_t allocatedBytes() const override;
void protect() override;
void forEachSubcolumn(ColumnCallback callback) override; void forEachSubcolumn(ColumnCallback callback) override;
bool structureEquals(const IColumn & rhs) const override;
size_t tupleSize() const { return columns.size(); } size_t tupleSize() const { return columns.size(); }

View File

@ -80,6 +80,7 @@ public:
bool isNumeric() const override { return column_holder->isNumeric(); } bool isNumeric() const override { return column_holder->isNumeric(); }
size_t byteSize() const override { return column_holder->byteSize(); } size_t byteSize() const override { return column_holder->byteSize(); }
void protect() override { column_holder->assumeMutableRef().protect(); }
size_t allocatedBytes() const override size_t allocatedBytes() const override
{ {
return column_holder->allocatedBytes() return column_holder->allocatedBytes()
@ -94,6 +95,13 @@ public:
nested_column_nullable = ColumnNullable::create(column_holder, nested_null_mask); nested_column_nullable = ColumnNullable::create(column_holder, nested_null_mask);
} }
bool structureEquals(const IColumn & rhs) const override
{
if (auto rhs_concrete = typeid_cast<const ColumnUnique *>(&rhs))
return column_holder->structureEquals(*rhs_concrete->column_holder);
return false;
}
const UInt64 * tryGetSavedHash() const override { return index.tryGetSavedHash(); } const UInt64 * tryGetSavedHash() const override { return index.tryGetSavedHash(); }
UInt128 getHash() const override { return hash.getHash(*getRawColumnPtr()); } UInt128 getHash() const override { return hash.getHash(*getRawColumnPtr()); }

View File

@ -141,7 +141,7 @@ void ColumnVector<T>::insertRangeFrom(const IColumn & src, size_t start, size_t
size_t old_size = data.size(); size_t old_size = data.size();
data.resize(old_size + length); data.resize(old_size + length);
memcpy(&data[old_size], &src_vec.data[start], length * sizeof(data[0])); memcpy(data.data() + old_size, &src_vec.data[start], length * sizeof(data[0]));
} }
template <typename T> template <typename T>

View File

@ -163,6 +163,11 @@ public:
return data.allocated_bytes(); return data.allocated_bytes();
} }
void protect() override
{
data.protect();
}
void insertValue(const T value) void insertValue(const T value)
{ {
data.push_back(value); data.push_back(value);
@ -246,6 +251,12 @@ public:
size_t sizeOfValueIfFixed() const override { return sizeof(T); } size_t sizeOfValueIfFixed() const override { return sizeof(T); }
StringRef getRawData() const override { return StringRef(reinterpret_cast<const char*>(data.data()), data.size()); } StringRef getRawData() const override { return StringRef(reinterpret_cast<const char*>(data.data()), data.size()); }
bool structureEquals(const IColumn & rhs) const override
{
return typeid(rhs) == typeid(ColumnVector<T>);
}
/** More efficient methods of manipulation - to manipulate with data directly. */ /** More efficient methods of manipulation - to manipulate with data directly. */
Container & getData() Container & getData()
{ {

View File

@ -24,9 +24,10 @@ namespace DB
class ColumnVectorHelper : public IColumn class ColumnVectorHelper : public IColumn
{ {
public: public:
template <size_t ELEMENT_SIZE>
const char * getRawDataBegin() const const char * getRawDataBegin() const
{ {
return *reinterpret_cast<const char * const *>(reinterpret_cast<const char *>(this) + sizeof(*this)); return reinterpret_cast<const PODArrayBase<ELEMENT_SIZE, 4096, Allocator<false>, 15, 16> *>(reinterpret_cast<const char *>(this) + sizeof(*this))->raw_data();
} }
template <size_t ELEMENT_SIZE> template <size_t ELEMENT_SIZE>

View File

@ -253,11 +253,22 @@ public:
/// Zero, if could be determined. /// Zero, if could be determined.
virtual size_t allocatedBytes() const = 0; virtual size_t allocatedBytes() const = 0;
/// Make memory region readonly with mprotect if it is large enough.
/// The operation is slow and performed only for debug builds.
virtual void protect() {}
/// If the column contains subcolumns (such as Array, Nullable, etc), do callback on them. /// If the column contains subcolumns (such as Array, Nullable, etc), do callback on them.
/// Shallow: doesn't do recursive calls; don't do call for itself. /// Shallow: doesn't do recursive calls; don't do call for itself.
using ColumnCallback = std::function<void(Ptr&)>; using ColumnCallback = std::function<void(Ptr&)>;
virtual void forEachSubcolumn(ColumnCallback) {} virtual void forEachSubcolumn(ColumnCallback) {}
/// Columns have equal structure.
/// If true - you can use "compareAt", "insertFrom", etc. methods.
virtual bool structureEquals(const IColumn &) const
{
throw Exception("Method structureEquals is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED);
}
MutablePtr mutate() const && MutablePtr mutate() const &&
{ {

View File

@ -1,4 +1,4 @@
if(USE_GTEST) if(USE_GTEST)
add_executable(column_unique column_unique.cpp) add_executable(column_unique column_unique.cpp)
target_link_libraries(column_unique PRIVATE dbms gtest_main) target_link_libraries(column_unique PRIVATE dbms ${GTEST_BOTH_LIBRARIES})
endif() endif()

View File

@ -5,6 +5,7 @@
#endif #endif
#include <cstdlib> #include <cstdlib>
#include <algorithm>
#include <sys/mman.h> #include <sys/mman.h>
#include <common/mremap.h> #include <common/mremap.h>
@ -42,11 +43,30 @@ namespace ErrorCodes
* *
* PS. This is also required, because tcmalloc can not allocate a chunk of memory greater than 16 GB. * PS. This is also required, because tcmalloc can not allocate a chunk of memory greater than 16 GB.
*/ */
static constexpr size_t MMAP_THRESHOLD = 64 * (1ULL << 20); #ifdef NDEBUG
static constexpr size_t MMAP_THRESHOLD = 64 * (1ULL << 20);
#else
/// In debug build, use small mmap threshold to reproduce more memory stomping bugs.
/// Along with ASLR it will hopefully detect more issues than ASan.
/// The program may fail due to the limit on number of memory mappings.
static constexpr size_t MMAP_THRESHOLD = 4096;
#endif
static constexpr size_t MMAP_MIN_ALIGNMENT = 4096; static constexpr size_t MMAP_MIN_ALIGNMENT = 4096;
static constexpr size_t MALLOC_MIN_ALIGNMENT = 8; static constexpr size_t MALLOC_MIN_ALIGNMENT = 8;
template <bool clear_memory_>
void * Allocator<clear_memory_>::mmap_hint()
{
#if ALLOCATOR_ASLR
return reinterpret_cast<void *>(std::uniform_int_distribution<intptr_t>(0x100000000000UL, 0x700000000000UL)(rng));
#else
return nullptr;
#endif
}
template <bool clear_memory_> template <bool clear_memory_>
void * Allocator<clear_memory_>::alloc(size_t size, size_t alignment) void * Allocator<clear_memory_>::alloc(size_t size, size_t alignment)
{ {
@ -60,7 +80,7 @@ void * Allocator<clear_memory_>::alloc(size_t size, size_t alignment)
throw DB::Exception("Too large alignment " + formatReadableSizeWithBinarySuffix(alignment) + ": more than page size when allocating " throw DB::Exception("Too large alignment " + formatReadableSizeWithBinarySuffix(alignment) + ": more than page size when allocating "
+ formatReadableSizeWithBinarySuffix(size) + ".", DB::ErrorCodes::BAD_ARGUMENTS); + formatReadableSizeWithBinarySuffix(size) + ".", DB::ErrorCodes::BAD_ARGUMENTS);
buf = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); buf = mmap(mmap_hint(), size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (MAP_FAILED == buf) if (MAP_FAILED == buf)
DB::throwFromErrno("Allocator: Cannot mmap " + formatReadableSizeWithBinarySuffix(size) + ".", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY); DB::throwFromErrno("Allocator: Cannot mmap " + formatReadableSizeWithBinarySuffix(size) + ".", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
@ -118,9 +138,11 @@ void * Allocator<clear_memory_>::realloc(void * buf, size_t old_size, size_t new
if (old_size == new_size) if (old_size == new_size)
{ {
/// nothing to do. /// nothing to do.
/// BTW, it's not possible to change alignment while doing realloc.
} }
else if (old_size < MMAP_THRESHOLD && new_size < MMAP_THRESHOLD && alignment <= MALLOC_MIN_ALIGNMENT) else if (old_size < MMAP_THRESHOLD && new_size < MMAP_THRESHOLD && alignment <= MALLOC_MIN_ALIGNMENT)
{ {
/// Resize malloc'd memory region with no special alignment requirement.
CurrentMemoryTracker::realloc(old_size, new_size); CurrentMemoryTracker::realloc(old_size, new_size);
void * new_buf = ::realloc(buf, new_size); void * new_buf = ::realloc(buf, new_size);
@ -133,6 +155,7 @@ void * Allocator<clear_memory_>::realloc(void * buf, size_t old_size, size_t new
} }
else if (old_size >= MMAP_THRESHOLD && new_size >= MMAP_THRESHOLD) else if (old_size >= MMAP_THRESHOLD && new_size >= MMAP_THRESHOLD)
{ {
/// Resize mmap'd memory region.
CurrentMemoryTracker::realloc(old_size, new_size); CurrentMemoryTracker::realloc(old_size, new_size);
// On apple and freebsd self-implemented mremap used (common/mremap.h) // On apple and freebsd self-implemented mremap used (common/mremap.h)
@ -142,21 +165,12 @@ void * Allocator<clear_memory_>::realloc(void * buf, size_t old_size, size_t new
/// No need for zero-fill, because mmap guarantees it. /// No need for zero-fill, because mmap guarantees it.
} }
else if (old_size >= MMAP_THRESHOLD && new_size < MMAP_THRESHOLD)
{
void * new_buf = alloc(new_size, alignment);
memcpy(new_buf, buf, new_size);
if (0 != munmap(buf, old_size))
{
::free(new_buf);
DB::throwFromErrno("Allocator: Cannot munmap " + formatReadableSizeWithBinarySuffix(old_size) + ".", DB::ErrorCodes::CANNOT_MUNMAP);
}
buf = new_buf;
}
else else
{ {
/// All other cases that requires a copy. MemoryTracker is called inside 'alloc', 'free' methods.
void * new_buf = alloc(new_size, alignment); void * new_buf = alloc(new_size, alignment);
memcpy(new_buf, buf, old_size); memcpy(new_buf, buf, std::min(old_size, new_size));
free(buf, old_size); free(buf, old_size);
buf = new_buf; buf = new_buf;
} }

View File

@ -2,6 +2,19 @@
#include <string.h> #include <string.h>
#ifdef NDEBUG
/// If set to 1 - randomize memory mappings manually (address space layout randomization) to reproduce more memory stomping bugs.
/// Note that Linux doesn't do it by default. This may lead to worse TLB performance.
#define ALLOCATOR_ASLR 0
#else
#define ALLOCATOR_ASLR 1
#endif
#if ALLOCATOR_ASLR
#include <pcg_random.hpp>
#include <Common/randomSeed.h>
#endif
/** Responsible for allocating / freeing memory. Used, for example, in PODArray, Arena. /** Responsible for allocating / freeing memory. Used, for example, in PODArray, Arena.
* Also used in hash tables. * Also used in hash tables.
@ -14,6 +27,12 @@
template <bool clear_memory_> template <bool clear_memory_>
class Allocator class Allocator
{ {
#if ALLOCATOR_ASLR
private:
pcg64 rng{randomSeed()};
#endif
void * mmap_hint();
protected: protected:
static constexpr bool clear_memory = clear_memory_; static constexpr bool clear_memory = clear_memory_;

View File

@ -55,6 +55,28 @@ public:
return locus; return locus;
} }
/// Used only in arcadia/metrika
void readText(ReadBuffer & in)
{
for (size_t i = 0; i < BITSET_SIZE; ++i)
{
if (i != 0)
assertChar(',', in);
readIntText(bitset[i], in);
}
}
/// Used only in arcadia/metrika
void writeText(WriteBuffer & out) const
{
for (size_t i = 0; i < BITSET_SIZE; ++i)
{
if (i != 0)
writeCString(",", out);
writeIntText(bitset[i], out);
}
}
private: private:
/// number of bytes in bitset /// number of bytes in bitset
static constexpr size_t BITSET_SIZE = (static_cast<size_t>(bucket_count) * content_width + 7) / 8; static constexpr size_t BITSET_SIZE = (static_cast<size_t>(bucket_count) * content_width + 7) / 8;

View File

@ -21,6 +21,8 @@ namespace ErrorCodes
void CurrentThread::updatePerformanceCounters() void CurrentThread::updatePerformanceCounters()
{ {
if (unlikely(!current_thread))
return;
get().updatePerformanceCounters(); get().updatePerformanceCounters();
} }
@ -37,30 +39,38 @@ ProfileEvents::Counters & CurrentThread::getProfileEvents()
return current_thread ? get().performance_counters : ProfileEvents::global_counters; return current_thread ? get().performance_counters : ProfileEvents::global_counters;
} }
MemoryTracker & CurrentThread::getMemoryTracker() MemoryTracker * CurrentThread::getMemoryTracker()
{ {
return get().memory_tracker; if (unlikely(!current_thread))
return nullptr;
return &get().memory_tracker;
} }
void CurrentThread::updateProgressIn(const Progress & value) void CurrentThread::updateProgressIn(const Progress & value)
{ {
if (unlikely(!current_thread))
return;
get().progress_in.incrementPiecewiseAtomically(value); get().progress_in.incrementPiecewiseAtomically(value);
} }
void CurrentThread::updateProgressOut(const Progress & value) void CurrentThread::updateProgressOut(const Progress & value)
{ {
if (unlikely(!current_thread))
return;
get().progress_out.incrementPiecewiseAtomically(value); get().progress_out.incrementPiecewiseAtomically(value);
} }
void CurrentThread::attachInternalTextLogsQueue(const std::shared_ptr<InternalTextLogsQueue> & logs_queue) void CurrentThread::attachInternalTextLogsQueue(const std::shared_ptr<InternalTextLogsQueue> & logs_queue)
{ {
if (unlikely(!current_thread))
return;
get().attachInternalTextLogsQueue(logs_queue); get().attachInternalTextLogsQueue(logs_queue);
} }
std::shared_ptr<InternalTextLogsQueue> CurrentThread::getInternalTextLogsQueue() std::shared_ptr<InternalTextLogsQueue> CurrentThread::getInternalTextLogsQueue()
{ {
/// NOTE: this method could be called at early server startup stage /// NOTE: this method could be called at early server startup stage
if (!current_thread) if (unlikely(!current_thread))
return nullptr; return nullptr;
if (get().getCurrentState() == ThreadStatus::ThreadState::Died) if (get().getCurrentState() == ThreadStatus::ThreadState::Died)
@ -71,7 +81,7 @@ std::shared_ptr<InternalTextLogsQueue> CurrentThread::getInternalTextLogsQueue()
ThreadGroupStatusPtr CurrentThread::getGroup() ThreadGroupStatusPtr CurrentThread::getGroup()
{ {
if (!current_thread) if (unlikely(!current_thread))
return nullptr; return nullptr;
return get().getThreadGroup(); return get().getThreadGroup();

View File

@ -45,7 +45,7 @@ public:
static void updatePerformanceCounters(); static void updatePerformanceCounters();
static ProfileEvents::Counters & getProfileEvents(); static ProfileEvents::Counters & getProfileEvents();
static MemoryTracker & getMemoryTracker(); static MemoryTracker * getMemoryTracker();
/// Update read and write rows (bytes) statistics (used in system.query_thread_log) /// Update read and write rows (bytes) statistics (used in system.query_thread_log)
static void updateProgressIn(const Progress & value); static void updateProgressIn(const Progress & value);

View File

@ -419,6 +419,7 @@ namespace ErrorCodes
extern const int BAD_DATABASE_FOR_TEMPORARY_TABLE = 442; extern const int BAD_DATABASE_FOR_TEMPORARY_TABLE = 442;
extern const int NO_COMMON_COLUMNS_WITH_PROTOBUF_SCHEMA = 443; extern const int NO_COMMON_COLUMNS_WITH_PROTOBUF_SCHEMA = 443;
extern const int UNKNOWN_PROTOBUF_FORMAT = 444; extern const int UNKNOWN_PROTOBUF_FORMAT = 444;
extern const int CANNOT_MPROTECT = 445;
extern const int KEEPER_EXCEPTION = 999; extern const int KEEPER_EXCEPTION = 999;
extern const int POCO_EXCEPTION = 1000; extern const int POCO_EXCEPTION = 1000;

View File

@ -190,24 +190,27 @@ namespace CurrentMemoryTracker
{ {
void alloc(Int64 size) void alloc(Int64 size)
{ {
if (DB::current_thread) if (auto memory_tracker = DB::CurrentThread::getMemoryTracker())
DB::CurrentThread::getMemoryTracker().alloc(size); memory_tracker->alloc(size);
} }
void realloc(Int64 old_size, Int64 new_size) void realloc(Int64 old_size, Int64 new_size)
{ {
if (DB::current_thread) if (auto memory_tracker = DB::CurrentThread::getMemoryTracker())
DB::CurrentThread::getMemoryTracker().alloc(new_size - old_size); memory_tracker->alloc(new_size - old_size);
} }
void free(Int64 size) void free(Int64 size)
{ {
if (DB::current_thread) if (auto memory_tracker = DB::CurrentThread::getMemoryTracker())
DB::CurrentThread::getMemoryTracker().free(size); memory_tracker->free(size);
} }
} }
DB::SimpleActionLock getCurrentMemoryTrackerActionLock() DB::SimpleActionLock getCurrentMemoryTrackerActionLock()
{ {
return DB::CurrentThread::getMemoryTracker().blocker.cancel(); auto memory_tracker = DB::CurrentThread::getMemoryTracker();
if (!memory_tracker)
return {};
return memory_tracker->blocker.cancel();
} }

View File

@ -2,6 +2,7 @@
#include <string.h> #include <string.h>
#include <cstddef> #include <cstddef>
#include <cassert>
#include <algorithm> #include <algorithm>
#include <memory> #include <memory>
@ -16,10 +17,19 @@
#include <Common/BitHelpers.h> #include <Common/BitHelpers.h>
#include <Common/memcpySmall.h> #include <Common/memcpySmall.h>
#ifndef NDEBUG
#include <sys/mman.h>
#endif
namespace DB namespace DB
{ {
namespace ErrorCodes
{
extern const int CANNOT_MPROTECT;
}
inline constexpr size_t integerRoundUp(size_t value, size_t dividend) inline constexpr size_t integerRoundUp(size_t value, size_t dividend)
{ {
return ((value + dividend - 1) / dividend) * dividend; return ((value + dividend - 1) / dividend) * dividend;
@ -107,6 +117,8 @@ protected:
if (c_start == null) if (c_start == null)
return; return;
unprotect();
TAllocator::free(c_start - pad_left, allocated_bytes()); TAllocator::free(c_start - pad_left, allocated_bytes());
} }
@ -119,6 +131,8 @@ protected:
return; return;
} }
unprotect();
ptrdiff_t end_diff = c_end - c_start; ptrdiff_t end_diff = c_end - c_start;
c_start = reinterpret_cast<char *>( c_start = reinterpret_cast<char *>(
@ -154,11 +168,34 @@ protected:
realloc(allocated_bytes() * 2, std::forward<TAllocatorParams>(allocator_params)...); realloc(allocated_bytes() * 2, std::forward<TAllocatorParams>(allocator_params)...);
} }
#ifndef NDEBUG
/// Make memory region readonly with mprotect if it is large enough.
/// The operation is slow and performed only for debug builds.
void protectImpl(int prot)
{
static constexpr size_t PROTECT_PAGE_SIZE = 4096;
char * left_rounded_up = reinterpret_cast<char *>((reinterpret_cast<intptr_t>(c_start) - pad_left + PROTECT_PAGE_SIZE - 1) / PROTECT_PAGE_SIZE * PROTECT_PAGE_SIZE);
char * right_rounded_down = reinterpret_cast<char *>((reinterpret_cast<intptr_t>(c_end_of_storage) + pad_right) / PROTECT_PAGE_SIZE * PROTECT_PAGE_SIZE);
if (right_rounded_down > left_rounded_up)
{
size_t length = right_rounded_down - left_rounded_up;
if (0 != mprotect(left_rounded_up, length, prot))
throwFromErrno("Cannot mprotect memory region", ErrorCodes::CANNOT_MPROTECT);
}
}
/// Restore memory protection in destructor or realloc for further reuse by allocator.
bool mprotected = false;
#endif
public: public:
bool empty() const { return c_end == c_start; } bool empty() const { return c_end == c_start; }
size_t size() const { return (c_end - c_start) / ELEMENT_SIZE; } size_t size() const { return (c_end - c_start) / ELEMENT_SIZE; }
size_t capacity() const { return (c_end_of_storage - c_start) / ELEMENT_SIZE; } size_t capacity() const { return (c_end_of_storage - c_start) / ELEMENT_SIZE; }
/// This method is safe to use only for information about memory usage.
size_t allocated_bytes() const { return c_end_of_storage - c_start + pad_right + pad_left; } size_t allocated_bytes() const { return c_end_of_storage - c_start + pad_right + pad_left; }
void clear() { c_end = c_start; } void clear() { c_end = c_start; }
@ -197,6 +234,23 @@ public:
c_end += byte_size(1); c_end += byte_size(1);
} }
void protect()
{
#ifndef NDEBUG
protectImpl(PROT_READ);
mprotected = true;
#endif
}
void unprotect()
{
#ifndef NDEBUG
if (mprotected)
protectImpl(PROT_WRITE);
mprotected = false;
#endif
}
~PODArrayBase() ~PODArrayBase()
{ {
dealloc(); dealloc();
@ -271,8 +325,18 @@ public:
const T * data() const { return t_start(); } const T * data() const { return t_start(); }
/// The index is signed to access -1th element without pointer overflow. /// The index is signed to access -1th element without pointer overflow.
T & operator[] (ssize_t n) { return t_start()[n]; } T & operator[] (ssize_t n)
const T & operator[] (ssize_t n) const { return t_start()[n]; } {
/// <= size, because taking address of one element past memory range is Ok in C++ (expression like &arr[arr.size()] is perfectly valid).
assert((n >= (static_cast<ssize_t>(pad_left_) ? -1 : 0)) && (n <= static_cast<ssize_t>(this->size())));
return t_start()[n];
}
const T & operator[] (ssize_t n) const
{
assert((n >= (static_cast<ssize_t>(pad_left_) ? -1 : 0)) && (n <= static_cast<ssize_t>(this->size())));
return t_start()[n];
}
T & front() { return t_start()[0]; } T & front() { return t_start()[0]; }
T & back() { return t_end()[-1]; } T & back() { return t_end()[-1]; }
@ -390,6 +454,11 @@ public:
void swap(PODArray & rhs) void swap(PODArray & rhs)
{ {
#ifndef NDEBUG
this->unprotect();
rhs.unprotect();
#endif
/// Swap two PODArray objects, arr1 and arr2, that satisfy the following conditions: /// Swap two PODArray objects, arr1 and arr2, that satisfy the following conditions:
/// - The elements of arr1 are stored on stack. /// - The elements of arr1 are stored on stack.
/// - The elements of arr2 are stored on heap. /// - The elements of arr2 are stored on heap.
@ -438,7 +507,9 @@ public:
}; };
if (!this->isInitialized() && !rhs.isInitialized()) if (!this->isInitialized() && !rhs.isInitialized())
{
return; return;
}
else if (!this->isInitialized() && rhs.isInitialized()) else if (!this->isInitialized() && rhs.isInitialized())
{ {
do_move(rhs, *this); do_move(rhs, *this);
@ -482,9 +553,13 @@ public:
rhs.c_end = rhs.c_start + this->byte_size(lhs_size); rhs.c_end = rhs.c_start + this->byte_size(lhs_size);
} }
else if (this->isAllocatedFromStack() && !rhs.isAllocatedFromStack()) else if (this->isAllocatedFromStack() && !rhs.isAllocatedFromStack())
{
swap_stack_heap(*this, rhs); swap_stack_heap(*this, rhs);
}
else if (!this->isAllocatedFromStack() && rhs.isAllocatedFromStack()) else if (!this->isAllocatedFromStack() && rhs.isAllocatedFromStack())
{
swap_stack_heap(rhs, *this); swap_stack_heap(rhs, *this);
}
else else
{ {
std::swap(this->c_start, rhs.c_start); std::swap(this->c_start, rhs.c_start);

View File

@ -19,7 +19,6 @@ namespace ErrorCodes
{ {
extern const int UNKNOWN_CODEC; extern const int UNKNOWN_CODEC;
extern const int UNEXPECTED_AST_STRUCTURE; extern const int UNEXPECTED_AST_STRUCTURE;
extern const int ILLEGAL_SYNTAX_FOR_CODEC_TYPE;
extern const int DATA_TYPE_CANNOT_HAVE_ARGUMENTS; extern const int DATA_TYPE_CANNOT_HAVE_ARGUMENTS;
} }
@ -85,7 +84,7 @@ CompressionCodecPtr CompressionCodecFactory::get(const UInt8 byte_code) const
const auto family_code_and_creator = family_code_with_codec.find(byte_code); const auto family_code_and_creator = family_code_with_codec.find(byte_code);
if (family_code_and_creator == family_code_with_codec.end()) if (family_code_and_creator == family_code_with_codec.end())
throw Exception("Unknown codec family code : " + toString(byte_code), ErrorCodes::UNKNOWN_CODEC); throw Exception("Unknown codec family code: " + toString(byte_code), ErrorCodes::UNKNOWN_CODEC);
return family_code_and_creator->second({}, nullptr); return family_code_and_creator->second({}, nullptr);
} }

View File

@ -2,9 +2,9 @@
#include <cmath> #include <cmath>
#include <limits> #include <limits>
#include "Defines.h"
#include "Types.h"
#include <Common/NaNUtils.h> #include <Common/NaNUtils.h>
#include <Core/Types.h>
#include <Common/UInt128.h> #include <Common/UInt128.h>
/** Preceptually-correct number comparisons. /** Preceptually-correct number comparisons.

View File

@ -250,7 +250,8 @@ void BackgroundSchedulePool::threadFunction()
attachToThreadGroup(); attachToThreadGroup();
SCOPE_EXIT({ CurrentThread::detachQueryIfNotDetached(); }); SCOPE_EXIT({ CurrentThread::detachQueryIfNotDetached(); });
CurrentThread::getMemoryTracker().setMetric(CurrentMetrics::MemoryTrackingInBackgroundSchedulePool); if (auto memory_tracker = CurrentThread::getMemoryTracker())
memory_tracker->setMetric(CurrentMetrics::MemoryTrackingInBackgroundSchedulePool);
while (!shutdown) while (!shutdown)
{ {

View File

@ -113,6 +113,7 @@ namespace Graphite
struct Pattern struct Pattern
{ {
std::shared_ptr<OptimizedRegularExpression> regexp; std::shared_ptr<OptimizedRegularExpression> regexp;
std::string regexp_str;
AggregateFunctionPtr function; AggregateFunctionPtr function;
Retentions retentions; /// Must be ordered by 'age' descending. Retentions retentions; /// Must be ordered by 'age' descending.
enum { TypeUndef, TypeRetention, TypeAggregation, TypeAll } type = TypeAll; /// The type of defined pattern, filled automatically enum { TypeUndef, TypeRetention, TypeAggregation, TypeAll } type = TypeAll; /// The type of defined pattern, filled automatically
@ -124,6 +125,7 @@ namespace Graphite
struct Params struct Params
{ {
String config_name;
String path_column_name; String path_column_name;
String time_column_name; String time_column_name;
String value_column_name; String value_column_name;
@ -215,6 +217,7 @@ private:
const Graphite::Pattern undef_pattern = const Graphite::Pattern undef_pattern =
{ /// temporary empty pattern for selectPatternForPath { /// temporary empty pattern for selectPatternForPath
nullptr, nullptr,
"",
nullptr, nullptr,
DB::Graphite::Retentions(), DB::Graphite::Retentions(),
undef_pattern.TypeUndef, undef_pattern.TypeUndef,

View File

@ -6,6 +6,7 @@
#include <DataStreams/SizeLimits.h> #include <DataStreams/SizeLimits.h>
#include <IO/Progress.h> #include <IO/Progress.h>
#include <Interpreters/SettingsCommon.h> #include <Interpreters/SettingsCommon.h>
#include <Storages/TableStructureLockHolder.h>
#include <atomic> #include <atomic>
#include <shared_mutex> #include <shared_mutex>
@ -24,12 +25,9 @@ class IBlockInputStream;
class ProcessListElement; class ProcessListElement;
class QuotaForIntervals; class QuotaForIntervals;
class QueryStatus; class QueryStatus;
class TableStructureReadLock;
using BlockInputStreamPtr = std::shared_ptr<IBlockInputStream>; using BlockInputStreamPtr = std::shared_ptr<IBlockInputStream>;
using BlockInputStreams = std::vector<BlockInputStreamPtr>; using BlockInputStreams = std::vector<BlockInputStreamPtr>;
using TableStructureReadLockPtr = std::shared_ptr<TableStructureReadLock>;
using TableStructureReadLocks = std::vector<TableStructureReadLockPtr>;
/** Callback to track the progress of the query. /** Callback to track the progress of the query.
* Used in IBlockInputStream and Context. * Used in IBlockInputStream and Context.
@ -117,7 +115,7 @@ public:
size_t checkDepth(size_t max_depth) const { return checkDepthImpl(max_depth, max_depth); } size_t checkDepth(size_t max_depth) const { return checkDepthImpl(max_depth, max_depth); }
/// Do not allow to change the table while the blocks stream and its children are alive. /// Do not allow to change the table while the blocks stream and its children are alive.
void addTableLock(const TableStructureReadLockPtr & lock) { table_locks.push_back(lock); } void addTableLock(const TableStructureReadLockHolder & lock) { table_locks.push_back(lock); }
/// Get information about execution speed. /// Get information about execution speed.
const BlockStreamProfileInfo & getProfileInfo() const { return info; } const BlockStreamProfileInfo & getProfileInfo() const { return info; }
@ -244,7 +242,7 @@ public:
protected: protected:
/// Order is important: `table_locks` must be destroyed after `children` so that tables from /// Order is important: `table_locks` must be destroyed after `children` so that tables from
/// which child streams read are protected by the locks during the lifetime of the child streams. /// which child streams read are protected by the locks during the lifetime of the child streams.
TableStructureReadLocks table_locks; std::vector<TableStructureReadLockHolder> table_locks;
BlockInputStreams children; BlockInputStreams children;
std::shared_mutex children_mutex; std::shared_mutex children_mutex;

View File

@ -5,6 +5,7 @@
#include <memory> #include <memory>
#include <boost/noncopyable.hpp> #include <boost/noncopyable.hpp>
#include <Core/Block.h> #include <Core/Block.h>
#include <Storages/TableStructureLockHolder.h>
namespace DB namespace DB
@ -12,13 +13,6 @@ namespace DB
struct Progress; struct Progress;
class TableStructureReadLock;
using TableStructureReadLockPtr = std::shared_ptr<TableStructureReadLock>;
using TableStructureReadLocks = std::vector<TableStructureReadLockPtr>;
struct Progress;
/** Interface of stream for writing data (into table, filesystem, network, terminal, etc.) /** Interface of stream for writing data (into table, filesystem, network, terminal, etc.)
*/ */
class IBlockOutputStream : private boost::noncopyable class IBlockOutputStream : private boost::noncopyable
@ -64,10 +58,10 @@ public:
/** Don't let to alter table while instance of stream is alive. /** Don't let to alter table while instance of stream is alive.
*/ */
void addTableLock(const TableStructureReadLockPtr & lock) { table_locks.push_back(lock); } void addTableLock(const TableStructureReadLockHolder & lock) { table_locks.push_back(lock); }
private: private:
TableStructureReadLocks table_locks; std::vector<TableStructureReadLockHolder> table_locks;
}; };
using BlockOutputStreamPtr = std::shared_ptr<IBlockOutputStream>; using BlockOutputStreamPtr = std::shared_ptr<IBlockOutputStream>;

View File

@ -6,9 +6,14 @@
namespace DB namespace DB
{ {
LimitBlockInputStream::LimitBlockInputStream(const BlockInputStreamPtr & input, UInt64 limit_, UInt64 offset_, bool always_read_till_end_) LimitBlockInputStream::LimitBlockInputStream(const BlockInputStreamPtr & input, UInt64 limit_, UInt64 offset_, bool always_read_till_end_, bool use_limit_as_total_rows_approx)
: limit(limit_), offset(offset_), always_read_till_end(always_read_till_end_) : limit(limit_), offset(offset_), always_read_till_end(always_read_till_end_)
{ {
if (use_limit_as_total_rows_approx)
{
addTotalRowsApprox(static_cast<size_t>(limit));
}
children.push_back(input); children.push_back(input);
} }

View File

@ -16,8 +16,9 @@ public:
* returns an empty block, and this causes the query to be canceled. * returns an empty block, and this causes the query to be canceled.
* If always_read_till_end = true - reads all the data to the end, but ignores them. This is necessary in rare cases: * If always_read_till_end = true - reads all the data to the end, but ignores them. This is necessary in rare cases:
* when otherwise, due to the cancellation of the request, we would not have received the data for GROUP BY WITH TOTALS from the remote server. * when otherwise, due to the cancellation of the request, we would not have received the data for GROUP BY WITH TOTALS from the remote server.
* If use_limit_as_total_rows_approx = true, then addTotalRowsApprox is called to use the limit in progress & stats
*/ */
LimitBlockInputStream(const BlockInputStreamPtr & input, UInt64 limit_, UInt64 offset_, bool always_read_till_end_ = false); LimitBlockInputStream(const BlockInputStreamPtr & input, UInt64 limit_, UInt64 offset_, bool always_read_till_end_ = false, bool use_limit_as_total_rows_approx = false);
String getName() const override { return "Limit"; } String getName() const override { return "Limit"; }

View File

@ -20,7 +20,7 @@ PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream(
* Although now any insertion into the table is done via PushingToViewsBlockOutputStream, * Although now any insertion into the table is done via PushingToViewsBlockOutputStream,
* but it's clear that here is not the best place for this functionality. * but it's clear that here is not the best place for this functionality.
*/ */
addTableLock(storage->lockStructure(true, context.getCurrentQueryId())); addTableLock(storage->lockStructureForShare(true, context.getCurrentQueryId()));
/// If the "root" table deduplactes blocks, there are no need to make deduplication for children /// If the "root" table deduplactes blocks, there are no need to make deduplication for children
/// Moreover, deduplication for AggregatingMergeTree children could produce false positives due to low size of inserting blocks /// Moreover, deduplication for AggregatingMergeTree children could produce false positives due to low size of inserting blocks
@ -45,7 +45,7 @@ PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream(
auto & materialized_view = dynamic_cast<const StorageMaterializedView &>(*dependent_table); auto & materialized_view = dynamic_cast<const StorageMaterializedView &>(*dependent_table);
if (StoragePtr inner_table = materialized_view.tryGetTargetTable()) if (StoragePtr inner_table = materialized_view.tryGetTargetTable())
addTableLock(inner_table->lockStructure(true, context.getCurrentQueryId())); addTableLock(inner_table->lockStructureForShare(true, context.getCurrentQueryId()));
auto query = materialized_view.getInnerQuery(); auto query = materialized_view.getInnerQuery();
BlockOutputStreamPtr out = std::make_shared<PushingToViewsBlockOutputStream>( BlockOutputStreamPtr out = std::make_shared<PushingToViewsBlockOutputStream>(

View File

@ -69,7 +69,7 @@ void DataTypeFixedString::deserializeBinary(IColumn & column, ReadBuffer & istr)
data.resize(old_size + n); data.resize(old_size + n);
try try
{ {
istr.readStrict(reinterpret_cast<char *>(&data[old_size]), n); istr.readStrict(reinterpret_cast<char *>(data.data() + old_size), n);
} }
catch (...) catch (...)
{ {

View File

@ -130,9 +130,9 @@ static NO_INLINE void deserializeBinarySSE2(ColumnString::Chars & data, ColumnSt
if (size) if (size)
{ {
#ifdef __x86_64__ #ifdef __SSE2__
/// An optimistic branch in which more efficient copying is possible. /// An optimistic branch in which more efficient copying is possible.
if (offset + 16 * UNROLL_TIMES <= data.allocated_bytes() && istr.position() + size + 16 * UNROLL_TIMES <= istr.buffer().end()) if (offset + 16 * UNROLL_TIMES <= data.capacity() && istr.position() + size + 16 * UNROLL_TIMES <= istr.buffer().end())
{ {
const __m128i * sse_src_pos = reinterpret_cast<const __m128i *>(istr.position()); const __m128i * sse_src_pos = reinterpret_cast<const __m128i *>(istr.position());
const __m128i * sse_src_end = sse_src_pos + (size + (16 * UNROLL_TIMES - 1)) / 16 / UNROLL_TIMES * UNROLL_TIMES; const __m128i * sse_src_end = sse_src_pos + (size + (16 * UNROLL_TIMES - 1)) / 16 / UNROLL_TIMES * UNROLL_TIMES;
@ -140,22 +140,11 @@ static NO_INLINE void deserializeBinarySSE2(ColumnString::Chars & data, ColumnSt
while (sse_src_pos < sse_src_end) while (sse_src_pos < sse_src_end)
{ {
/// NOTE gcc 4.9.2 unrolls the loop, but for some reason uses only one xmm register. for (size_t j = 0; j < UNROLL_TIMES; ++j)
/// for (size_t j = 0; j < UNROLL_TIMES; ++j) _mm_storeu_si128(sse_dst_pos + j, _mm_loadu_si128(sse_src_pos + j));
/// _mm_storeu_si128(sse_dst_pos + j, _mm_loadu_si128(sse_src_pos + j));
sse_src_pos += UNROLL_TIMES; sse_src_pos += UNROLL_TIMES;
sse_dst_pos += UNROLL_TIMES; sse_dst_pos += UNROLL_TIMES;
if (UNROLL_TIMES >= 4) __asm__("movdqu %0, %%xmm0" :: "m"(sse_src_pos[-4]));
if (UNROLL_TIMES >= 3) __asm__("movdqu %0, %%xmm1" :: "m"(sse_src_pos[-3]));
if (UNROLL_TIMES >= 2) __asm__("movdqu %0, %%xmm2" :: "m"(sse_src_pos[-2]));
if (UNROLL_TIMES >= 1) __asm__("movdqu %0, %%xmm3" :: "m"(sse_src_pos[-1]));
if (UNROLL_TIMES >= 4) __asm__("movdqu %%xmm0, %0" : "=m"(sse_dst_pos[-4]));
if (UNROLL_TIMES >= 3) __asm__("movdqu %%xmm1, %0" : "=m"(sse_dst_pos[-3]));
if (UNROLL_TIMES >= 2) __asm__("movdqu %%xmm2, %0" : "=m"(sse_dst_pos[-2]));
if (UNROLL_TIMES >= 1) __asm__("movdqu %%xmm3, %0" : "=m"(sse_dst_pos[-1]));
} }
istr.position() += size; istr.position() += size;

View File

@ -7,5 +7,5 @@ target_link_libraries (data_type_string PRIVATE dbms)
if(USE_GTEST) if(USE_GTEST)
add_executable(data_type_get_common_type data_type_get_common_type.cpp) add_executable(data_type_get_common_type data_type_get_common_type.cpp)
target_link_libraries(data_type_get_common_type PRIVATE dbms gtest_main) target_link_libraries(data_type_get_common_type PRIVATE dbms ${GTEST_BOTH_LIBRARIES})
endif() endif()

View File

@ -35,6 +35,7 @@ String getTableDefinitionFromCreateQuery(const ASTPtr & query)
create.as_table.clear(); create.as_table.clear();
create.if_not_exists = false; create.if_not_exists = false;
create.is_populate = false; create.is_populate = false;
create.replace_view = false;
/// For views it is necessary to save the SELECT query itself, for the rest - on the contrary /// For views it is necessary to save the SELECT query itself, for the rest - on the contrary
if (!create.is_view && !create.is_materialized_view) if (!create.is_view && !create.is_materialized_view)

View File

@ -3,26 +3,29 @@
#include <ext/singleton.h> #include <ext/singleton.h>
#include "IDictionary.h" #include "IDictionary.h"
namespace Poco namespace Poco
{ {
namespace Util namespace Util
{ {
class AbstractConfiguration; class AbstractConfiguration;
} }
class Logger; class Logger;
} }
namespace DB namespace DB
{ {
class Context; class Context;
class DictionaryFactory : public ext::singleton<DictionaryFactory> class DictionaryFactory : public ext::singleton<DictionaryFactory>
{ {
public: public:
DictionaryPtr DictionaryPtr create(const std::string & name, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, Context & context) const;
create(const std::string & name, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, Context & context)
const;
using Creator = std::function<DictionaryPtr( using Creator = std::function<DictionaryPtr(
const std::string & name, const std::string & name,

View File

@ -77,7 +77,7 @@ void RegionsNames::reload()
throw Poco::Exception("Logical error. Maybe size estimate of " + names_source->getSourceName() + " is wrong."); throw Poco::Exception("Logical error. Maybe size estimate of " + names_source->getSourceName() + " is wrong.");
new_chars.resize(old_size + name_entry.name.length() + 1); new_chars.resize(old_size + name_entry.name.length() + 1);
memcpy(&new_chars[old_size], name_entry.name.c_str(), name_entry.name.length() + 1); memcpy(new_chars.data() + old_size, name_entry.name.c_str(), name_entry.name.length() + 1);
if (name_entry.id > max_region_id) if (name_entry.id > max_region_id)
{ {
@ -92,7 +92,7 @@ void RegionsNames::reload()
while (name_entry.id >= new_names_refs.size()) while (name_entry.id >= new_names_refs.size())
new_names_refs.resize(new_names_refs.size() * 2, StringRef("", 0)); new_names_refs.resize(new_names_refs.size() * 2, StringRef("", 0));
new_names_refs[name_entry.id] = StringRef(&new_chars[old_size], name_entry.name.length()); new_names_refs[name_entry.id] = StringRef(new_chars.data() + old_size, name_entry.name.length());
} }
chars[language_id].swap(new_chars); chars[language_id].swap(new_chars);

View File

@ -59,7 +59,7 @@ namespace
{ {
size_t old_size = buf.size(); size_t old_size = buf.size();
buf.reserve(old_size + MAX_VARINT_SIZE); buf.reserve(old_size + MAX_VARINT_SIZE);
UInt8 * ptr = &buf[old_size]; UInt8 * ptr = buf.data() + old_size;
ptr = writeVarint(value, ptr); ptr = writeVarint(value, ptr);
buf.resize_assume_reserved(ptr - buf.data()); buf.resize_assume_reserved(ptr - buf.data());
} }
@ -200,7 +200,7 @@ void ProtobufWriter::SimpleWriter::writeUInt(UInt32 field_number, UInt64 value)
{ {
size_t old_size = buffer.size(); size_t old_size = buffer.size();
buffer.reserve(old_size + 2 * MAX_VARINT_SIZE); buffer.reserve(old_size + 2 * MAX_VARINT_SIZE);
UInt8 * ptr = &buffer[old_size]; UInt8 * ptr = buffer.data() + old_size;
ptr = writeFieldNumber(field_number, VARINT, ptr); ptr = writeFieldNumber(field_number, VARINT, ptr);
ptr = writeVarint(value, ptr); ptr = writeVarint(value, ptr);
buffer.resize_assume_reserved(ptr - buffer.data()); buffer.resize_assume_reserved(ptr - buffer.data());
@ -223,7 +223,7 @@ void ProtobufWriter::SimpleWriter::writeFixed(UInt32 field_number, T value)
constexpr WireType wire_type = (sizeof(T) == 4) ? BITS32 : BITS64; constexpr WireType wire_type = (sizeof(T) == 4) ? BITS32 : BITS64;
size_t old_size = buffer.size(); size_t old_size = buffer.size();
buffer.reserve(old_size + MAX_VARINT_SIZE + sizeof(T)); buffer.reserve(old_size + MAX_VARINT_SIZE + sizeof(T));
UInt8 * ptr = &buffer[old_size]; UInt8 * ptr = buffer.data() + old_size;
ptr = writeFieldNumber(field_number, wire_type, ptr); ptr = writeFieldNumber(field_number, wire_type, ptr);
memcpy(ptr, &value, sizeof(T)); memcpy(ptr, &value, sizeof(T));
ptr += sizeof(T); ptr += sizeof(T);
@ -234,7 +234,7 @@ void ProtobufWriter::SimpleWriter::writeString(UInt32 field_number, const String
{ {
size_t old_size = buffer.size(); size_t old_size = buffer.size();
buffer.reserve(old_size + 2 * MAX_VARINT_SIZE + str.size); buffer.reserve(old_size + 2 * MAX_VARINT_SIZE + str.size);
UInt8 * ptr = &buffer[old_size]; UInt8 * ptr = buffer.data() + old_size;
ptr = writeFieldNumber(field_number, LENGTH_DELIMITED, ptr); ptr = writeFieldNumber(field_number, LENGTH_DELIMITED, ptr);
ptr = writeVarint(str.size, ptr); ptr = writeVarint(str.size, ptr);
memcpy(ptr, str.data, str.size); memcpy(ptr, str.data, str.size);
@ -294,7 +294,7 @@ void ProtobufWriter::SimpleWriter::addFixedToRepeatedPack(T value)
static_assert((sizeof(T) == 4) || (sizeof(T) == 8)); static_assert((sizeof(T) == 4) || (sizeof(T) == 8));
size_t old_size = buffer.size(); size_t old_size = buffer.size();
buffer.resize(old_size + sizeof(T)); buffer.resize(old_size + sizeof(T));
memcpy(&buffer[old_size], &value, sizeof(T)); memcpy(buffer.data() + old_size, &value, sizeof(T));
} }

View File

@ -65,7 +65,7 @@ FunctionBasePtr FunctionBuilderJoinGet::buildImpl(const ColumnsWithTypeAndName &
auto join = storage_join->getJoin(); auto join = storage_join->getJoin();
DataTypes data_types(arguments.size()); DataTypes data_types(arguments.size());
auto table_lock = storage_join->lockStructure(false, context.getCurrentQueryId()); auto table_lock = storage_join->lockStructureForShare(false, context.getCurrentQueryId());
for (size_t i = 0; i < arguments.size(); ++i) for (size_t i = 0; i < arguments.size(); ++i)
data_types[i] = arguments[i].type; data_types[i] = arguments[i].type;

View File

@ -1,4 +1,5 @@
#include <Functions/IFunction.h> #include <Functions/IFunction.h>
#include <Storages/TableStructureLockHolder.h>
namespace DB namespace DB
{ {
@ -7,8 +8,6 @@ class IStorage;
using StoragePtr = std::shared_ptr<IStorage>; using StoragePtr = std::shared_ptr<IStorage>;
class Join; class Join;
using JoinPtr = std::shared_ptr<Join>; using JoinPtr = std::shared_ptr<Join>;
class TableStructureReadLock;
using TableStructureReadLockPtr = std::shared_ptr<TableStructureReadLock>;
class FunctionJoinGet final : public IFunction, public std::enable_shared_from_this<FunctionJoinGet> class FunctionJoinGet final : public IFunction, public std::enable_shared_from_this<FunctionJoinGet>
{ {
@ -16,7 +15,7 @@ public:
static constexpr auto name = "joinGet"; static constexpr auto name = "joinGet";
FunctionJoinGet( FunctionJoinGet(
TableStructureReadLockPtr table_lock, StoragePtr storage_join, JoinPtr join, const String & attr_name, DataTypePtr return_type) TableStructureReadLockHolder table_lock, StoragePtr storage_join, JoinPtr join, const String & attr_name, DataTypePtr return_type)
: table_lock(std::move(table_lock)) : table_lock(std::move(table_lock))
, storage_join(std::move(storage_join)) , storage_join(std::move(storage_join))
, join(std::move(join)) , join(std::move(join))
@ -36,7 +35,7 @@ private:
size_t getNumberOfArguments() const override { return 0; } size_t getNumberOfArguments() const override { return 0; }
private: private:
TableStructureReadLockPtr table_lock; TableStructureReadLockHolder table_lock;
StoragePtr storage_join; StoragePtr storage_join;
JoinPtr join; JoinPtr join;
const String attr_name; const String attr_name;

View File

@ -33,6 +33,7 @@ namespace ErrorCodes
{ {
extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION; extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION;
extern const int LOGICAL_ERROR; extern const int LOGICAL_ERROR;
extern const int ILLEGAL_COLUMN;
} }
@ -123,8 +124,8 @@ public:
} }
else else
throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName() throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName()
+ " of argument of function " + getName(), + " of argument of function " + getName(),
ErrorCodes::ILLEGAL_COLUMN); ErrorCodes::ILLEGAL_COLUMN);
} }
}; };

View File

@ -36,6 +36,7 @@ namespace ErrorCodes
{ {
extern const int DICTIONARIES_WAS_NOT_LOADED; extern const int DICTIONARIES_WAS_NOT_LOADED;
extern const int BAD_ARGUMENTS; extern const int BAD_ARGUMENTS;
extern const int ILLEGAL_COLUMN;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
} }

View File

@ -44,6 +44,8 @@ namespace ErrorCodes
extern const int UNKNOWN_TYPE; extern const int UNKNOWN_TYPE;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int TYPE_MISMATCH; extern const int TYPE_MISMATCH;
extern const int ILLEGAL_COLUMN;
extern const int BAD_ARGUMENTS;
} }
/** Functions that use plug-ins (external) dictionaries. /** Functions that use plug-ins (external) dictionaries.

View File

@ -20,6 +20,7 @@ namespace DB
namespace ErrorCodes namespace ErrorCodes
{ {
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ILLEGAL_COLUMN;
} }
enum ClusterOperation enum ClusterOperation

View File

@ -48,6 +48,7 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR; extern const int LOGICAL_ERROR;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int NOT_IMPLEMENTED; extern const int NOT_IMPLEMENTED;
extern const int ILLEGAL_COLUMN;
} }

View File

@ -8,7 +8,7 @@
#include <DataTypes/DataTypesDecimal.h> #include <DataTypes/DataTypesDecimal.h>
#include <Columns/ColumnVector.h> #include <Columns/ColumnVector.h>
#include <Interpreters/castColumn.h> #include <Interpreters/castColumn.h>
#include "IFunction.h"
#include <Common/intExp.h> #include <Common/intExp.h>
#include <cmath> #include <cmath>
#include <type_traits> #include <type_traits>

View File

@ -21,6 +21,7 @@ namespace ErrorCodes
{ {
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int BAD_ARGUMENTS; extern const int BAD_ARGUMENTS;
extern const int ILLEGAL_COLUMN;
} }

View File

@ -8,164 +8,271 @@
#include <Core/Defines.h> #include <Core/Defines.h>
#include <common/unaligned.h>
#include <algorithm> #include <algorithm>
#include <climits>
#include <cstring> #include <cstring>
#include <limits> #include <limits>
#include <memory> #include <memory>
#include <utility>
#ifdef __SSE4_2__ #ifdef __SSE4_2__
#include <nmmintrin.h> # include <nmmintrin.h>
#endif #endif
namespace DB namespace DB
{ {
/** Distance function implementation. /** Distance function implementation.
* We calculate all the trigrams from left string and count by the index of * We calculate all the n-grams from left string and count by the index of
* 16 bits hash of them in the map. * 16 bits hash of them in the map.
* Then calculate all the trigrams from the right string and calculate * Then calculate all the n-grams from the right string and calculate
* the trigram distance on the flight by adding and subtracting from the hashmap. * the n-gram distance on the flight by adding and subtracting from the hashmap.
* Then return the map into the condition of which it was after the left string * Then return the map into the condition of which it was after the left string
* calculation. If the right string size is big (more than 2**15 bytes), * calculation. If the right string size is big (more than 2**15 bytes),
* the strings are not similar at all and we return 1. * the strings are not similar at all and we return 1.
*/ */
struct TrigramDistanceImpl template <size_t N, class CodePoint, bool UTF8, bool CaseInsensitive>
struct NgramDistanceImpl
{ {
using ResultType = Float32; using ResultType = Float32;
using CodePoint = UInt32;
/// map_size for trigram difference /// map_size for ngram difference.
static constexpr size_t map_size = 1u << 16; static constexpr size_t map_size = 1u << 16;
/// If the haystack size is bigger than this, behaviour is unspecified for this function /// If the haystack size is bigger than this, behaviour is unspecified for this function.
static constexpr size_t max_string_size = 1u << 15; static constexpr size_t max_string_size = 1u << 15;
/// Default padding to read safely.
static constexpr size_t default_padding = 16;
/// Max codepoints to store at once. 16 is for batching usage and PODArray has this padding.
static constexpr size_t simultaneously_codepoints_num = default_padding + N - 1;
/** This fits mostly in L2 cache all the time. /** This fits mostly in L2 cache all the time.
* Actually use UInt16 as addings and subtractions do not UB overflow. But think of it as a signed * Actually use UInt16 as addings and subtractions do not UB overflow. But think of it as a signed
* integer array. * integer array.
*/ */
using TrigramStats = UInt16[map_size]; using NgramStats = UInt16[map_size];
static ALWAYS_INLINE UInt16 trigramHash(CodePoint one, CodePoint two, CodePoint three) static ALWAYS_INLINE UInt16 ASCIIHash(const CodePoint * code_points)
{ {
UInt64 combined = (static_cast<UInt64>(one) << 32) | two; return intHashCRC32(unalignedLoad<UInt32>(code_points)) & 0xFFFFu;
}
static ALWAYS_INLINE UInt16 UTF8Hash(const CodePoint * code_points)
{
UInt64 combined = (static_cast<UInt64>(code_points[0]) << 32) | code_points[1];
#ifdef __SSE4_2__ #ifdef __SSE4_2__
return _mm_crc32_u64(three, combined) & 0xFFFFu; return _mm_crc32_u64(code_points[2], combined) & 0xFFFFu;
#else #else
return (intHashCRC32(combined) ^ intHashCRC32(three)) & 0xFFFFu; return (intHashCRC32(combined) ^ intHashCRC32(code_points[2])) & 0xFFFFu;
#endif #endif
} }
static ALWAYS_INLINE CodePoint readCodePoint(const char *& pos, const char * end) noexcept template <size_t Offset, class Container, size_t... I>
static ALWAYS_INLINE inline void unrollLowering(Container & cont, const std::index_sequence<I...> &)
{ {
size_t length = UTF8::seqLength(*pos); ((cont[Offset + I] = std::tolower(cont[Offset + I])), ...);
if (pos + length > end)
length = end - pos;
CodePoint res;
/// This is faster than just memcpy because of compiler optimizations with moving bytes.
switch (length)
{
case 1:
res = 0;
memcpy(&res, pos, 1);
break;
case 2:
res = 0;
memcpy(&res, pos, 2);
break;
case 3:
res = 0;
memcpy(&res, pos, 3);
break;
default:
memcpy(&res, pos, 4);
}
pos += length;
return res;
} }
static inline size_t calculateNeedleStats(const char * data, const size_t size, TrigramStats & trigram_stats) noexcept static ALWAYS_INLINE size_t readASCIICodePoints(CodePoint * code_points, const char *& pos, const char * end)
{ {
size_t len = 0; /// Offset before which we copy some data.
const char * start = data; constexpr size_t padding_offset = default_padding - N + 1;
const char * end = data + size; /// We have an array like this for ASCII (N == 4, other cases are similar)
CodePoint cp1 = 0; /// |a0|a1|a2|a3|a4|a5|a6|a7|a8|a9|a10|a11|a12|a13|a14|a15|a16|a17|a18|
CodePoint cp2 = 0; /// And we copy ^^^^^^^^^^^^^^^ these bytes to the start
CodePoint cp3 = 0; /// Actually it is enough to copy 3 bytes, but memcpy for 4 bytes translates into 1 instruction
memcpy(code_points, code_points + padding_offset, roundUpToPowerOfTwoOrZero(N - 1) * sizeof(CodePoint));
/// Now we have an array
/// |a13|a14|a15|a16|a4|a5|a6|a7|a8|a9|a10|a11|a12|a13|a14|a15|a16|a17|a18|
/// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/// Doing unaligned read of 16 bytes and copy them like above
/// 16 is also chosen to do two `movups`.
/// Such copying allow us to have 3 codepoints from the previous read to produce the 4-grams with them.
memcpy(code_points + (N - 1), pos, default_padding * sizeof(CodePoint));
while (start != end) if constexpr (CaseInsensitive)
{ {
cp1 = cp2; /// We really need template lambdas with C++20 to do it inline
cp2 = cp3; unrollLowering<N - 1>(code_points, std::make_index_sequence<padding_offset>());
cp3 = readCodePoint(start, end);
++len;
if (len < 3)
continue;
++trigram_stats[trigramHash(cp1, cp2, cp3)];
} }
return std::max(static_cast<Int64>(0), static_cast<Int64>(len) - 2); pos += padding_offset;
if (pos > end)
return default_padding - (pos - end);
return default_padding;
} }
static inline UInt64 calculateHaystackStatsAndMetric(const char * data, const size_t size, TrigramStats & trigram_stats, size_t & distance) static ALWAYS_INLINE size_t readUTF8CodePoints(CodePoint * code_points, const char *& pos, const char * end)
{ {
size_t len = 0; /// The same copying as described in the function above.
size_t trigram_cnt = 0; memcpy(code_points, code_points + default_padding - N + 1, roundUpToPowerOfTwoOrZero(N - 1) * sizeof(CodePoint));
size_t num = N - 1;
while (num < default_padding && pos < end)
{
size_t length = UTF8::seqLength(*pos);
if (pos + length > end)
length = end - pos;
CodePoint res;
/// This is faster than just memcpy because of compiler optimizations with moving bytes.
switch (length)
{
case 1:
res = 0;
memcpy(&res, pos, 1);
break;
case 2:
res = 0;
memcpy(&res, pos, 2);
break;
case 3:
res = 0;
memcpy(&res, pos, 3);
break;
default:
memcpy(&res, pos, 4);
}
/// This is not a really true case insensitive utf8. We zero the 5-th bit of every byte.
/// For ASCII it works https://catonmat.net/ascii-case-conversion-trick. For most cyrrilic letters also does.
/// For others, we don't care now. Lowering UTF is not a cheap operation.
if constexpr (CaseInsensitive)
{
switch (length)
{
case 4:
res &= ~(1u << (5 + 3 * CHAR_BIT));
[[fallthrough]];
case 3:
res &= ~(1u << (5 + 2 * CHAR_BIT));
[[fallthrough]];
case 2:
res &= ~(1u << (5 + CHAR_BIT));
[[fallthrough]];
default:
res &= ~(1u << 5);
}
}
pos += length;
code_points[num++] = res;
}
return num;
}
static ALWAYS_INLINE inline size_t calculateNeedleStats(
const char * data,
const size_t size,
NgramStats & ngram_stats,
size_t (*read_code_points)(CodePoint *, const char *&, const char *),
UInt16 (*hash_functor)(const CodePoint *))
{
// To prevent size_t overflow below.
if (size < N)
return 0;
const char * start = data; const char * start = data;
const char * end = data + size; const char * end = data + size;
CodePoint cp1 = 0; CodePoint cp[simultaneously_codepoints_num] = {};
CodePoint cp2 = 0;
CodePoint cp3 = 0; /// read_code_points returns the position of cp where it stopped reading codepoints.
size_t found = read_code_points(cp, start, end);
/// We need to start for the first time here, because first N - 1 codepoints mean nothing.
size_t i = N - 1;
/// Initialize with this value because for the first time `found` does not initialize first N - 1 codepoints.
size_t len = -N + 1;
do
{
len += found - N + 1;
for (; i + N <= found; ++i)
++ngram_stats[hash_functor(cp + i)];
i = 0;
} while (start < end && (found = read_code_points(cp, start, end)));
return len;
}
static ALWAYS_INLINE inline UInt64 calculateHaystackStatsAndMetric(
const char * data,
const size_t size,
NgramStats & ngram_stats,
size_t & distance,
size_t (*read_code_points)(CodePoint *, const char *&, const char *),
UInt16 (*hash_functor)(const CodePoint *))
{
size_t ngram_cnt = 0;
const char * start = data;
const char * end = data + size;
CodePoint cp[simultaneously_codepoints_num] = {};
/// allocation tricks, most strings are relatively small /// allocation tricks, most strings are relatively small
static constexpr size_t small_buffer_size = 256; static constexpr size_t small_buffer_size = 256;
std::unique_ptr<UInt16[]> big_buffer; std::unique_ptr<UInt16[]> big_buffer;
UInt16 small_buffer[small_buffer_size]; UInt16 small_buffer[small_buffer_size];
UInt16 * trigram_storage = small_buffer; UInt16 * ngram_storage = small_buffer;
if (size > small_buffer_size) if (size > small_buffer_size)
{ {
trigram_storage = new UInt16[size]; ngram_storage = new UInt16[size];
big_buffer.reset(trigram_storage); big_buffer.reset(ngram_storage);
} }
while (start != end) /// read_code_points returns the position of cp where it stopped reading codepoints.
size_t found = read_code_points(cp, start, end);
/// We need to start for the first time here, because first N - 1 codepoints mean nothing.
size_t iter = N - 1;
do
{ {
cp1 = cp2; for (; iter + N <= found; ++iter)
cp2 = cp3; {
cp3 = readCodePoint(start, end); UInt16 hash = hash_functor(cp + iter);
++len; if (static_cast<Int16>(ngram_stats[hash]) > 0)
if (len < 3) --distance;
continue; else
++distance;
UInt16 hash = trigramHash(cp1, cp2, cp3); ngram_storage[ngram_cnt++] = hash;
--ngram_stats[hash];
if (static_cast<Int16>(trigram_stats[hash]) > 0) }
--distance; iter = 0;
else } while (start < end && (found = read_code_points(cp, start, end)));
++distance;
trigram_storage[trigram_cnt++] = hash;
--trigram_stats[hash];
}
/// Return the state of hash map to its initial. /// Return the state of hash map to its initial.
for (size_t i = 0; i < trigram_cnt; ++i) for (size_t i = 0; i < ngram_cnt; ++i)
++trigram_stats[trigram_storage[i]]; ++ngram_stats[ngram_storage[i]];
return ngram_cnt;
return trigram_cnt;
} }
static void constant_constant(const std::string & data, const std::string & needle, Float32 & res) template <class Callback, class... Args>
static inline size_t dispatchSearcher(Callback callback, Args &&... args)
{ {
TrigramStats common_stats; if constexpr (!UTF8)
return callback(std::forward<Args>(args)..., readASCIICodePoints, ASCIIHash);
else
return callback(std::forward<Args>(args)..., readUTF8CodePoints, UTF8Hash);
}
static void constant_constant(std::string data, std::string needle, Float32 & res)
{
NgramStats common_stats;
memset(common_stats, 0, sizeof(common_stats)); memset(common_stats, 0, sizeof(common_stats));
size_t second_size = calculateNeedleStats(needle.data(), needle.size(), common_stats);
/// We use unsafe versions of getting ngrams, so I decided to use padded strings.
const size_t needle_size = needle.size();
const size_t data_size = data.size();
needle.resize(needle_size + default_padding);
data.resize(data_size + default_padding);
size_t second_size = dispatchSearcher(calculateNeedleStats, needle.data(), needle_size, common_stats);
size_t distance = second_size; size_t distance = second_size;
if (data.size() <= max_string_size) if (data_size <= max_string_size)
{ {
size_t first_size = calculateHaystackStatsAndMetric(data.data(), data.size(), common_stats, distance); size_t first_size = dispatchSearcher(calculateHaystackStatsAndMetric, data.data(), data_size, common_stats, distance);
res = distance * 1.f / std::max(first_size + second_size, size_t(1)); res = distance * 1.f / std::max(first_size + second_size, size_t(1));
} }
else else
@ -175,11 +282,18 @@ struct TrigramDistanceImpl
} }
static void vector_constant( static void vector_constant(
const ColumnString::Chars & data, const ColumnString::Offsets & offsets, const std::string & needle, PaddedPODArray<Float32> & res) const ColumnString::Chars & data, const ColumnString::Offsets & offsets, std::string needle, PaddedPODArray<Float32> & res)
{ {
TrigramStats common_stats; /// zeroing our map
NgramStats common_stats;
memset(common_stats, 0, sizeof(common_stats)); memset(common_stats, 0, sizeof(common_stats));
const size_t needle_stats_size = calculateNeedleStats(needle.data(), needle.size(), common_stats);
/// We use unsafe versions of getting ngrams, so I decided to use padded_data even in needle case.
const size_t needle_size = needle.size();
needle.resize(needle_size + default_padding);
const size_t needle_stats_size = dispatchSearcher(calculateNeedleStats, needle.data(), needle_size, common_stats);
size_t distance = needle_stats_size; size_t distance = needle_stats_size;
size_t prev_offset = 0; size_t prev_offset = 0;
for (size_t i = 0; i < offsets.size(); ++i) for (size_t i = 0; i < offsets.size(); ++i)
@ -188,12 +302,13 @@ struct TrigramDistanceImpl
const size_t haystack_size = offsets[i] - prev_offset - 1; const size_t haystack_size = offsets[i] - prev_offset - 1;
if (haystack_size <= max_string_size) if (haystack_size <= max_string_size)
{ {
size_t haystack_stats_size size_t haystack_stats_size = dispatchSearcher(
= calculateHaystackStatsAndMetric(reinterpret_cast<const char *>(haystack), haystack_size, common_stats, distance); calculateHaystackStatsAndMetric, reinterpret_cast<const char *>(haystack), haystack_size, common_stats, distance);
res[i] = distance * 1.f / std::max(haystack_stats_size + needle_stats_size, size_t(1)); res[i] = distance * 1.f / std::max(haystack_stats_size + needle_stats_size, size_t(1));
} }
else else
{ {
/// if the strings are too big, we say they are completely not the same
res[i] = 1.f; res[i] = 1.f;
} }
distance = needle_stats_size; distance = needle_stats_size;
@ -203,16 +318,39 @@ struct TrigramDistanceImpl
}; };
struct TrigramDistanceName struct NgramDistanceName
{ {
static constexpr auto name = "trigramDistance"; static constexpr auto name = "ngramDistance";
}; };
using FunctionTrigramsDistance = FunctionsStringSimilarity<TrigramDistanceImpl, TrigramDistanceName>; struct NgramDistanceCaseInsensitiveName
{
static constexpr auto name = "ngramDistanceCaseInsensitive";
};
struct NgramDistanceUTF8Name
{
static constexpr auto name = "ngramDistanceUTF8";
};
struct NgramDistanceUTF8CaseInsensitiveName
{
static constexpr auto name = "ngramDistanceCaseInsensitiveUTF8";
};
using FunctionNgramDistance = FunctionsStringSimilarity<NgramDistanceImpl<4, UInt8, false, false>, NgramDistanceName>;
using FunctionNgramDistanceCaseInsensitive
= FunctionsStringSimilarity<NgramDistanceImpl<4, UInt8, false, true>, NgramDistanceCaseInsensitiveName>;
using FunctionNgramDistanceUTF8 = FunctionsStringSimilarity<NgramDistanceImpl<3, UInt32, true, false>, NgramDistanceUTF8Name>;
using FunctionNgramDistanceCaseInsensitiveUTF8
= FunctionsStringSimilarity<NgramDistanceImpl<3, UInt32, true, true>, NgramDistanceUTF8CaseInsensitiveName>;
void registerFunctionsStringSimilarity(FunctionFactory & factory) void registerFunctionsStringSimilarity(FunctionFactory & factory)
{ {
factory.registerFunction<FunctionTrigramsDistance>(); factory.registerFunction<FunctionNgramDistance>();
factory.registerFunction<FunctionNgramDistanceCaseInsensitive>();
factory.registerFunction<FunctionNgramDistanceUTF8>();
factory.registerFunction<FunctionNgramDistanceCaseInsensitiveUTF8>();
} }
} }

View File

@ -12,8 +12,9 @@ namespace DB
/** Calculate similarity metrics: /** Calculate similarity metrics:
* *
* trigramDistance(haystack, needle) --- calculate so called 3-gram distance between haystack and needle. * ngramDistance(haystack, needle) --- calculate n-gram distance between haystack and needle.
* Returns float number from 0 to 1 - the closer to zero, the more strings are similar to each other. * Returns float number from 0 to 1 - the closer to zero, the more strings are similar to each other.
* Also support CaseInsensitive and UTF8 formats.
*/ */
namespace ErrorCodes namespace ErrorCodes

View File

@ -38,6 +38,11 @@
namespace DB namespace DB
{ {
namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
}
struct HasParam struct HasParam
{ {
using ResultType = UInt8; using ResultType = UInt8;

View File

@ -53,7 +53,7 @@ inline ALWAYS_INLINE void writeSlice(const StringSource::Slice & slice, FixedStr
/// Assuming same types of underlying columns for slice and sink if (ArraySlice, ArraySink) is (GenericArraySlice, GenericArraySink). /// Assuming same types of underlying columns for slice and sink if (ArraySlice, ArraySink) is (GenericArraySlice, GenericArraySink).
inline ALWAYS_INLINE void writeSlice(const GenericArraySlice & slice, GenericArraySink & sink) inline ALWAYS_INLINE void writeSlice(const GenericArraySlice & slice, GenericArraySink & sink)
{ {
if (typeid(slice.elements) == typeid(static_cast<const IColumn *>(&sink.elements))) if (slice.elements->structureEquals(sink.elements))
{ {
sink.elements.insertRangeFrom(*slice.elements, slice.begin, slice.size); sink.elements.insertRangeFrom(*slice.elements, slice.begin, slice.size);
sink.current_offset += slice.size; sink.current_offset += slice.size;
@ -125,7 +125,7 @@ void writeSlice(const NumericValueSlice<T> & slice, NumericArraySink<U> & sink)
/// Assuming same types of underlying columns for slice and sink if (ArraySlice, ArraySink) is (GenericValueSlice, GenericArraySink). /// Assuming same types of underlying columns for slice and sink if (ArraySlice, ArraySink) is (GenericValueSlice, GenericArraySink).
inline ALWAYS_INLINE void writeSlice(const GenericValueSlice & slice, GenericArraySink & sink) inline ALWAYS_INLINE void writeSlice(const GenericValueSlice & slice, GenericArraySink & sink)
{ {
if (typeid(slice.elements) == typeid(static_cast<const IColumn *>(&sink.elements))) if (slice.elements->structureEquals(sink.elements))
{ {
sink.elements.insertFrom(*slice.elements, slice.position); sink.elements.insertFrom(*slice.elements, slice.position);
++sink.current_offset; ++sink.current_offset;
@ -457,7 +457,7 @@ template <bool all>
bool sliceHas(const GenericArraySlice & first, const GenericArraySlice & second) bool sliceHas(const GenericArraySlice & first, const GenericArraySlice & second)
{ {
/// Generic arrays should have the same type in order to use column.compareAt(...) /// Generic arrays should have the same type in order to use column.compareAt(...)
if (typeid(*first.elements) != typeid(*second.elements)) if (!first.elements->structureEquals(*second.elements))
return false; return false;
auto impl = sliceHasImpl<all, GenericArraySlice, GenericArraySlice, sliceEqualElements>; auto impl = sliceHasImpl<all, GenericArraySlice, GenericArraySlice, sliceEqualElements>;

View File

@ -14,7 +14,15 @@
#include <Functions/GatherUtils/Slices.h> #include <Functions/GatherUtils/Slices.h>
#include <Functions/FunctionHelpers.h> #include <Functions/FunctionHelpers.h>
namespace DB::GatherUtils namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
}
namespace GatherUtils
{ {
template <typename T> template <typename T>
@ -660,3 +668,5 @@ struct NullableValueSource : public ValueSource
}; };
} }
}

View File

@ -6,6 +6,11 @@
namespace DB namespace DB
{ {
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
ArraysDepths getArraysDepths(const ColumnsWithTypeAndName & arguments) ArraysDepths getArraysDepths(const ColumnsWithTypeAndName & arguments)
{ {
const size_t num_arguments = arguments.size(); const size_t num_arguments = arguments.size();

View File

@ -22,6 +22,7 @@ namespace ErrorCodes
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_COLUMN;
extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int BAD_ARGUMENTS;
} }

View File

@ -7,6 +7,11 @@
namespace DB namespace DB
{ {
namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
}
/// flatten([[1, 2, 3], [4, 5]]) = [1, 2, 3, 4, 5] - flatten array. /// flatten([[1, 2, 3], [4, 5]]) = [1, 2, 3, 4, 5] - flatten array.
class FunctionFlatten : public IFunction class FunctionFlatten : public IFunction
{ {

View File

@ -53,10 +53,8 @@ public:
size_t rows = input_rows_count; size_t rows = input_rows_count;
size_t num_args = arguments.size(); size_t num_args = arguments.size();
auto result_column = ColumnUInt8::create(rows);
DataTypePtr common_type = nullptr; DataTypePtr common_type = nullptr;
auto commonType = [& common_type, & block, & arguments]() auto commonType = [&common_type, &block, &arguments]()
{ {
if (common_type == nullptr) if (common_type == nullptr)
{ {
@ -106,6 +104,7 @@ public:
throw Exception{"Arguments for function " + getName() + " must be arrays.", ErrorCodes::LOGICAL_ERROR}; throw Exception{"Arguments for function " + getName() + " must be arrays.", ErrorCodes::LOGICAL_ERROR};
} }
auto result_column = ColumnUInt8::create(rows);
auto result_column_ptr = typeid_cast<ColumnUInt8 *>(result_column.get()); auto result_column_ptr = typeid_cast<ColumnUInt8 *>(result_column.get());
GatherUtils::sliceHas(*sources[0], *sources[1], all, *result_column_ptr); GatherUtils::sliceHas(*sources[0], *sources[1], all, *result_column_ptr);

View File

@ -8,6 +8,11 @@
namespace DB namespace DB
{ {
namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
}
/** Creates an array, multiplying the column (the first argument) by the number of elements in the array (the second argument). /** Creates an array, multiplying the column (the first argument) by the number of elements in the array (the second argument).
*/ */
class FunctionReplicate : public IFunction class FunctionReplicate : public IFunction
@ -54,7 +59,7 @@ public:
array_column = checkAndGetColumn<ColumnArray>(temp_column.get()); array_column = checkAndGetColumn<ColumnArray>(temp_column.get());
} }
block.getByPosition(result).column block.getByPosition(result).column
= ColumnArray::create(first_column->replicate(array_column->getOffsets()), array_column->getOffsetsPtr()); = ColumnArray::create(first_column->replicate(array_column->getOffsets())->convertToFullColumnIfConst(), array_column->getOffsetsPtr());
} }
}; };

View File

@ -17,6 +17,7 @@ namespace ErrorCodes
{ {
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int ILLEGAL_COLUMN;
} }
/** timeSlots(StartTime, Duration) /** timeSlots(StartTime, Duration)

View File

@ -55,14 +55,11 @@ struct Progress
/// Each value separately is changed atomically (but not whole object). /// Each value separately is changed atomically (but not whole object).
bool incrementPiecewiseAtomically(const Progress & rhs) bool incrementPiecewiseAtomically(const Progress & rhs)
{ {
if (!rhs.rows)
return false;
rows += rhs.rows; rows += rhs.rows;
bytes += rhs.bytes; bytes += rhs.bytes;
total_rows += rhs.total_rows; total_rows += rhs.total_rows;
return true; return rhs.rows ? true : false;
} }
void reset() void reset()

View File

@ -34,7 +34,7 @@ private:
size_t old_size = vector.size(); size_t old_size = vector.size();
vector.resize(old_size * 2); vector.resize(old_size * 2);
internal_buffer = Buffer(reinterpret_cast<Position>(&vector[old_size]), reinterpret_cast<Position>(vector.data() + vector.size())); internal_buffer = Buffer(reinterpret_cast<Position>(vector.data() + old_size), reinterpret_cast<Position>(vector.data() + vector.size()));
working_buffer = internal_buffer; working_buffer = internal_buffer;
} }

View File

@ -3,6 +3,7 @@
#include <Core/Defines.h> #include <Core/Defines.h>
#include <IO/WriteBuffer.h> #include <IO/WriteBuffer.h>
#include <common/itoa.h> #include <common/itoa.h>
#include <common/likely.h>
/// 40 digits or 39 digits and a sign /// 40 digits or 39 digits and a sign
#define WRITE_HELPERS_MAX_INT_WIDTH 40U #define WRITE_HELPERS_MAX_INT_WIDTH 40U

View File

@ -102,23 +102,23 @@ static inline T ALWAYS_INLINE packFixed(
switch (key_sizes[j]) switch (key_sizes[j])
{ {
case 1: case 1:
memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(column)->getRawDataBegin() + index, 1); memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(column)->getRawDataBegin<1>() + index, 1);
offset += 1; offset += 1;
break; break;
case 2: case 2:
memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(column)->getRawDataBegin() + index * 2, 2); memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(column)->getRawDataBegin<2>() + index * 2, 2);
offset += 2; offset += 2;
break; break;
case 4: case 4:
memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(column)->getRawDataBegin() + index * 4, 4); memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(column)->getRawDataBegin<4>() + index * 4, 4);
offset += 4; offset += 4;
break; break;
case 8: case 8:
memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(column)->getRawDataBegin() + index * 8, 8); memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(column)->getRawDataBegin<8>() + index * 8, 8);
offset += 8; offset += 8;
break; break;
default: default:
memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(column)->getRawDataBegin() + index * key_sizes[j], key_sizes[j]); memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(column)->getRawDataBegin<1>() + index * key_sizes[j], key_sizes[j]);
offset += key_sizes[j]; offset += key_sizes[j];
} }
} }
@ -168,23 +168,23 @@ static inline T ALWAYS_INLINE packFixed(
switch (key_sizes[j]) switch (key_sizes[j])
{ {
case 1: case 1:
memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(key_columns[j])->getRawDataBegin() + i, 1); memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(key_columns[j])->getRawDataBegin<1>() + i, 1);
offset += 1; offset += 1;
break; break;
case 2: case 2:
memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(key_columns[j])->getRawDataBegin() + i * 2, 2); memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(key_columns[j])->getRawDataBegin<2>() + i * 2, 2);
offset += 2; offset += 2;
break; break;
case 4: case 4:
memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(key_columns[j])->getRawDataBegin() + i * 4, 4); memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(key_columns[j])->getRawDataBegin<4>() + i * 4, 4);
offset += 4; offset += 4;
break; break;
case 8: case 8:
memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(key_columns[j])->getRawDataBegin() + i * 8, 8); memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(key_columns[j])->getRawDataBegin<8>() + i * 8, 8);
offset += 8; offset += 8;
break; break;
default: default:
memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(key_columns[j])->getRawDataBegin() + i * key_sizes[j], key_sizes[j]); memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(key_columns[j])->getRawDataBegin<1>() + i * key_sizes[j], key_sizes[j]);
offset += key_sizes[j]; offset += key_sizes[j];
} }
} }

Some files were not shown because too many files have changed in this diff Show More