Merge branch 'master' into enable_use_minimalistic_part_header_in_zookeeper

Alexey Milovidov 2019-09-23 22:26:54 +03:00
commit 1744e9a13f
710 changed files with 16508 additions and 28996 deletions

View File

@@ -17,7 +17,7 @@ A clear and concise description of what works not as it is supposed to.
 * Which interface to use, if matters
 * Non-default settings, if any
 * `CREATE TABLE` statements for all tables involved
-* Sample data for all these tables, use [clickhouse-obfuscator](https://github.com/yandex/ClickHouse/blob/master/dbms/programs/obfuscator/Obfuscator.cpp#L42-L80) if necessary
+* Sample data for all these tables, use [clickhouse-obfuscator](https://github.com/ClickHouse/ClickHouse/blob/master/dbms/programs/obfuscator/Obfuscator.cpp#L42-L80) if necessary
 * Queries to run that lead to unexpected result
 **Expected behavior**

View File

@@ -17,7 +17,7 @@ What exactly works slower than expected?
 * Which interface to use, if matters
 * Non-default settings, if any
 * `CREATE TABLE` statements for all tables involved
-* Sample data for all these tables, use [clickhouse-obfuscator](https://github.com/yandex/ClickHouse/blob/master/dbms/programs/obfuscator/Obfuscator.cpp#L42-L80) if necessary
+* Sample data for all these tables, use [clickhouse-obfuscator](https://github.com/ClickHouse/ClickHouse/blob/master/dbms/programs/obfuscator/Obfuscator.cpp#L42-L80) if necessary
 * Queries to run that lead to slow performance
 **Expected performance**

.gitmodules
View File

@@ -103,3 +103,6 @@
 [submodule "contrib/orc"]
     path = contrib/orc
     url = https://github.com/apache/orc
+[submodule "contrib/sparsehash-c11"]
+    path = contrib/sparsehash-c11
+    url = https://github.com/sparsehash/sparsehash-c11.git

File diff suppressed because it is too large

View File

@@ -1,3 +1,6 @@
+project(ClickHouse)
+cmake_minimum_required(VERSION 3.3)
+
 foreach(policy
     CMP0023
     CMP0048 # CMake 3.0
@@ -10,9 +13,6 @@ foreach(policy
     endif()
 endforeach()
 
-project(ClickHouse)
-cmake_minimum_required(VERSION 3.3)
-
 # Ignore export() since we don't use it,
 # but it gets broken with a global targets via link_libraries()
 macro (export)
@@ -91,6 +91,14 @@ if (USE_STATIC_LIBRARIES)
     list(REVERSE CMAKE_FIND_LIBRARY_SUFFIXES)
 endif ()
 
+option (ENABLE_FUZZING "Enables fuzzing instrumentation" OFF)
+
+if (ENABLE_FUZZING)
+    message (STATUS "Fuzzing instrumentation enabled")
+    set (WITH_COVERAGE ON)
+    set (SANITIZE "libfuzzer")
+endif()
+
 include (cmake/sanitize.cmake)
@@ -139,7 +147,7 @@ endif ()
 string(REGEX MATCH "-?[0-9]+(.[0-9]+)?$" COMPILER_POSTFIX ${CMAKE_CXX_COMPILER})
 
 find_program (LLD_PATH NAMES "lld${COMPILER_POSTFIX}" "lld")
-find_program (GOLD_PATH NAMES "gold")
+find_program (GOLD_PATH NAMES "ld.gold" "gold")
 
 if (COMPILER_CLANG AND LLD_PATH AND NOT LINKER_NAME)
     set (LINKER_NAME "lld")
@@ -150,8 +158,32 @@ endif ()
 if (LINKER_NAME)
     message(STATUS "Using linker: ${LINKER_NAME} (selected from: LLD_PATH=${LLD_PATH}; GOLD_PATH=${GOLD_PATH}; COMPILER_POSTFIX=${COMPILER_POSTFIX})")
     set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fuse-ld=${LINKER_NAME}")
+    set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -fuse-ld=${LINKER_NAME}")
 endif ()
 
+# Make sure the final executable has symbols exported
+set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -rdynamic")
+
+option (ADD_GDB_INDEX_FOR_GOLD "Set to add .gdb-index to resulting binaries for gold linker. NOOP if lld is used." 0)
+if (NOT CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE")
+    if (LINKER_NAME STREQUAL "lld")
+        set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--gdb-index")
+        set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--gdb-index")
+        message (STATUS "Adding .gdb-index via --gdb-index linker option.")
+    # we use another tool for gdb-index, because gold linker removes section .debug_aranges, which used inside clickhouse stacktraces
+    # http://sourceware-org.1504.n7.nabble.com/gold-No-debug-aranges-section-when-linking-with-gdb-index-td540965.html#a556932
+    elseif (LINKER_NAME STREQUAL "gold" AND ADD_GDB_INDEX_FOR_GOLD)
+        find_program (GDB_ADD_INDEX_EXE NAMES "gdb-add-index" DOC "Path to gdb-add-index executable")
+        if (NOT GDB_ADD_INDEX_EXE)
+            set (USE_GDB_ADD_INDEX 0)
+            message (WARNING "Cannot add gdb index to binaries, because gold linker is used, but gdb-add-index executable not found.")
+        else()
+            set (USE_GDB_ADD_INDEX 1)
+            message (STATUS "gdb-add-index found: ${GDB_ADD_INDEX_EXE}")
+        endif()
+    endif ()
+endif()
+
 cmake_host_system_information(RESULT AVAILABLE_PHYSICAL_MEMORY QUERY AVAILABLE_PHYSICAL_MEMORY) # Not available under freebsd
 if(NOT AVAILABLE_PHYSICAL_MEMORY OR AVAILABLE_PHYSICAL_MEMORY GREATER 8000)
     option(COMPILER_PIPE "-pipe compiler option [less /tmp usage, more ram usage]" ON)
@@ -226,8 +258,8 @@ endif ()
 # Make this extra-checks for correct library dependencies.
 if (NOT SANITIZE)
     set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-undefined")
-    set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--no-undefined")
+    set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-undefined")
 endif ()
 
 include(cmake/dbms_glob_sources.cmake)
@@ -286,18 +318,6 @@ else ()
     set (CLICKHOUSE_ETC_DIR "${CMAKE_INSTALL_PREFIX}/etc")
 endif ()
 
-option (UNBUNDLED "Try find all libraries in system. We recommend to avoid this mode for production builds, because we cannot guarantee exact versions and variants of libraries your system has installed. This mode exists for enthusiastic developers who search for trouble. Also it is useful for maintainers of OS packages." OFF)
-if (UNBUNDLED)
-    set(NOT_UNBUNDLED 0)
-else ()
-    set(NOT_UNBUNDLED 1)
-endif ()
-
-# Using system libs can cause lot of warnings in includes.
-if (UNBUNDLED OR NOT (OS_LINUX OR APPLE) OR ARCH_32)
-    option (NO_WERROR "Disable -Werror compiler option" ON)
-endif ()
-
 message (STATUS "Building for: ${CMAKE_SYSTEM} ${CMAKE_SYSTEM_PROCESSOR} ${CMAKE_LIBRARY_ARCHITECTURE} ; USE_STATIC_LIBRARIES=${USE_STATIC_LIBRARIES} MAKE_STATIC_LIBRARIES=${MAKE_STATIC_LIBRARIES} SPLIT_SHARED=${SPLIT_SHARED_LIBRARIES} UNBUNDLED=${UNBUNDLED} CCACHE=${CCACHE_FOUND} ${CCACHE_VERSION}")
 
 include(GNUInstallDirs)
@@ -343,7 +363,7 @@ include (cmake/find_hyperscan.cmake)
 include (cmake/find_simdjson.cmake)
 include (cmake/find_rapidjson.cmake)
 include (cmake/find_fastops.cmake)
-include (cmake/find_orc.cmake)
+#include (cmake/find_orc.cmake)
 
 find_contrib_lib(cityhash)
 find_contrib_lib(farmhash)
@@ -360,6 +380,29 @@ include (libs/libcommon/cmake/find_jemalloc.cmake)
 include (libs/libcommon/cmake/find_cctz.cmake)
 include (libs/libmysqlxx/cmake/find_mysqlclient.cmake)
 
+# When testing for memory leaks with Valgrind, don't link tcmalloc or jemalloc.
+
+if (USE_JEMALLOC)
+    message (STATUS "Link jemalloc: ${JEMALLOC_LIBRARIES}")
+    set (MALLOC_LIBRARIES ${JEMALLOC_LIBRARIES})
+elseif (USE_TCMALLOC)
+    if (DEBUG_TCMALLOC AND NOT GPERFTOOLS_TCMALLOC_MINIMAL_DEBUG)
+        message (FATAL_ERROR "Requested DEBUG_TCMALLOC but debug library is not found. You should install Google Perftools. Example: sudo apt-get install libgoogle-perftools-dev")
+    endif ()
+
+    if (DEBUG_TCMALLOC AND GPERFTOOLS_TCMALLOC_MINIMAL_DEBUG)
+        message (STATUS "Link libtcmalloc_minimal_debug for testing: ${GPERFTOOLS_TCMALLOC_MINIMAL_DEBUG}")
+        set (MALLOC_LIBRARIES ${GPERFTOOLS_TCMALLOC_MINIMAL_DEBUG})
+    else ()
+        message (STATUS "Link libtcmalloc_minimal: ${GPERFTOOLS_TCMALLOC_MINIMAL}")
+        set (MALLOC_LIBRARIES ${GPERFTOOLS_TCMALLOC_MINIMAL})
+    endif ()
+elseif (SANITIZE)
+    message (STATUS "Will use ${SANITIZE} sanitizer.")
+else ()
+    message (WARNING "Non default allocator is disabled. This is not recommended for production Linux builds.")
+endif ()
+
 include (cmake/print_flags.cmake)
 
 install (EXPORT global DESTINATION cmake)

View File

@@ -12,8 +12,7 @@
 # https://youtrack.jetbrains.com/issue/CPP-2659
 # https://youtrack.jetbrains.com/issue/CPP-870
 
-string(TOLOWER "${CMAKE_COMMAND}" CMAKE_COMMAND_LOWER)
-if (NOT ${CMAKE_COMMAND_LOWER} MATCHES "clion")
+if (NOT DEFINED ENV{CLION_IDE})
     find_program(NINJA_PATH ninja)
     if (NINJA_PATH)
         set(CMAKE_GENERATOR "Ninja" CACHE INTERNAL "" FORCE)

View File

@@ -1,4 +1,4 @@
-[![ClickHouse — open source distributed column-oriented DBMS](https://github.com/yandex/ClickHouse/raw/master/website/images/logo-400x240.png)](https://clickhouse.yandex)
+[![ClickHouse — open source distributed column-oriented DBMS](https://github.com/ClickHouse/ClickHouse/raw/master/website/images/logo-400x240.png)](https://clickhouse.yandex)
 
 ClickHouse is an open-source column-oriented database management system that allows generating analytical data reports in real time.
@@ -13,9 +13,12 @@ ClickHouse is an open-source column-oriented database management system that all
 * You can also [fill this form](https://forms.yandex.com/surveys/meet-yandex-clickhouse-team/) to meet Yandex ClickHouse team in person.
 
 ## Upcoming Events
-* [ClickHouse Meetup in Moscow](https://yandex.ru/promo/clickhouse/moscow-2019) on September 5.
-* [ClickHouse Meetup in Munich](https://www.meetup.com/ClickHouse-Meetup-Munich/events/264185199/) on September 17.
 * [ClickHouse Meetup in Paris](https://www.eventbrite.com/e/clickhouse-paris-meetup-2019-registration-68493270215) on October 3.
+* [ClickHouse Meetup in San Francisco](https://www.meetup.com/San-Francisco-Bay-Area-ClickHouse-Meetup/events/264242199/) on October 9.
 * [ClickHouse Meetup in Hong Kong](https://www.meetup.com/Hong-Kong-Machine-Learning-Meetup/events/263580542/) on October 17.
 * [ClickHouse Meetup in Shenzhen](https://www.huodongxing.com/event/3483759917300) on October 20.
 * [ClickHouse Meetup in Shanghai](https://www.huodongxing.com/event/4483760336000) on October 27.
+* [ClickHouse Meetup in Tokyo](https://clickhouse.connpass.com/event/147001/) on November 14.
+* [ClickHouse Meetup in Istanbul](https://www.eventbrite.com/e/clickhouse-meetup-istanbul-create-blazing-fast-experiences-w-clickhouse-tickets-73101120419) on November 19.
+* [ClickHouse Meetup in Ankara](https://www.eventbrite.com/e/clickhouse-meetup-ankara-create-blazing-fast-experiences-w-clickhouse-tickets-73100530655) on November 21.

View File

@@ -14,4 +14,4 @@ currently being supported with security updates:
 ## Reporting a Vulnerability
 
 To report a potential vulnerability in ClickHouse please use the security advisory feature of GitHub:
-https://github.com/yandex/ClickHouse/security/advisories
+https://github.com/ClickHouse/ClickHouse/security/advisories

View File

@@ -1,20 +1,26 @@
-option (USE_CAPNP "Enable Cap'n Proto" ON)
-if (USE_CAPNP)
-    option (USE_INTERNAL_CAPNP_LIBRARY "Set to FALSE to use system capnproto library instead of bundled" ${NOT_UNBUNDLED})
-    # FIXME: refactor to use `add_library( IMPORTED)` if possible.
-    if (NOT USE_INTERNAL_CAPNP_LIBRARY)
-        find_library (KJ kj)
-        find_library (CAPNP capnp)
-        find_library (CAPNPC capnpc)
-        set (CAPNP_LIBRARIES ${CAPNPC} ${CAPNP} ${KJ})
-    else ()
-        add_subdirectory(contrib/capnproto-cmake)
-        set (CAPNP_LIBRARIES capnpc)
-    endif ()
-    message (STATUS "Using capnp: ${CAPNP_LIBRARIES}")
-endif ()
+option (ENABLE_CAPNP "Enable Cap'n Proto" ON)
+if (ENABLE_CAPNP)
+    option (USE_INTERNAL_CAPNP_LIBRARY "Set to FALSE to use system capnproto library instead of bundled" ${NOT_UNBUNDLED})
+    # FIXME: refactor to use `add_library( IMPORTED)` if possible.
+    if (NOT USE_INTERNAL_CAPNP_LIBRARY)
+        find_library (KJ kj)
+        find_library (CAPNP capnp)
+        find_library (CAPNPC capnpc)
+        set (CAPNP_LIBRARIES ${CAPNPC} ${CAPNP} ${KJ})
+    else ()
+        add_subdirectory(contrib/capnproto-cmake)
+        set (CAPNP_LIBRARIES capnpc)
+    endif ()
+    if (CAPNP_LIBRARIES)
+        set (USE_CAPNP 1)
+    endif ()
+endif ()
+message (STATUS "Using capnp: ${CAPNP_LIBRARIES}")

View File

@@ -1,24 +1,29 @@
-if (NOT ARCH_ARM AND NOT OS_FREEBSD AND NOT APPLE AND USE_PROTOBUF)
-    option (ENABLE_HDFS "Enable HDFS" ${NOT_UNBUNDLED})
-endif ()
-if (ENABLE_HDFS AND NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libhdfs3/include/hdfs/hdfs.h")
-    message (WARNING "submodule contrib/libhdfs3 is missing. to fix try run: \n git submodule update --init --recursive")
-    set (ENABLE_HDFS 0)
-endif ()
-if (ENABLE_HDFS)
-    option (USE_INTERNAL_HDFS3_LIBRARY "Set to FALSE to use system HDFS3 instead of bundled" ON)
-    if (NOT USE_INTERNAL_HDFS3_LIBRARY)
-        find_package(hdfs3)
-    endif ()
-    if (HDFS3_LIBRARY AND HDFS3_INCLUDE_DIR)
-        set(USE_HDFS 1)
-    elseif (LIBGSASL_LIBRARY AND LIBXML2_LIBRARY)
-        set(HDFS3_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libhdfs3/include")
-        set(HDFS3_LIBRARY hdfs3)
-        set(USE_HDFS 1)
-    else()
-        set(USE_INTERNAL_HDFS3_LIBRARY 0)
+if(NOT ARCH_ARM AND NOT OS_FREEBSD AND NOT APPLE AND USE_PROTOBUF)
+    option(ENABLE_HDFS "Enable HDFS" 1)
+endif()
+if(ENABLE_HDFS)
+    option(USE_INTERNAL_HDFS3_LIBRARY "Set to FALSE to use system HDFS3 instead of bundled" ${NOT_UNBUNDLED})
+    if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libhdfs3/include/hdfs/hdfs.h")
+        if(USE_INTERNAL_HDFS3_LIBRARY)
+            message(WARNING "submodule contrib/libhdfs3 is missing. to fix try run: \n git submodule update --init --recursive")
+        endif()
+        set(MISSING_INTERNAL_HDFS3_LIBRARY 1)
+        set(USE_INTERNAL_HDFS3_LIBRARY 0)
+    endif()
+    if(NOT USE_INTERNAL_HDFS3_LIBRARY)
+        find_library(HDFS3_LIBRARY hdfs3)
+        find_path(HDFS3_INCLUDE_DIR NAMES hdfs/hdfs.h PATHS ${HDFS3_INCLUDE_PATHS})
+    endif()
+    if(HDFS3_LIBRARY AND HDFS3_INCLUDE_DIR)
+        set(USE_HDFS 1)
+    elseif(NOT MISSING_INTERNAL_HDFS3_LIBRARY AND LIBGSASL_LIBRARY AND LIBXML2_LIBRARY)
+        set(HDFS3_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libhdfs3/include")
+        set(HDFS3_LIBRARY hdfs3)
+        set(USE_INTERNAL_HDFS3_LIBRARY 1)
+        set(USE_HDFS 1)
+    else()
+        set(USE_INTERNAL_HDFS3_LIBRARY 0)
@@ -26,4 +31,4 @@ endif()
 endif()
 
-message (STATUS "Using hdfs3=${USE_HDFS}: ${HDFS3_INCLUDE_DIR} : ${HDFS3_LIBRARY}")
+message(STATUS "Using hdfs3=${USE_HDFS}: ${HDFS3_INCLUDE_DIR} : ${HDFS3_LIBRARY}")

View File

@@ -18,22 +18,12 @@ if (ENABLE_EMBEDDED_COMPILER)
 elseif (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
     find_package(LLVM ${CMAKE_CXX_COMPILER_VERSION} CONFIG PATHS ${LLVM_PATHS})
 else ()
-    #TODO:
-    #if(NOT LLVM_FOUND)
-    #    find_package(LLVM 9 CONFIG PATHS ${LLVM_PATHS})
-    #endif()
-    #if(NOT LLVM_FOUND)
-    #    find_package(LLVM 8 CONFIG PATHS ${LLVM_PATHS})
-    #endif()
-    if (NOT LLVM_FOUND)
-        find_package (LLVM 7 CONFIG PATHS ${LLVM_PATHS})
-    endif ()
-    if (NOT LLVM_FOUND)
-        find_package (LLVM 6 CONFIG PATHS ${LLVM_PATHS})
-    endif ()
-    if (NOT LLVM_FOUND)
-        find_package (LLVM 5 CONFIG PATHS ${LLVM_PATHS})
-    endif ()
+    # TODO: 9 8
+    foreach(llvm_v 7.1 7 6 5)
+        if (NOT LLVM_FOUND)
+            find_package (LLVM ${llvm_v} CONFIG PATHS ${LLVM_PATHS})
+        endif ()
+    endforeach ()
 endif ()
 
 if (LLVM_FOUND)

View File

@@ -1,8 +1,38 @@
-##TODO replace hardcode to find procedure
-set(USE_ORC 0)
-set(USE_INTERNAL_ORC_LIBRARY ON)
-if (ARROW_LIBRARY)
-    set(USE_ORC 1)
-endif()
+option (ENABLE_ORC "Enable ORC" 1)
+if(ENABLE_ORC)
+    option (USE_INTERNAL_ORC_LIBRARY "Set to FALSE to use system ORC instead of bundled" ${NOT_UNBUNDLED})
+    if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/orc/c++/include/orc/OrcFile.hh")
+        if(USE_INTERNAL_ORC_LIBRARY)
+            message(WARNING "submodule contrib/orc is missing. to fix try run: \n git submodule update --init --recursive")
+            set(USE_INTERNAL_ORC_LIBRARY 0)
+        endif()
+        set(MISSING_INTERNAL_ORC_LIBRARY 1)
+    endif ()
+    if (NOT USE_INTERNAL_ORC_LIBRARY)
+        find_package(orc)
+    endif ()
+    #if (USE_INTERNAL_ORC_LIBRARY)
+        #find_path(CYRUS_SASL_INCLUDE_DIR sasl/sasl.h)
+        #find_library(CYRUS_SASL_SHARED_LIB sasl2)
+        #if (NOT CYRUS_SASL_INCLUDE_DIR OR NOT CYRUS_SASL_SHARED_LIB)
+        #    set(USE_ORC 0)
+        #endif()
+    #endif()
+    if (ORC_LIBRARY AND ORC_INCLUDE_DIR)
+        set(USE_ORC 1)
+    elseif(NOT MISSING_INTERNAL_ORC_LIBRARY AND ARROW_LIBRARY) # (LIBGSASL_LIBRARY AND LIBXML2_LIBRARY)
+        set(ORC_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/orc/c++/include")
+        set(ORC_LIBRARY orc)
+        set(USE_ORC 1)
+    else()
+        set(USE_INTERNAL_ORC_LIBRARY 0)
+    endif()
+endif()
+
+message (STATUS "Using internal=${USE_INTERNAL_ORC_LIBRARY} orc=${USE_ORC}: ${ORC_INCLUDE_DIR} : ${ORC_LIBRARY}")

View File

@@ -62,6 +62,7 @@ elseif(NOT MISSING_INTERNAL_PARQUET_LIBRARY AND NOT OS_FREEBSD)
     endif()
 
     set(USE_PARQUET 1)
+    set(USE_ORC 1)
 endif()
 endif()

View File

@@ -1,13 +1,13 @@
-option (USE_INTERNAL_SPARCEHASH_LIBRARY "Set to FALSE to use system sparsehash library instead of bundled" ${NOT_UNBUNDLED})
+option (USE_INTERNAL_SPARSEHASH_LIBRARY "Set to FALSE to use system sparsehash library instead of bundled" ${NOT_UNBUNDLED})
 
-if (NOT USE_INTERNAL_SPARCEHASH_LIBRARY)
-    find_path (SPARCEHASH_INCLUDE_DIR NAMES sparsehash/sparse_hash_map PATHS ${SPARCEHASH_INCLUDE_PATHS})
+if (NOT USE_INTERNAL_SPARSEHASH_LIBRARY)
+    find_path (SPARSEHASH_INCLUDE_DIR NAMES sparsehash/sparse_hash_map PATHS ${SPARSEHASH_INCLUDE_PATHS})
 endif ()
 
-if (SPARCEHASH_INCLUDE_DIR)
+if (SPARSEHASH_INCLUDE_DIR)
 else ()
-    set (USE_INTERNAL_SPARCEHASH_LIBRARY 1)
-    set (SPARCEHASH_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libsparsehash")
+    set (USE_INTERNAL_SPARSEHASH_LIBRARY 1)
+    set (SPARSEHASH_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/sparsehash-c11")
 endif ()
 
-message (STATUS "Using sparsehash: ${SPARCEHASH_INCLUDE_DIR}")
+message (STATUS "Using sparsehash: ${SPARSEHASH_INCLUDE_DIR}")

View File

@@ -42,6 +42,19 @@ if (SANITIZE)
         if (MAKE_STATIC_LIBRARIES AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
             set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libubsan")
         endif ()
+
+    elseif (SANITIZE STREQUAL "libfuzzer")
+        # NOTE: Eldar Zaitov decided to name it "libfuzzer" instead of "fuzzer" to keep in mind another possible fuzzer backends.
+        # NOTE: no-link means that all the targets are built with instrumentation for fuzzer, but only some of them (tests) have entry point for fuzzer and it's not checked.
+        set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} -fsanitize=fuzzer-no-link,address,undefined -fsanitize-address-use-after-scope")
+        set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} -fsanitize=fuzzer-no-link,address,undefined -fsanitize-address-use-after-scope")
+        if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
+            set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=fuzzer-no-link,address,undefined -fsanitize-address-use-after-scope")
+        endif()
+        if (MAKE_STATIC_LIBRARIES AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
+            set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libasan -static-libubsan")
+        endif ()
+        set (LIBFUZZER_CMAKE_CXX_FLAGS "-fsanitize=fuzzer,address,undefined -fsanitize-address-use-after-scope")
+
     else ()
         message (FATAL_ERROR "Unknown sanitizer type: ${SANITIZE}")
     endif ()
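
Editor's note: the "fuzzer-no-link" comment above means all code is instrumented but only dedicated harness targets define a fuzzer entry point. A minimal sketch of such a harness, assuming clang's libFuzzer conventions (the `parse` function here is a hypothetical stand-in for code under test):

```cpp
#include <cstddef>
#include <cstdint>

// Hypothetical function under test; not part of this commit.
static bool parse(const uint8_t* data, size_t size) {
    return size > 0 && data[0] == '{';
}

// libFuzzer calls this repeatedly with generated inputs. Only the harness
// target is linked with -fsanitize=fuzzer; everything else gets
// -fsanitize=fuzzer-no-link instrumentation, as set up above.
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
    parse(data, size);
    return 0;  // non-zero return values are reserved; always return 0
}
```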

View File

@@ -10,19 +10,6 @@ endif ()
 set_property(DIRECTORY PROPERTY EXCLUDE_FROM_ALL 1)
 
-if (USE_INTERNAL_ORC_LIBRARY)
-    set(BUILD_JAVA OFF)
-    set (ANALYZE_JAVA OFF)
-    set (BUILD_CPP_TESTS OFF)
-    set (BUILD_TOOLS OFF)
-    option(BUILD_JAVA OFF)
-    option (ANALYZE_JAVA OFF)
-    option (BUILD_CPP_TESTS OFF)
-    option (BUILD_TOOLS OFF)
-    set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/contrib/orc/cmake_modules")
-    add_subdirectory(orc)
-endif()
-
 if (USE_INTERNAL_BOOST_LIBRARY)
     add_subdirectory (boost-cmake)
 endif ()
@@ -327,3 +314,7 @@ endif()
 if (USE_FASTOPS)
     add_subdirectory (fastops-cmake)
 endif()
+
+#if (USE_INTERNAL_ORC_LIBRARY)
+#    add_subdirectory(orc-cmake)
+#endif ()

View File

@@ -56,11 +56,11 @@ set(ORC_SOURCE_WRAP_DIR ${ORC_SOURCE_DIR}/wrap)
 set(ORC_BUILD_SRC_DIR ${CMAKE_CURRENT_BINARY_DIR}/../orc/c++/src)
 set(ORC_BUILD_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/../orc/c++/include)
-set(GOOGLE_PROTOBUF_DIR ${ClickHouse_SOURCE_DIR}/contrib/protobuf/src/)
+set(GOOGLE_PROTOBUF_DIR ${Protobuf_INCLUDE_DIR}/)
 set(ORC_ADDITION_SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR})
 set(ARROW_SRC_DIR ${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src)
-set(PROTOBUF_EXECUTABLE ${CMAKE_CURRENT_BINARY_DIR}/../protobuf/cmake/protoc)
+set(PROTOBUF_EXECUTABLE ${Protobuf_PROTOC_EXECUTABLE})
 set(PROTO_DIR ${ORC_SOURCE_DIR}/../proto)
@@ -70,14 +70,10 @@ add_custom_command(OUTPUT orc_proto.pb.h orc_proto.pb.cc
     --cpp_out="${CMAKE_CURRENT_BINARY_DIR}"
     "${PROTO_DIR}/orc_proto.proto")
 
-include_directories(SYSTEM ${ORC_INCLUDE_DIR})
-include_directories(SYSTEM ${ORC_SOURCE_SRC_DIR})
-include_directories(SYSTEM ${ORC_SOURCE_WRAP_DIR})
-include_directories(SYSTEM ${GOOGLE_PROTOBUF_DIR})
-include_directories(SYSTEM ${ORC_BUILD_SRC_DIR})
-include_directories(SYSTEM ${ORC_BUILD_INCLUDE_DIR})
-include_directories(SYSTEM ${ORC_ADDITION_SOURCE_DIR})
-include_directories(SYSTEM ${ARROW_SRC_DIR})
+include(${ClickHouse_SOURCE_DIR}/contrib/orc/cmake_modules/CheckSourceCompiles.cmake)
+include(orc_check.cmake)
+configure_file("${ORC_INCLUDE_DIR}/orc/orc-config.hh.in" "${ORC_BUILD_INCLUDE_DIR}/orc/orc-config.hh")
+configure_file("${ORC_SOURCE_SRC_DIR}/Adaptor.hh.in" "${ORC_BUILD_INCLUDE_DIR}/Adaptor.hh")
 
 set(ORC_SRCS
@@ -232,6 +228,14 @@ if (ARROW_WITH_ZSTD)
     target_link_libraries(${ARROW_LIBRARY} PRIVATE ${ZSTD_LIBRARY})
 endif()
 
+target_include_directories(${ARROW_LIBRARY} PRIVATE SYSTEM ${ORC_INCLUDE_DIR})
+target_include_directories(${ARROW_LIBRARY} PRIVATE SYSTEM ${ORC_SOURCE_SRC_DIR})
+target_include_directories(${ARROW_LIBRARY} PRIVATE SYSTEM ${ORC_SOURCE_WRAP_DIR})
+target_include_directories(${ARROW_LIBRARY} PRIVATE SYSTEM ${GOOGLE_PROTOBUF_DIR})
+target_include_directories(${ARROW_LIBRARY} PRIVATE SYSTEM ${ORC_BUILD_SRC_DIR})
+target_include_directories(${ARROW_LIBRARY} PRIVATE SYSTEM ${ORC_BUILD_INCLUDE_DIR})
+target_include_directories(${ARROW_LIBRARY} PRIVATE SYSTEM ${ORC_ADDITION_SOURCE_DIR})
+target_include_directories(${ARROW_LIBRARY} PRIVATE SYSTEM ${ARROW_SRC_DIR})
+
 # === parquet

View File

@@ -0,0 +1,130 @@
# Not changed part of contrib/orc/c++/src/CMakeLists.txt
INCLUDE(CheckCXXSourceCompiles)
CHECK_CXX_SOURCE_COMPILES("
#include<fcntl.h>
#include<unistd.h>
int main(int,char*[]){
int f = open(\"/x/y\", O_RDONLY);
char buf[100];
return pread(f, buf, 100, 1000) == 0;
}"
HAS_PREAD
)
CHECK_CXX_SOURCE_COMPILES("
#include<time.h>
int main(int,char*[]){
struct tm time2020;
return !strptime(\"2020-02-02 12:34:56\", \"%Y-%m-%d %H:%M:%S\", &time2020);
}"
HAS_STRPTIME
)
CHECK_CXX_SOURCE_COMPILES("
#include<string>
int main(int,char* argv[]){
return static_cast<int>(std::stoll(argv[0]));
}"
HAS_STOLL
)
CHECK_CXX_SOURCE_COMPILES("
#include<stdint.h>
#include<stdio.h>
int main(int,char*[]){
int64_t x = 1; printf(\"%lld\",x);
}"
INT64_IS_LL
)
CHECK_CXX_SOURCE_COMPILES("
#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored \"-Wdeprecated\"
#pragma clang diagnostic pop
#elif defined(__GNUC__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored \"-Wdeprecated\"
#pragma GCC diagnostic pop
#elif defined(_MSC_VER)
#pragma warning( push )
#pragma warning( disable : 4996 )
#pragma warning( pop )
#else
unknownCompiler!
#endif
int main(int, char *[]) {}"
HAS_DIAGNOSTIC_PUSH
)
CHECK_CXX_SOURCE_COMPILES("
#include<cmath>
int main(int, char *[]) {
return std::isnan(1.0f);
}"
HAS_STD_ISNAN
)
CHECK_CXX_SOURCE_COMPILES("
#include<mutex>
int main(int, char *[]) {
std::mutex test_mutex;
std::lock_guard<std::mutex> lock_mutex(test_mutex);
}"
HAS_STD_MUTEX
)
CHECK_CXX_SOURCE_COMPILES("
#include<string>
std::string func() {
std::string var = \"test\";
return std::move(var);
}
int main(int, char *[]) {}"
NEEDS_REDUNDANT_MOVE
)
INCLUDE(CheckCXXSourceRuns)
CHECK_CXX_SOURCE_RUNS("
#include<time.h>
int main(int, char *[]) {
time_t t = -14210715; // 1969-07-20 12:34:45
struct tm *ptm = gmtime(&t);
return !(ptm && ptm->tm_year == 69);
}"
HAS_PRE_1970
)
CHECK_CXX_SOURCE_RUNS("
#include<stdlib.h>
#include<time.h>
int main(int, char *[]) {
setenv(\"TZ\", \"America/Los_Angeles\", 1);
tzset();
struct tm time2037;
struct tm time2038;
strptime(\"2037-05-05 12:34:56\", \"%Y-%m-%d %H:%M:%S\", &time2037);
strptime(\"2038-05-05 12:34:56\", \"%Y-%m-%d %H:%M:%S\", &time2038);
return mktime(&time2038) - mktime(&time2037) != 31536000;
}"
HAS_POST_2038
)
set(CMAKE_REQUIRED_INCLUDES ${ZLIB_INCLUDE_DIR})
set(CMAKE_REQUIRED_LIBRARIES zlib)
CHECK_CXX_SOURCE_COMPILES("
#define Z_PREFIX
#include<zlib.h>
z_stream strm;
int main(int, char *[]) {
deflateReset(&strm);
}"
NEEDS_Z_PREFIX
)
# See https://cmake.org/cmake/help/v3.14/policy/CMP0075.html. Without unsetting it breaks thrift.
set(CMAKE_REQUIRED_INCLUDES)
set(CMAKE_REQUIRED_LIBRARIES)

View File

@@ -199,17 +199,17 @@ if (WITH_KERBEROS)
 endif()
 
 target_include_directories(hdfs3 PRIVATE ${LIBXML2_INCLUDE_DIR})
 
-target_link_libraries(hdfs3 ${LIBGSASL_LIBRARY})
+target_link_libraries(hdfs3 PRIVATE ${LIBGSASL_LIBRARY})
 if (WITH_KERBEROS)
-    target_link_libraries(hdfs3 ${KERBEROS_LIBRARIES})
+    target_link_libraries(hdfs3 PRIVATE ${KERBEROS_LIBRARIES})
 endif()
-target_link_libraries(hdfs3 ${LIBXML2_LIBRARY})
+target_link_libraries(hdfs3 PRIVATE ${LIBXML2_LIBRARY})
 
 # inherit from parent cmake
 target_include_directories(hdfs3 PRIVATE ${Boost_INCLUDE_DIRS})
 target_include_directories(hdfs3 PRIVATE ${Protobuf_INCLUDE_DIR})
-target_link_libraries(hdfs3 ${Protobuf_LIBRARY})
+target_link_libraries(hdfs3 PRIVATE ${Protobuf_LIBRARY})
 
 if(OPENSSL_INCLUDE_DIR AND OPENSSL_LIBRARIES)
     target_include_directories(hdfs3 PRIVATE ${OPENSSL_INCLUDE_DIR})
-    target_link_libraries(hdfs3 ${OPENSSL_LIBRARIES})
+    target_link_libraries(hdfs3 PRIVATE ${OPENSSL_LIBRARIES})
 endif()

View File

@@ -1,2 +0,0 @@
google-sparsehash@googlegroups.com

View File

@@ -1,28 +0,0 @@
Copyright (c) 2005, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@@ -1,188 +0,0 @@
== 23 February 2012 ==
A backwards incompatibility arose from flattening the include headers
structure for the <google> folder.
This is now fixed in 2.0.2. You only need to upgrade if you had previously
included files from the <google/sparsehash> folder.
== 1 February 2012 ==
A minor bug related to the namespace switch from google to sparsehash
stopped the build from working when perftools is also installed.
This is now fixed in 2.0.1. You only need to upgrade if you have perftools
installed.
== 31 January 2012 ==
I've just released sparsehash 2.0.
The `google-sparsehash` project has been renamed to `sparsehash`. I
(csilvers) am stepping down as maintainer, to be replaced by the team
of Donovan Hide and Geoff Pike. Welcome to the team, Donovan and
Geoff! Donovan has been an active contributor to sparsehash bug
reports and discussions in the past, and Geoff has been closely
involved with sparsehash inside Google (in addition to writing the
[http://code.google.com/p/cityhash CityHash hash function]). The two
of them together should be a formidable force. For good.
I bumped the major version number up to 2 to reflect the new community
ownership of the project. All the
[http://sparsehash.googlecode.com/svn/tags/sparsehash-2.0/ChangeLog changes]
are related to the renaming.
The only functional change from sparsehash 1.12 is that I've renamed
the `google/` include-directory to be `sparsehash/` instead. New code
should `#include <sparsehash/sparse_hash_map>`/etc. I've kept the old
names around as forwarding headers to the new, so `#include
<google/sparse_hash_map>` will continue to work.
Note that the classes and functions remain in the `google` C++
namespace (I didn't change that to `sparsehash` as well); I think
that's a trickier transition, and can happen in a future release.
=== 18 January 2011 ===
The `google-sparsehash` Google Code page has been renamed to
`sparsehash`, in preparation for the project being renamed to
`sparsehash`. In the coming weeks, I'll be stepping down as
maintainer for the sparsehash project, and as part of that Google is
relinquishing ownership of the project; it will now be entirely
community run. The name change reflects that shift.
=== 20 December 2011 ===
I've just released sparsehash 1.12. This release features improved
I/O (serialization) support. Support is finally added to serialize
and unserialize `dense_hash_map`/`set`, paralleling the existing code
for `sparse_hash_map`/`set`. In addition, the serialization API has
gotten simpler, with a single `serialize()` method to write to disk,
and an `unserialize()` method to read from disk. Finally, support has
gotten more generic, with built-in support for both C `FILE*`s and C++
streams, and an extension mechanism to support arbitrary sources and
sinks.
There are also more minor changes, including minor bugfixes, an
improved deleted-key test, and a minor addition to the `sparsetable`
API. See the [http://google-sparsehash.googlecode.com/svn/tags/sparsehash-1.12/ChangeLog ChangeLog]
for full details.
=== 23 June 2011 ===
I've just released sparsehash 1.11. The major user-visible change is
that the default behavior is improved -- using the hash_map/set is
faster -- for hashtables where the key is a pointer. We now notice
that case and ignore the low 2-3 bits (which are almost always 0 for
pointers) when hashing.
Another user-visible change is we've removed the tests for whether the
STL (vector, pair, etc) is defined in the 'std' namespace. gcc 2.95
is the most recent compiler I know of to put STL types and functions
in the global namespace. If you need to use such an old compiler, do
not update to the latest sparsehash release.
We've also changed the internal tools we use to integrate
Googler-supplied patches to sparsehash into the opensource release.
These new tools should result in more frequent updates with better
change descriptions. They will also result in future ChangeLog
entries being much more verbose (for better or for worse).
A full list of changes is described in
[http://google-sparsehash.googlecode.com/svn/tags/sparsehash-1.11/ChangeLog ChangeLog].
=== 21 January 2011 ===
I've just released sparsehash 1.10. This fixes a performance
regression in sparsehash 1.8, where sparse_hash_map would copy
hashtable keys by value even when the key was explicitly a reference.
It also fixes compiler warnings from MSVC 10, which uses some c++0x
features that did not interact well with sparsehash.
There is no reason to upgrade unless you use references for your
hashtable keys, or compile with MSVC 10. A full list of changes is
described in
[http://google-sparsehash.googlecode.com/svn/tags/sparsehash-1.10/ChangeLog ChangeLog].
=== 24 September 2010 ===
I've just released sparsehash 1.9. This fixes a size regression in
sparsehash 1.8, where the new allocator would take up space in
`sparse_hash_map`, doubling the sparse_hash_map overhead (from 1-2
bits per bucket to 3 or so). All users are encouraged to upgrade.
This change also marks enums as being Plain Old Data, which can speed
up hashtables with enum keys and/or values. A full list of changes is
described in
[http://google-sparsehash.googlecode.com/svn/tags/sparsehash-1.9/ChangeLog ChangeLog].
=== 29 July 2010 ===
I've just released sparsehash 1.8. This includes improved support for
`Allocator`, including supporting the allocator constructor arg and
`get_allocator()` access method.
To work around a bug in gcc 4.0.x, I've renamed the static variables
`HT_OCCUPANCY_FLT` and `HT_SHRINK_FLT` to `HT_OCCUPANCY_PCT` and
`HT_SHRINK_PCT`, and changed their type from float to int. This
should not be a user-visible change, since these variables are only
used in the internal hashtable classes (sparsehash clients should use
`max_load_factor()` and `min_load_factor()` instead of modifying these
static variables), but if you do access these constants, you will need
to change your code.
Internally, the biggest change is a revamp of the test suite. It now
has more complete coverage, and a more capable timing tester. There
are other, more minor changes as well. A full list of changes is
described in the
[http://google-sparsehash.googlecode.com/svn/tags/sparsehash-1.8/ChangeLog ChangeLog].
=== 31 March 2010 ===
I've just released sparsehash 1.7. The major news here is the
addition of `Allocator` support. Previously, these hashtable classes
would just ignore the `Allocator` template parameter. They now
respect it, and even inherit `size_type`, `pointer`, etc. from the
allocator class. By default, they use a special allocator we provide
that uses libc `malloc` and `free` to allocate. The hash classes
notice when this special allocator is being used, and use `realloc`
when it can. This means that the default allocator is significantly
faster than custom allocators are likely to be (since realloc-like
functionality is not supported by STL allocators).
There are a few more minor changes as well. A full list of changes is
described in the
[http://google-sparsehash.googlecode.com/svn/tags/sparsehash-1.7/ChangeLog ChangeLog].
=== 11 January 2010 ===
I've just released sparsehash 1.6. The API has widened a bit with the
addition of `deleted_key()` and `empty_key()`, which let you query
what values these keys have. A few rather obscure bugs have been
fixed (such as an error when copying one hashtable into another when
the empty_keys differ). A full list of changes is described in the
[http://google-sparsehash.googlecode.com/svn/tags/sparsehash-1.6/ChangeLog ChangeLog].
=== 9 May 2009 ===
I've just released sparsehash 1.5.1. Hot on the heels of sparsehash
1.5, this release fixes a longstanding bug in the sparsehash code,
where `equal_range` would always return an empty range. It now works
as documented. All sparsehash users are encouraged to upgrade.
=== 7 May 2009 ===
I've just released sparsehash 1.5. This release introduces tr1
compatibility: I've added `rehash`, `begin(i)`, and other methods that
are expected to be part of the `unordered_map` API once `tr1` in
introduced. This allows `sparse_hash_map`, `dense_hash_map`,
`sparse_hash_set`, and `dense_hash_set` to be (almost) drop-in
replacements for `unordered_map` and `unordered_set`.
There is no need to upgrade unless you need this functionality, or
need one of the other, more minor, changes described in the
[http://google-sparsehash.googlecode.com/svn/tags/sparsehash-1.5/ChangeLog ChangeLog].

View File

@@ -1,143 +0,0 @@
This directory contains several hash-map implementations, similar in
API to SGI's hash_map class, but with different performance
characteristics. sparse_hash_map uses very little space overhead, 1-2
bits per entry. dense_hash_map is very fast, particularly on lookup.
(sparse_hash_set and dense_hash_set are the set versions of these
routines.) On the other hand, these classes have requirements that
may not make them appropriate for all applications.
All these implementations use a hashtable with internal quadratic
probing. This method is space-efficient -- there is no pointer
overhead -- and time-efficient for good hash functions.
COMPILING
---------
To compile test applications with these classes, run ./configure
followed by make. To install these header files on your system, run
'make install'. (On Windows, the instructions are different; see
README_windows.txt.) See INSTALL for more details.
This code should work on any modern C++ system. It has been tested on
Linux (Ubuntu, Fedora, RedHat, Debian), Solaris 10 x86, FreeBSD 6.0,
OS X 10.3 and 10.4, and Windows under both VC++7 and VC++8.
USING
-----
See the html files in the doc directory for small example programs
that use these classes. It's enough to just include the header file:
#include <sparsehash/sparse_hash_map> // or sparse_hash_set, dense_hash_map, ...
google::sparse_hash_set<int, int> number_mapper;
and use the class the way you would other hash-map implementations.
(Though see "API" below for caveats.)
By default (you can change it via a flag to ./configure), these hash
implementations are defined in the google namespace.
API
---
The API for sparse_hash_map, dense_hash_map, sparse_hash_set, and
dense_hash_set, are a superset of the API of SGI's hash_map class.
See doc/sparse_hash_map.html, et al., for more information about the
API.
The usage of these classes differ from SGI's hash_map, and other
hashtable implementations, in the following major ways:
1) dense_hash_map requires you to set aside one key value as the
'empty bucket' value, set via the set_empty_key() method. This
*MUST* be called before you can use the dense_hash_map. It is
illegal to insert any elements into a dense_hash_map whose key is
equal to the empty-key.
2) For both dense_hash_map and sparse_hash_map, if you wish to delete
elements from the hashtable, you must set aside a key value as the
'deleted bucket' value, set via the set_deleted_key() method. If
your hash-map is insert-only, there is no need to call this
method. If you call set_deleted_key(), it is illegal to insert any
elements into a dense_hash_map or sparse_hash_map whose key is
equal to the deleted-key.
3) These hash-map implementation support I/O. See below.
There are also some smaller differences:
1) The constructor takes an optional argument that specifies the
number of elements you expect to insert into the hashtable. This
differs from SGI's hash_map implementation, which takes an optional
number of buckets.
2) erase() does not immediately reclaim memory. As a consequence,
erase() does not invalidate any iterators, making loops like this
correct:
for (it = ht.begin(); it != ht.end(); ++it)
if (...) ht.erase(it);
As another consequence, a series of erase() calls can leave your
hashtable using more memory than it needs to. The hashtable will
automatically compact at the next call to insert(), but to
manually compact a hashtable, you can call
ht.resize(0)
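
Editor's note: taken together, the rules above amount to code like the
following minimal sketch, assuming the bundled `google::dense_hash_map`
API from this (now removed) package:

```cpp
#include <cstdio>
#include <sparsehash/dense_hash_map>

int main() {
    google::dense_hash_map<int, int> ht;
    ht.set_empty_key(-1);    // MUST be called before any use
    ht.set_deleted_key(-2);  // required only because we erase below

    for (int i = 0; i < 100; ++i)
        ht[i] = i * i;

    // erase() does not invalidate iterators, so this loop is legal:
    for (google::dense_hash_map<int, int>::iterator it = ht.begin();
         it != ht.end(); ++it)
        if (it->first % 2 == 0)
            ht.erase(it);

    ht.resize(0);  // force the table to compact reclaimed buckets
    std::printf("%zu entries left\n", (size_t)ht.size());
}
```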
I/O
---
In addition to the normal hash-map operations, sparse_hash_map can
read and write hashtables to disk. (dense_hash_map also has the API,
but it has not yet been implemented, and writes will always fail.)
In the simplest case, writing a hashtable is as easy as calling two
methods on the hashtable:
ht.write_metadata(fp);
ht.write_nopointer_data(fp);
Reading in this data is equally simple:
google::sparse_hash_map<...> ht;
ht.read_metadata(fp);
ht.read_nopointer_data(fp);
The above is sufficient if the key and value do not contain any
pointers: they are basic C types or agglomerations of basic C types.
If the key and/or value do contain pointers, you can still store the
hashtable by replacing write_nopointer_data() with a custom writing
routine. See sparse_hash_map.html et al. for more information.
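
Editor's note: a minimal sketch of the read/write API described above,
assuming POD keys and values (no pointers) and an illustrative file name:

```cpp
#include <cstdio>
#include <sparsehash/sparse_hash_map>

int main() {
    google::sparse_hash_map<int, double> ht;
    ht[1] = 3.14;

    if (FILE* fp = std::fopen("table.bin", "wb")) {
        ht.write_metadata(fp);        // bucket layout and bookkeeping
        ht.write_nopointer_data(fp);  // raw key/value bytes
        std::fclose(fp);
    }

    google::sparse_hash_map<int, double> restored;
    if (FILE* fp = std::fopen("table.bin", "rb")) {
        restored.read_metadata(fp);
        restored.read_nopointer_data(fp);
        std::fclose(fp);
    }
}
```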
SPARSETABLE
-----------
In addition to the hash-map and hash-set classes, this package also
provides sparsetable.h, an array implementation that uses space
proportional to the number of elements in the array, rather than the
maximum element index. It uses very little space overhead: 1 bit per
entry. See doc/sparsetable.html for the API.
RESOURCE USAGE
--------------
* sparse_hash_map has memory overhead of about 2 bits per hash-map
entry.
* dense_hash_map has a factor of 2-3 memory overhead: if your
hashtable data takes X bytes, dense_hash_map will use 3X-4X memory
total.
Hashtables tend to double in size when resizing, creating an
additional 50% space overhead. dense_hash_map does in fact have a
significant "high water mark" memory use requirement.
sparse_hash_map, however, is written to need very little space
overhead when resizing: only a few bits per hashtable entry.
PERFORMANCE
-----------
You can compile and run the included file time_hash_map.cc to examine
the performance of sparse_hash_map, dense_hash_map, and your native
hash_map implementation on your system. One test against the
SGI hash_map implementation gave the following timing information for
a simple find() call:
SGI hash_map: 22 ns
dense_hash_map: 13 ns
sparse_hash_map: 117 ns
SGI map: 113 ns
See doc/performance.html for more detailed charts on resource usage
and performance data.
---
16 March 2005
(Last updated: 12 September 2010)

View File

@@ -1,369 +0,0 @@
// Copyright (c) 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// ----
//
// This is just a very thin wrapper over densehashtable.h, just
// like sgi stl's stl_hash_map is a very thin wrapper over
// stl_hashtable. The major thing we define is operator[], because
// we have a concept of a data_type which stl_hashtable doesn't
// (it only has a key and a value).
//
// NOTE: this is exactly like sparse_hash_map.h, with the word
// "sparse" replaced by "dense", except for the addition of
// set_empty_key().
//
// YOU MUST CALL SET_EMPTY_KEY() IMMEDIATELY AFTER CONSTRUCTION.
//
// Otherwise your program will die in mysterious ways. (Note if you
// use the constructor that takes an InputIterator range, you pass in
// the empty key in the constructor, rather than after. As a result,
// this constructor differs from the standard STL version.)
//
// In other respects, we adhere mostly to the STL semantics for
// hash-map. One important exception is that insert() may invalidate
// iterators entirely -- STL semantics are that insert() may reorder
// iterators, but they all still refer to something valid in the
// hashtable. Not so for us. Likewise, insert() may invalidate
// pointers into the hashtable. (Whether insert invalidates iterators
// and pointers depends on whether it results in a hashtable resize).
// On the plus side, delete() doesn't invalidate iterators or pointers
// at all, or even change the ordering of elements.
//
// Here are a few "power user" tips:
//
// 1) set_deleted_key():
// If you want to use erase() you *must* call set_deleted_key(),
// in addition to set_empty_key(), after construction.
// The deleted and empty keys must differ.
//
// 2) resize(0):
// When an item is deleted, its memory isn't freed right
// away. This allows you to iterate over a hashtable,
// and call erase(), without invalidating the iterator.
// To force the memory to be freed, call resize(0).
// For tr1 compatibility, this can also be called as rehash(0).
//
// 3) min_load_factor(0.0)
// Setting the minimum load factor to 0.0 guarantees that
// the hash table will never shrink.
//
// Roughly speaking:
// (1) dense_hash_map: fastest, uses the most memory unless entries are small
// (2) sparse_hash_map: slowest, uses the least memory
// (3) hash_map / unordered_map (STL): in the middle
//
// Typically I use sparse_hash_map when I care about space and/or when
// I need to save the hashtable on disk. I use hash_map otherwise. I
// don't personally use dense_hash_set ever; some people use it for
// small sets with lots of lookups.
//
// - dense_hash_map has, typically, about 78% memory overhead (if your
// data takes up X bytes, the hash_map uses .78X more bytes in overhead).
// - sparse_hash_map has about 4 bits overhead per entry.
// - sparse_hash_map can be 3-7 times slower than the others for lookup and,
// especially, inserts. See time_hash_map.cc for details.
//
// See /usr/(local/)?doc/sparsehash-*/dense_hash_map.html
// for information about how to use this class.
#ifndef _DENSE_HASH_MAP_H_
#define _DENSE_HASH_MAP_H_
#include <sparsehash/internal/sparseconfig.h>
#include <algorithm> // needed by stl_alloc
#include <functional> // for equal_to<>, select1st<>, etc
#include <memory> // for alloc
#include <utility> // for pair<>
#include <sparsehash/internal/densehashtable.h> // IWYU pragma: export
#include <sparsehash/internal/libc_allocator_with_realloc.h>
#include HASH_FUN_H // for hash<>
_START_GOOGLE_NAMESPACE_
template <class Key, class T,
class HashFcn = SPARSEHASH_HASH<Key>, // defined in sparseconfig.h
class EqualKey = std::equal_to<Key>,
class Alloc = libc_allocator_with_realloc<std::pair<const Key, T> > >
class dense_hash_map {
private:
// Apparently select1st is not stl-standard, so we define our own
struct SelectKey {
typedef const Key& result_type;
const Key& operator()(const std::pair<const Key, T>& p) const {
return p.first;
}
};
struct SetKey {
void operator()(std::pair<const Key, T>* value, const Key& new_key) const {
*const_cast<Key*>(&value->first) = new_key;
// It would be nice to clear the rest of value here as well, in
// case it's taking up a lot of memory. We do this by clearing
// the value. This assumes T has a zero-arg constructor!
value->second = T();
}
};
// For operator[].
struct DefaultValue {
std::pair<const Key, T> operator()(const Key& key) {
return std::make_pair(key, T());
}
};
// The actual data
typedef dense_hashtable<std::pair<const Key, T>, Key, HashFcn, SelectKey,
SetKey, EqualKey, Alloc> ht;
ht rep;
public:
typedef typename ht::key_type key_type;
typedef T data_type;
typedef T mapped_type;
typedef typename ht::value_type value_type;
typedef typename ht::hasher hasher;
typedef typename ht::key_equal key_equal;
typedef Alloc allocator_type;
typedef typename ht::size_type size_type;
typedef typename ht::difference_type difference_type;
typedef typename ht::pointer pointer;
typedef typename ht::const_pointer const_pointer;
typedef typename ht::reference reference;
typedef typename ht::const_reference const_reference;
typedef typename ht::iterator iterator;
typedef typename ht::const_iterator const_iterator;
typedef typename ht::local_iterator local_iterator;
typedef typename ht::const_local_iterator const_local_iterator;
// Iterator functions
iterator begin() { return rep.begin(); }
iterator end() { return rep.end(); }
const_iterator begin() const { return rep.begin(); }
const_iterator end() const { return rep.end(); }
// These come from tr1's unordered_map. For us, a bucket has 0 or 1 elements.
local_iterator begin(size_type i) { return rep.begin(i); }
local_iterator end(size_type i) { return rep.end(i); }
const_local_iterator begin(size_type i) const { return rep.begin(i); }
const_local_iterator end(size_type i) const { return rep.end(i); }
// Accessor functions
allocator_type get_allocator() const { return rep.get_allocator(); }
hasher hash_funct() const { return rep.hash_funct(); }
hasher hash_function() const { return hash_funct(); }
key_equal key_eq() const { return rep.key_eq(); }
// Constructors
explicit dense_hash_map(size_type expected_max_items_in_table = 0,
const hasher& hf = hasher(),
const key_equal& eql = key_equal(),
const allocator_type& alloc = allocator_type())
: rep(expected_max_items_in_table, hf, eql, SelectKey(), SetKey(), alloc) {
}
template <class InputIterator>
dense_hash_map(InputIterator f, InputIterator l,
const key_type& empty_key_val,
size_type expected_max_items_in_table = 0,
const hasher& hf = hasher(),
const key_equal& eql = key_equal(),
const allocator_type& alloc = allocator_type())
: rep(expected_max_items_in_table, hf, eql, SelectKey(), SetKey(), alloc) {
set_empty_key(empty_key_val);
rep.insert(f, l);
}
// We use the default copy constructor
// We use the default operator=()
// We use the default destructor
void clear() { rep.clear(); }
// This clears the hash map without resizing it down to the minimum
// bucket count, but rather keeps the number of buckets constant
void clear_no_resize() { rep.clear_no_resize(); }
void swap(dense_hash_map& hs) { rep.swap(hs.rep); }
// Functions concerning size
size_type size() const { return rep.size(); }
size_type max_size() const { return rep.max_size(); }
bool empty() const { return rep.empty(); }
size_type bucket_count() const { return rep.bucket_count(); }
size_type max_bucket_count() const { return rep.max_bucket_count(); }
// These are tr1 methods. bucket() is the bucket the key is or would be in.
size_type bucket_size(size_type i) const { return rep.bucket_size(i); }
size_type bucket(const key_type& key) const { return rep.bucket(key); }
float load_factor() const {
return size() * 1.0f / bucket_count();
}
float max_load_factor() const {
float shrink, grow;
rep.get_resizing_parameters(&shrink, &grow);
return grow;
}
void max_load_factor(float new_grow) {
float shrink, grow;
rep.get_resizing_parameters(&shrink, &grow);
rep.set_resizing_parameters(shrink, new_grow);
}
// These aren't tr1 methods but perhaps ought to be.
float min_load_factor() const {
float shrink, grow;
rep.get_resizing_parameters(&shrink, &grow);
return shrink;
}
void min_load_factor(float new_shrink) {
float shrink, grow;
rep.get_resizing_parameters(&shrink, &grow);
rep.set_resizing_parameters(new_shrink, grow);
}
// Deprecated; use min_load_factor() or max_load_factor() instead.
void set_resizing_parameters(float shrink, float grow) {
rep.set_resizing_parameters(shrink, grow);
}
void resize(size_type hint) { rep.resize(hint); }
void rehash(size_type hint) { resize(hint); } // the tr1 name
// Lookup routines
iterator find(const key_type& key) { return rep.find(key); }
const_iterator find(const key_type& key) const { return rep.find(key); }
data_type& operator[](const key_type& key) { // This is our value-add!
// If key is in the hashtable, returns find(key)->second,
// otherwise returns insert(value_type(key, T()).first->second.
// Note it does not create an empty T unless the find fails.
return rep.template find_or_insert<DefaultValue>(key).second;
}
size_type count(const key_type& key) const { return rep.count(key); }
std::pair<iterator, iterator> equal_range(const key_type& key) {
return rep.equal_range(key);
}
std::pair<const_iterator, const_iterator> equal_range(const key_type& key)
const {
return rep.equal_range(key);
}
// Insertion routines
std::pair<iterator, bool> insert(const value_type& obj) {
return rep.insert(obj);
}
template <class InputIterator> void insert(InputIterator f, InputIterator l) {
rep.insert(f, l);
}
void insert(const_iterator f, const_iterator l) {
rep.insert(f, l);
}
// Required for std::insert_iterator; the passed-in iterator is ignored.
iterator insert(iterator, const value_type& obj) {
return insert(obj).first;
}
// Deletion and empty routines
// THESE ARE NON-STANDARD! I make you specify an "impossible" key
// value to identify deleted and empty buckets. You can change the
// deleted key as time goes on, or get rid of it entirely to be insert-only.
void set_empty_key(const key_type& key) { // YOU MUST CALL THIS!
rep.set_empty_key(value_type(key, data_type())); // rep wants a value
}
key_type empty_key() const {
return rep.empty_key().first; // rep returns a value
}
void set_deleted_key(const key_type& key) { rep.set_deleted_key(key); }
void clear_deleted_key() { rep.clear_deleted_key(); }
key_type deleted_key() const { return rep.deleted_key(); }
// These are standard
size_type erase(const key_type& key) { return rep.erase(key); }
void erase(iterator it) { rep.erase(it); }
void erase(iterator f, iterator l) { rep.erase(f, l); }
// Comparison
bool operator==(const dense_hash_map& hs) const { return rep == hs.rep; }
bool operator!=(const dense_hash_map& hs) const { return rep != hs.rep; }
// I/O -- this is an add-on for writing the hash map to disk
//
// For maximum flexibility, this does not assume a particular
// file type (though it will probably be a FILE *). We just pass
// the fp through to rep.
// If your keys and values are simple enough, you can pass this
// serializer to serialize()/unserialize(). "Simple enough" means
// value_type is a POD type that contains no pointers. Note,
// however, we don't try to normalize endianness.
typedef typename ht::NopointerSerializer NopointerSerializer;
// serializer: a class providing operator()(OUTPUT*, const value_type&)
// (writing value_type to OUTPUT). You can specify a
// NopointerSerializer object if appropriate (see above).
// fp: either a FILE*, OR an ostream*/subclass_of_ostream*, OR a
// pointer to a class providing size_t Write(const void*, size_t),
// which writes a buffer into a stream (which fp presumably
// owns) and returns the number of bytes successfully written.
// Note basic_ostream<not_char> is not currently supported.
template <typename ValueSerializer, typename OUTPUT>
bool serialize(ValueSerializer serializer, OUTPUT* fp) {
return rep.serialize(serializer, fp);
}
// serializer: a functor providing operator()(INPUT*, value_type*)
// (reading from INPUT and into value_type). You can specify a
// NopointerSerializer object if appropriate (see above).
// fp: either a FILE*, OR an istream*/subclass_of_istream*, OR a
// pointer to a class providing size_t Read(void*, size_t),
// which reads into a buffer from a stream (which fp presumably
// owns) and returns the number of bytes successfully read.
// Note basic_istream<not_char> is not currently supported.
// NOTE: Since value_type is std::pair<const Key, T>, ValueSerializer
// may need to do a const cast in order to fill in the key.
template <typename ValueSerializer, typename INPUT>
bool unserialize(ValueSerializer serializer, INPUT* fp) {
return rep.unserialize(serializer, fp);
}
};
// We need a global swap as well
template <class Key, class T, class HashFcn, class EqualKey, class Alloc>
inline void swap(dense_hash_map<Key, T, HashFcn, EqualKey, Alloc>& hm1,
dense_hash_map<Key, T, HashFcn, EqualKey, Alloc>& hm2) {
hm1.swap(hm2);
}
_END_GOOGLE_NAMESPACE_
#endif /* _DENSE_HASH_MAP_H_ */
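Editor's note: a minimal usage sketch of the contract spelled out above (the installed include path and the -1/-2 sentinel keys are illustrative assumptions; any two values that can never occur as real keys will do):

#include <cstddef>
#include <cstdio>
#include <sparsehash/dense_hash_map>  // installed name of this header

int main() {
    google::dense_hash_map<int, int> ages;
    ages.set_empty_key(-1);    // MUST be called before any other use
    ages.set_deleted_key(-2);  // required before erase(); must differ from the empty key
    ages[42] = 7;              // operator[] inserts a default T on a miss
    ages[43] = 8;
    ages.erase(42);            // marks the bucket deleted; memory is not freed yet
    ages.resize(0);            // forces deleted entries to be freed
    std::printf("%zu\n", static_cast<std::size_t>(ages.size()));  // prints 1
}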

View File

@ -1,338 +0,0 @@
// Copyright (c) 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// ---
//
// This is just a very thin wrapper over densehashtable.h, just
// like sgi stl's stl_hash_set is a very thin wrapper over
// stl_hashtable. Unlike the map wrapper, we define no operator[]
// here: a set has no separate data_type, only the key (which
// doubles as the value).
//
// This is more different from dense_hash_map than you might think,
// because all iterators for sets are const (you obviously can't
// change the key, and for sets there is no value).
//
// NOTE: this is exactly like sparse_hash_set.h, with the word
// "sparse" replaced by "dense", except for the addition of
// set_empty_key().
//
// YOU MUST CALL SET_EMPTY_KEY() IMMEDIATELY AFTER CONSTRUCTION.
//
// Otherwise your program will die in mysterious ways. (Note if you
// use the constructor that takes an InputIterator range, you pass in
// the empty key in the constructor, rather than after. As a result,
// this constructor differs from the standard STL version.)
//
// In other respects, we adhere mostly to the STL semantics for
// hash-map. One important exception is that insert() may invalidate
// iterators entirely -- STL semantics are that insert() may reorder
// iterators, but they all still refer to something valid in the
// hashtable. Not so for us. Likewise, insert() may invalidate
// pointers into the hashtable. (Whether insert invalidates iterators
// and pointers depends on whether it results in a hashtable resize).
// On the plus side, delete() doesn't invalidate iterators or pointers
// at all, or even change the ordering of elements.
//
// Here are a few "power user" tips:
//
// 1) set_deleted_key():
// If you want to use erase() you must call set_deleted_key(),
// in addition to set_empty_key(), after construction.
// The deleted and empty keys must differ.
//
// 2) resize(0):
// When an item is deleted, its memory isn't freed right
// away. This allows you to iterate over a hashtable,
// and call erase(), without invalidating the iterator.
// To force the memory to be freed, call resize(0).
// For tr1 compatibility, this can also be called as rehash(0).
//
// 3) min_load_factor(0.0)
// Setting the minimum load factor to 0.0 guarantees that
// the hash table will never shrink.
//
// Roughly speaking:
// (1) dense_hash_set: fastest, uses the most memory unless entries are small
// (2) sparse_hash_set: slowest, uses the least memory
// (3) hash_set / unordered_set (STL): in the middle
//
// Typically I use sparse_hash_set when I care about space and/or when
// I need to save the hashtable on disk. I use hash_set otherwise. I
// don't personally use dense_hash_set ever; some people use it for
// small sets with lots of lookups.
//
// - dense_hash_set has, typically, about 78% memory overhead (if your
// data takes up X bytes, the hash_set uses .78X more bytes in overhead).
// - sparse_hash_set has about 4 bits overhead per entry.
// - sparse_hash_set can be 3-7 times slower than the others for lookup and,
// especially, inserts. See time_hash_map.cc for details.
//
// See /usr/(local/)?doc/sparsehash-*/dense_hash_set.html
// for information about how to use this class.
#ifndef _DENSE_HASH_SET_H_
#define _DENSE_HASH_SET_H_
#include <sparsehash/internal/sparseconfig.h>
#include <algorithm> // needed by stl_alloc
#include <functional> // for equal_to<>, select1st<>, etc
#include <memory> // for alloc
#include <utility> // for pair<>
#include <sparsehash/internal/densehashtable.h> // IWYU pragma: export
#include <sparsehash/internal/libc_allocator_with_realloc.h>
#include HASH_FUN_H // for hash<>
_START_GOOGLE_NAMESPACE_
template <class Value,
class HashFcn = SPARSEHASH_HASH<Value>, // defined in sparseconfig.h
class EqualKey = std::equal_to<Value>,
class Alloc = libc_allocator_with_realloc<Value> >
class dense_hash_set {
private:
// Apparently identity is not stl-standard, so we define our own
struct Identity {
typedef const Value& result_type;
const Value& operator()(const Value& v) const { return v; }
};
struct SetKey {
void operator()(Value* value, const Value& new_key) const {
*value = new_key;
}
};
// The actual data
typedef dense_hashtable<Value, Value, HashFcn, Identity, SetKey,
EqualKey, Alloc> ht;
ht rep;
public:
typedef typename ht::key_type key_type;
typedef typename ht::value_type value_type;
typedef typename ht::hasher hasher;
typedef typename ht::key_equal key_equal;
typedef Alloc allocator_type;
typedef typename ht::size_type size_type;
typedef typename ht::difference_type difference_type;
typedef typename ht::const_pointer pointer;
typedef typename ht::const_pointer const_pointer;
typedef typename ht::const_reference reference;
typedef typename ht::const_reference const_reference;
typedef typename ht::const_iterator iterator;
typedef typename ht::const_iterator const_iterator;
typedef typename ht::const_local_iterator local_iterator;
typedef typename ht::const_local_iterator const_local_iterator;
// Iterator functions -- recall all iterators are const
iterator begin() const { return rep.begin(); }
iterator end() const { return rep.end(); }
// These come from tr1's unordered_set. For us, a bucket has 0 or 1 elements.
local_iterator begin(size_type i) const { return rep.begin(i); }
local_iterator end(size_type i) const { return rep.end(i); }
// Accessor functions
allocator_type get_allocator() const { return rep.get_allocator(); }
hasher hash_funct() const { return rep.hash_funct(); }
hasher hash_function() const { return hash_funct(); } // tr1 name
key_equal key_eq() const { return rep.key_eq(); }
// Constructors
explicit dense_hash_set(size_type expected_max_items_in_table = 0,
const hasher& hf = hasher(),
const key_equal& eql = key_equal(),
const allocator_type& alloc = allocator_type())
: rep(expected_max_items_in_table, hf, eql, Identity(), SetKey(), alloc) {
}
template <class InputIterator>
dense_hash_set(InputIterator f, InputIterator l,
const key_type& empty_key_val,
size_type expected_max_items_in_table = 0,
const hasher& hf = hasher(),
const key_equal& eql = key_equal(),
const allocator_type& alloc = allocator_type())
: rep(expected_max_items_in_table, hf, eql, Identity(), SetKey(), alloc) {
set_empty_key(empty_key_val);
rep.insert(f, l);
}
// We use the default copy constructor
// We use the default operator=()
// We use the default destructor
void clear() { rep.clear(); }
// This clears the hash set without resizing it down to the minimum
// bucket count, but rather keeps the number of buckets constant
void clear_no_resize() { rep.clear_no_resize(); }
void swap(dense_hash_set& hs) { rep.swap(hs.rep); }
// Functions concerning size
size_type size() const { return rep.size(); }
size_type max_size() const { return rep.max_size(); }
bool empty() const { return rep.empty(); }
size_type bucket_count() const { return rep.bucket_count(); }
size_type max_bucket_count() const { return rep.max_bucket_count(); }
// These are tr1 methods. bucket() is the bucket the key is or would be in.
size_type bucket_size(size_type i) const { return rep.bucket_size(i); }
size_type bucket(const key_type& key) const { return rep.bucket(key); }
float load_factor() const {
return size() * 1.0f / bucket_count();
}
float max_load_factor() const {
float shrink, grow;
rep.get_resizing_parameters(&shrink, &grow);
return grow;
}
void max_load_factor(float new_grow) {
float shrink, grow;
rep.get_resizing_parameters(&shrink, &grow);
rep.set_resizing_parameters(shrink, new_grow);
}
// These aren't tr1 methods but perhaps ought to be.
float min_load_factor() const {
float shrink, grow;
rep.get_resizing_parameters(&shrink, &grow);
return shrink;
}
void min_load_factor(float new_shrink) {
float shrink, grow;
rep.get_resizing_parameters(&shrink, &grow);
rep.set_resizing_parameters(new_shrink, grow);
}
// Deprecated; use min_load_factor() or max_load_factor() instead.
void set_resizing_parameters(float shrink, float grow) {
rep.set_resizing_parameters(shrink, grow);
}
void resize(size_type hint) { rep.resize(hint); }
void rehash(size_type hint) { resize(hint); } // the tr1 name
// Lookup routines
iterator find(const key_type& key) const { return rep.find(key); }
size_type count(const key_type& key) const { return rep.count(key); }
std::pair<iterator, iterator> equal_range(const key_type& key) const {
return rep.equal_range(key);
}
// Insertion routines
std::pair<iterator, bool> insert(const value_type& obj) {
std::pair<typename ht::iterator, bool> p = rep.insert(obj);
return std::pair<iterator, bool>(p.first, p.second); // ht::iterator to const_iterator
}
template <class InputIterator> void insert(InputIterator f, InputIterator l) {
rep.insert(f, l);
}
void insert(const_iterator f, const_iterator l) {
rep.insert(f, l);
}
// Required for std::insert_iterator; the passed-in iterator is ignored.
iterator insert(iterator, const value_type& obj) {
return insert(obj).first;
}
// Deletion and empty routines
// THESE ARE NON-STANDARD! I make you specify an "impossible" key
// value to identify deleted and empty buckets. You can change the
// deleted key as time goes on, or get rid of it entirely to be insert-only.
void set_empty_key(const key_type& key) { rep.set_empty_key(key); }
key_type empty_key() const { return rep.empty_key(); }
void set_deleted_key(const key_type& key) { rep.set_deleted_key(key); }
void clear_deleted_key() { rep.clear_deleted_key(); }
key_type deleted_key() const { return rep.deleted_key(); }
// These are standard
size_type erase(const key_type& key) { return rep.erase(key); }
void erase(iterator it) { rep.erase(it); }
void erase(iterator f, iterator l) { rep.erase(f, l); }
// Comparison
bool operator==(const dense_hash_set& hs) const { return rep == hs.rep; }
bool operator!=(const dense_hash_set& hs) const { return rep != hs.rep; }
// I/O -- this is an add-on for writing metainformation to disk
//
// For maximum flexibility, this does not assume a particular
// file type (though it will probably be a FILE *). We just pass
// the fp through to rep.
// If your keys and values are simple enough, you can pass this
// serializer to serialize()/unserialize(). "Simple enough" means
// value_type is a POD type that contains no pointers. Note,
// however, we don't try to normalize endianness.
typedef typename ht::NopointerSerializer NopointerSerializer;
// serializer: a class providing operator()(OUTPUT*, const value_type&)
// (writing value_type to OUTPUT). You can specify a
// NopointerSerializer object if appropriate (see above).
// fp: either a FILE*, OR an ostream*/subclass_of_ostream*, OR a
// pointer to a class providing size_t Write(const void*, size_t),
// which writes a buffer into a stream (which fp presumably
// owns) and returns the number of bytes successfully written.
// Note basic_ostream<not_char> is not currently supported.
template <typename ValueSerializer, typename OUTPUT>
bool serialize(ValueSerializer serializer, OUTPUT* fp) {
return rep.serialize(serializer, fp);
}
// serializer: a functor providing operator()(INPUT*, value_type*)
// (reading from INPUT and into value_type). You can specify a
// NopointerSerializer object if appropriate (see above).
// fp: either a FILE*, OR an istream*/subclass_of_istream*, OR a
// pointer to a class providing size_t Read(void*, size_t),
// which reads into a buffer from a stream (which fp presumably
// owns) and returns the number of bytes successfully read.
// Note basic_istream<not_char> is not currently supported.
template <typename ValueSerializer, typename INPUT>
bool unserialize(ValueSerializer serializer, INPUT* fp) {
return rep.unserialize(serializer, fp);
}
};
template <class Val, class HashFcn, class EqualKey, class Alloc>
inline void swap(dense_hash_set<Val, HashFcn, EqualKey, Alloc>& hs1,
dense_hash_set<Val, HashFcn, EqualKey, Alloc>& hs2) {
hs1.swap(hs2);
}
_END_GOOGLE_NAMESPACE_
#endif /* _DENSE_HASH_SET_H_ */
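Editor's note: the same contract for the set wrapper, as a minimal sketch (include path and sentinel keys are illustrative; recall that all set iterators are const):

#include <sparsehash/dense_hash_set>  // installed name of this header

int main() {
    google::dense_hash_set<int> s;
    s.set_empty_key(-1);    // MUST be called immediately after construction
    s.set_deleted_key(-2);  // only needed if erase() will be used
    s.insert(10);
    s.insert(20);
    s.erase(10);
    return s.find(20) != s.end() ? 0 : 1;  // find() returns a const_iterator
}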

File diff suppressed because it is too large Load Diff

View File

@ -1,381 +0,0 @@
// Copyright (c) 2010, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// ---
//
// Provides classes shared by both sparse and dense hashtable.
//
// sh_hashtable_settings has parameters for growing and shrinking
// a hashtable. It also packages a zero-size functor (i.e. the hasher).
//
// Other functions and classes provide common code for serializing
// and deserializing hashtables to a stream (such as a FILE*).
#ifndef UTIL_GTL_HASHTABLE_COMMON_H_
#define UTIL_GTL_HASHTABLE_COMMON_H_
#include <sparsehash/internal/sparseconfig.h>
#include <assert.h>
#include <stdio.h>
#include <stddef.h> // for size_t
#include <iosfwd>
#include <stdexcept> // For length_error
_START_GOOGLE_NAMESPACE_
template <bool> struct SparsehashCompileAssert { };
#define SPARSEHASH_COMPILE_ASSERT(expr, msg) \
static_assert(expr, #msg)
namespace sparsehash_internal {
// Adaptor methods for reading/writing data from an INPUT or OUTPUT
// variable passed to serialize() or unserialize(). For now we
// have implemented INPUT/OUTPUT for FILE*, istream*/ostream* (note
// they are pointers, unlike typical use), or else a pointer to
// something that supports a Read()/Write() method.
//
// For technical reasons, we implement read_data/write_data in two
// stages. The actual work is done in *_data_internal, which takes
// the stream argument twice: once as a template type, and once with
// normal type information. (We only use the second version.) We do
// this because of how C++ picks what function overload to use. If we
// implemented this the naive way:
// bool read_data(istream* is, const void* data, size_t length);
// template<typename T> read_data(T* fp, const void* data, size_t length);
// C++ would prefer the second version for every stream type except
// istream. However, we want C++ to prefer the first version for
// streams that are *subclasses* of istream, such as istringstream.
// This is not possible given the way template types are resolved. So
// we split the stream argument in two, one of which is templated and
// one of which is not. The specialized functions (like the istream
// version above) ignore the template arg and use the second, 'type'
// arg, getting subclass matching as normal. The 'catch-all'
// functions (the second version above) use the template arg to deduce
// the type, and use a second, void* arg to achieve the desired
// 'catch-all' semantics.
// ----- low-level I/O for FILE* ----
template<typename Ignored>
inline bool read_data_internal(Ignored*, FILE* fp,
void* data, size_t length) {
return fread(data, length, 1, fp) == 1;
}
template<typename Ignored>
inline bool write_data_internal(Ignored*, FILE* fp,
const void* data, size_t length) {
return fwrite(data, length, 1, fp) == 1;
}
// ----- low-level I/O for iostream ----
// We want the caller to be responsible for #including <iostream>, not
// us, because iostream is a big header! According to the standard,
// it's only legal to delay the instantiation the way we want to if
// the istream/ostream is a template type. So we jump through hoops.
template<typename ISTREAM>
inline bool read_data_internal_for_istream(ISTREAM* fp,
void* data, size_t length) {
return fp->read(reinterpret_cast<char*>(data), length).good();
}
template<typename Ignored>
inline bool read_data_internal(Ignored*, std::istream* fp,
void* data, size_t length) {
return read_data_internal_for_istream(fp, data, length);
}
template<typename OSTREAM>
inline bool write_data_internal_for_ostream(OSTREAM* fp,
const void* data, size_t length) {
return fp->write(reinterpret_cast<const char*>(data), length).good();
}
template<typename Ignored>
inline bool write_data_internal(Ignored*, std::ostream* fp,
const void* data, size_t length) {
return write_data_internal_for_ostream(fp, data, length);
}
// ----- low-level I/O for custom streams ----
// The INPUT type needs to support a Read() method that takes a
// buffer and a length and returns the number of bytes read.
template <typename INPUT>
inline bool read_data_internal(INPUT* fp, void*,
void* data, size_t length) {
return static_cast<size_t>(fp->Read(data, length)) == length;
}
// The OUTPUT type needs to support a Write() operation that takes
// a buffer and a length and returns the number of bytes written.
template <typename OUTPUT>
inline bool write_data_internal(OUTPUT* fp, void*,
const void* data, size_t length) {
return static_cast<size_t>(fp->Write(data, length)) == length;
}
// ----- low-level I/O: the public API ----
template <typename INPUT>
inline bool read_data(INPUT* fp, void* data, size_t length) {
return read_data_internal(fp, fp, data, length);
}
template <typename OUTPUT>
inline bool write_data(OUTPUT* fp, const void* data, size_t length) {
return write_data_internal(fp, fp, data, length);
}
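// Editor's note -- a minimal sketch (hypothetical VectorStream type,
// given <vector>, <cstring> and <algorithm>) of the custom-stream path:
// any type with the matching Read()/Write() signatures is picked up by
// the templated catch-all overloads above.
//
//   struct VectorStream {
//     std::vector<unsigned char> buf;
//     size_t pos = 0;
//     size_t Write(const void* p, size_t n) {
//       const unsigned char* c = static_cast<const unsigned char*>(p);
//       buf.insert(buf.end(), c, c + n);
//       return n;
//     }
//     size_t Read(void* p, size_t n) {
//       n = std::min(n, buf.size() - pos);
//       memcpy(p, buf.data() + pos, n);
//       pos += n;
//       return n;
//     }
//   };
//
//   VectorStream vs;
//   write_data(&vs, "hi", 2);  // resolves to the templated OUTPUT* overload
//   char out[2];
//   read_data(&vs, out, 2);    // resolves to the templated INPUT* overload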
// Uses read_data() and write_data() to read/write an integer.
// length is the number of bytes to read/write (which may differ
// from sizeof(IntType), allowing us to save on a 32-bit system
// and load on a 64-bit system). Excess bytes are taken to be 0.
// INPUT and OUTPUT must match legal inputs to read/write_data (above).
template <typename INPUT, typename IntType>
bool read_bigendian_number(INPUT* fp, IntType* value, size_t length) {
*value = 0;
unsigned char byte;
// We require IntType to be unsigned or else the shifting gets all screwy.
SPARSEHASH_COMPILE_ASSERT(static_cast<IntType>(-1) > static_cast<IntType>(0),
serializing_int_requires_an_unsigned_type);
for (size_t i = 0; i < length; ++i) {
if (!read_data(fp, &byte, sizeof(byte))) return false;
*value |= static_cast<IntType>(byte) << ((length - 1 - i) * 8);
}
return true;
}
template <typename OUTPUT, typename IntType>
bool write_bigendian_number(OUTPUT* fp, IntType value, size_t length) {
unsigned char byte;
// We require IntType to be unsigned or else the shifting gets all screwy.
SPARSEHASH_COMPILE_ASSERT(static_cast<IntType>(-1) > static_cast<IntType>(0),
serializing_int_requires_an_unsigned_type);
for (size_t i = 0; i < length; ++i) {
byte = (sizeof(value) <= length-1 - i)
? 0 : static_cast<unsigned char>((value >> ((length-1 - i) * 8)) & 255);
if (!write_data(fp, &byte, sizeof(byte))) return false;
}
return true;
}
// If your keys and values are simple enough, you can pass this
// serializer to serialize()/unserialize(). "Simple enough" means
// value_type is a POD type that contains no pointers. Note,
// however, we don't try to normalize endianness.
// This is the type used for NopointerSerializer.
template <typename value_type> struct pod_serializer {
template <typename INPUT>
bool operator()(INPUT* fp, value_type* value) const {
return read_data(fp, value, sizeof(*value));
}
template <typename OUTPUT>
bool operator()(OUTPUT* fp, const value_type& value) const {
return write_data(fp, &value, sizeof(value));
}
};
// Settings contains parameters for growing and shrinking the table.
// It also packages a zero-size functor (i.e. the hasher).
//
// It does some munging of the hash value in cases where we think
// (fear) the original hash function might not be very good. In
// particular, the default hash of pointers is the identity hash,
// so probably all the low bits are 0. We identify when we think
// we're hashing a pointer, and chop off the low bits. Note this
// isn't perfect: even when the key is a pointer, we can't tell
// for sure that the hash is the identity hash. If it's not, this
// is needless work (and possibly, though not likely, harmful).
template<typename Key, typename HashFunc,
typename SizeType, int HT_MIN_BUCKETS>
class sh_hashtable_settings : public HashFunc {
public:
typedef Key key_type;
typedef HashFunc hasher;
typedef SizeType size_type;
public:
sh_hashtable_settings(const hasher& hf,
const float ht_occupancy_flt,
const float ht_empty_flt)
: hasher(hf),
enlarge_threshold_(0),
shrink_threshold_(0),
consider_shrink_(false),
use_empty_(false),
use_deleted_(false),
num_ht_copies_(0) {
set_enlarge_factor(ht_occupancy_flt);
set_shrink_factor(ht_empty_flt);
}
size_type hash(const key_type& v) const {
// We munge the hash value when we don't trust hasher::operator().
return hash_munger<Key>::MungedHash(hasher::operator()(v));
}
float enlarge_factor() const {
return enlarge_factor_;
}
void set_enlarge_factor(float f) {
enlarge_factor_ = f;
}
float shrink_factor() const {
return shrink_factor_;
}
void set_shrink_factor(float f) {
shrink_factor_ = f;
}
size_type enlarge_threshold() const {
return enlarge_threshold_;
}
void set_enlarge_threshold(size_type t) {
enlarge_threshold_ = t;
}
size_type shrink_threshold() const {
return shrink_threshold_;
}
void set_shrink_threshold(size_type t) {
shrink_threshold_ = t;
}
size_type enlarge_size(size_type x) const {
return static_cast<size_type>(x * enlarge_factor_);
}
size_type shrink_size(size_type x) const {
return static_cast<size_type>(x * shrink_factor_);
}
bool consider_shrink() const {
return consider_shrink_;
}
void set_consider_shrink(bool t) {
consider_shrink_ = t;
}
bool use_empty() const {
return use_empty_;
}
void set_use_empty(bool t) {
use_empty_ = t;
}
bool use_deleted() const {
return use_deleted_;
}
void set_use_deleted(bool t) {
use_deleted_ = t;
}
size_type num_ht_copies() const {
return static_cast<size_type>(num_ht_copies_);
}
void inc_num_ht_copies() {
++num_ht_copies_;
}
// Reset the enlarge and shrink thresholds
void reset_thresholds(size_type num_buckets) {
set_enlarge_threshold(enlarge_size(num_buckets));
set_shrink_threshold(shrink_size(num_buckets));
// whatever caused the reset has already considered shrinking
set_consider_shrink(false);
}
// Caller is responsible for calling reset_thresholds() right after
// set_resizing_parameters().
void set_resizing_parameters(float shrink, float grow) {
assert(shrink >= 0.0);
assert(grow <= 1.0);
if (shrink > grow/2.0f)
shrink = grow / 2.0f; // otherwise we thrash hashtable size
set_shrink_factor(shrink);
set_enlarge_factor(grow);
}
// This is the smallest size a hashtable can be without being too crowded.
// If you like, you can give a min #buckets as well as a min #elts.
size_type min_buckets(size_type num_elts, size_type min_buckets_wanted) {
float enlarge = enlarge_factor();
size_type sz = HT_MIN_BUCKETS; // min buckets allowed
while ( sz < min_buckets_wanted ||
num_elts >= static_cast<size_type>(sz * enlarge) ) {
// This just prevents overflowing size_type, since sz can exceed
// max_size() here.
if (static_cast<size_type>(sz * 2) < sz) {
throw std::length_error("resize overflow"); // protect against overflow
}
sz *= 2;
}
return sz;
}
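// Editor's note -- a worked example, assuming the densehashtable
// defaults of HT_MIN_BUCKETS == 4 and enlarge_factor() == 0.5:
// min_buckets(100, 0) doubles sz through 4, 8, ..., 128 (the loop
// keeps going while 100 >= sz * 0.5) and returns 256, the first
// power of two for which 100 < 256 * 0.5.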
private:
template<class HashKey> class hash_munger {
public:
static size_t MungedHash(size_t hash) {
return hash;
}
};
// This matches when the hashtable key is a pointer.
template<class HashKey> class hash_munger<HashKey*> {
public:
static size_t MungedHash(size_t hash) {
// TODO(csilvers): consider rotating instead:
// static const int shift = (sizeof(void *) == 4) ? 2 : 3;
// return (hash << (sizeof(hash) * 8) - shift)) | (hash >> shift);
// This matters if we ever change sparse/dense_hash_* to compare
// hashes before comparing actual values. It's speedy on x86.
return hash / sizeof(void*); // get rid of known-0 bits
}
};
size_type enlarge_threshold_; // table.size() * enlarge_factor
size_type shrink_threshold_; // table.size() * shrink_factor
float enlarge_factor_; // how full before resize
float shrink_factor_; // how empty before resize
// consider_shrink=true if we should try to shrink before next insert
bool consider_shrink_;
bool use_empty_; // used only by densehashtable, not sparsehashtable
bool use_deleted_; // false until delkey has been set
// num_ht_copies is a counter incremented every Copy/Move
unsigned int num_ht_copies_;
};
} // namespace sparsehash_internal
#undef SPARSEHASH_COMPILE_ASSERT
_END_GOOGLE_NAMESPACE_
#endif // UTIL_GTL_HASHTABLE_COMMON_H_
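Editor's note: a minimal round-trip sketch of the fixed-width integer helpers above (the file name and the 4-byte width are illustrative; the on-disk width may differ from sizeof(IntType) as long as the excess high bytes are zero):

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <sparsehash/internal/hashtable-common.h>

int main() {
    using namespace google::sparsehash_internal;
    std::FILE* fp = std::fopen("num.bin", "wb");
    write_bigendian_number(fp, std::uint64_t(0xDEADBEEF), 4);  // 64-bit value, 4 bytes on disk
    std::fclose(fp);

    fp = std::fopen("num.bin", "rb");
    std::uint64_t v = 0;
    read_bigendian_number(fp, &v, 4);  // load it back, possibly on another platform
    std::fclose(fp);
    assert(v == 0xDEADBEEF);
}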

View File

@ -1,119 +0,0 @@
// Copyright (c) 2010, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// ---
#ifndef UTIL_GTL_LIBC_ALLOCATOR_WITH_REALLOC_H_
#define UTIL_GTL_LIBC_ALLOCATOR_WITH_REALLOC_H_
#include <sparsehash/internal/sparseconfig.h>
#include <stdlib.h> // for malloc/realloc/free
#include <stddef.h> // for ptrdiff_t
#include <new> // for placement new
_START_GOOGLE_NAMESPACE_
template<class T>
class libc_allocator_with_realloc {
public:
typedef T value_type;
typedef size_t size_type;
typedef ptrdiff_t difference_type;
typedef T* pointer;
typedef const T* const_pointer;
typedef T& reference;
typedef const T& const_reference;
libc_allocator_with_realloc() {}
libc_allocator_with_realloc(const libc_allocator_with_realloc&) {}
~libc_allocator_with_realloc() {}
pointer address(reference r) const { return &r; }
const_pointer address(const_reference r) const { return &r; }
pointer allocate(size_type n, const_pointer = 0) {
return static_cast<pointer>(malloc(n * sizeof(value_type)));
}
void deallocate(pointer p, size_type) {
free(p);
}
pointer reallocate(pointer p, size_type n) {
return static_cast<pointer>(realloc(p, n * sizeof(value_type)));
}
size_type max_size() const {
return static_cast<size_type>(-1) / sizeof(value_type);
}
void construct(pointer p, const value_type& val) {
new(p) value_type(val);
}
void destroy(pointer p) { p->~value_type(); }
template <class U>
libc_allocator_with_realloc(const libc_allocator_with_realloc<U>&) {}
template<class U>
struct rebind {
typedef libc_allocator_with_realloc<U> other;
};
};
// libc_allocator_with_realloc<void> specialization.
template<>
class libc_allocator_with_realloc<void> {
public:
typedef void value_type;
typedef size_t size_type;
typedef ptrdiff_t difference_type;
typedef void* pointer;
typedef const void* const_pointer;
template<class U>
struct rebind {
typedef libc_allocator_with_realloc<U> other;
};
};
template<class T>
inline bool operator==(const libc_allocator_with_realloc<T>&,
const libc_allocator_with_realloc<T>&) {
return true;
}
template<class T>
inline bool operator!=(const libc_allocator_with_realloc<T>&,
const libc_allocator_with_realloc<T>&) {
return false;
}
_END_GOOGLE_NAMESPACE_
#endif // UTIL_GTL_LIBC_ALLOCATOR_WITH_REALLOC_H_
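Editor's note: the point of this allocator is the non-standard reallocate(), which lets the tables grow an array in place instead of allocate-copy-deallocate. A minimal sketch for a trivially copyable element type (for non-trivial types realloc's raw byte copy would be wrong, which is why the tables pair this allocator with pointer-free values):

#include <sparsehash/internal/libc_allocator_with_realloc.h>

int main() {
    google::libc_allocator_with_realloc<int> alloc;
    int* p = alloc.allocate(4);
    for (int i = 0; i < 4; ++i) p[i] = i;
    p = alloc.reallocate(p, 8);  // may extend in place; the first 4 ints are preserved
    alloc.deallocate(p, 8);
}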

View File

@ -1,46 +0,0 @@
/*
* NOTE: This file is for internal use only.
* Do not use these #defines in your own program!
*/
/* Namespace for Google classes */
#define GOOGLE_NAMESPACE ::google
/* the location of the header defining hash functions */
#define HASH_FUN_H <functional>
/* the namespace of the hash<> function */
#define HASH_NAMESPACE std
/* Define to 1 if you have the <inttypes.h> header file. */
#define HAVE_INTTYPES_H 1
/* Define to 1 if the system has the type `long long'. */
#define HAVE_LONG_LONG 1
/* Define to 1 if you have the `memcpy' function. */
#define HAVE_MEMCPY 1
/* Define to 1 if you have the <stdint.h> header file. */
#define HAVE_STDINT_H 1
/* Define to 1 if you have the <sys/types.h> header file. */
#define HAVE_SYS_TYPES_H 1
/* Define to 1 if the system has the type `uint16_t'. */
#define HAVE_UINT16_T 1
/* Define to 1 if the system has the type `u_int16_t'. */
#define HAVE_U_INT16_T 1
/* Define to 1 if the system has the type `__uint16'. */
/* #undef HAVE___UINT16 */
/* The system-provided hash function including the namespace. */
#define SPARSEHASH_HASH HASH_NAMESPACE::hash
/* Stops putting the code inside the Google namespace */
#define _END_GOOGLE_NAMESPACE_ }
/* Puts following code inside the Google namespace */
#define _START_GOOGLE_NAMESPACE_ namespace google {
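Editor's note: an illustrative sketch of how the wrapper headers above consume these defines (example_wrapper is hypothetical; the expansions shown are the ones configured in this file):

#include HASH_FUN_H                // expands to: #include <functional>

_START_GOOGLE_NAMESPACE_           // expands to: namespace google {
template <class Value,
          class HashFcn = SPARSEHASH_HASH<Value> >  // i.e. std::hash<Value>
class example_wrapper { /* ... */ };
_END_GOOGLE_NAMESPACE_             // expands to: }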

File diff suppressed because it is too large Load Diff

View File

@ -1,363 +0,0 @@
// Copyright (c) 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// ---
//
// This is just a very thin wrapper over sparsehashtable.h, just
// like sgi stl's stl_hash_map is a very thin wrapper over
// stl_hashtable. The major thing we define is operator[], because
// we have a concept of a data_type which stl_hashtable doesn't
// (it only has a key and a value).
//
// We adhere mostly to the STL semantics for hash-map. One important
// exception is that insert() may invalidate iterators entirely -- STL
// semantics are that insert() may reorder iterators, but they all
// still refer to something valid in the hashtable. Not so for us.
// Likewise, insert() may invalidate pointers into the hashtable.
// (Whether insert invalidates iterators and pointers depends on
// whether it results in a hashtable resize). On the plus side,
// delete() doesn't invalidate iterators or pointers at all, or even
// change the ordering of elements.
//
// Here are a few "power user" tips:
//
// 1) set_deleted_key():
// Unlike STL's hash_map, if you want to use erase() you
// *must* call set_deleted_key() after construction.
//
// 2) resize(0):
// When an item is deleted, its memory isn't freed right
// away. This is what allows you to iterate over a hashtable
// and call erase() without invalidating the iterator.
// To force the memory to be freed, call resize(0).
// For tr1 compatibility, this can also be called as rehash(0).
//
// 3) min_load_factor(0.0)
// Setting the minimum load factor to 0.0 guarantees that
// the hash table will never shrink.
//
// Roughly speaking:
// (1) dense_hash_map: fastest, uses the most memory unless entries are small
// (2) sparse_hash_map: slowest, uses the least memory
// (3) hash_map / unordered_map (STL): in the middle
//
// Typically I use sparse_hash_map when I care about space and/or when
// I need to save the hashtable on disk. I use hash_map otherwise. I
// don't personally use dense_hash_map ever; some people use it for
// small maps with lots of lookups.
//
// - dense_hash_map has, typically, about 78% memory overhead (if your
// data takes up X bytes, the hash_map uses .78X more bytes in overhead).
// - sparse_hash_map has about 4 bits overhead per entry.
// - sparse_hash_map can be 3-7 times slower than the others for lookup and,
// especially, inserts. See time_hash_map.cc for details.
//
// See /usr/(local/)?doc/sparsehash-*/sparse_hash_map.html
// for information about how to use this class.
#ifndef _SPARSE_HASH_MAP_H_
#define _SPARSE_HASH_MAP_H_
#include <sparsehash/internal/sparseconfig.h>
#include <algorithm> // needed by stl_alloc
#include <functional> // for equal_to<>, select1st<>, etc
#include <memory> // for alloc
#include <utility> // for pair<>
#include <sparsehash/internal/libc_allocator_with_realloc.h>
#include <sparsehash/internal/sparsehashtable.h> // IWYU pragma: export
#include HASH_FUN_H // for hash<>
_START_GOOGLE_NAMESPACE_
template <class Key, class T,
class HashFcn = SPARSEHASH_HASH<Key>, // defined in sparseconfig.h
class EqualKey = std::equal_to<Key>,
class Alloc = libc_allocator_with_realloc<std::pair<const Key, T> > >
class sparse_hash_map {
private:
// Apparently select1st is not stl-standard, so we define our own
struct SelectKey {
typedef const Key& result_type;
const Key& operator()(const std::pair<const Key, T>& p) const {
return p.first;
}
};
struct SetKey {
void operator()(std::pair<const Key, T>* value, const Key& new_key) const {
*const_cast<Key*>(&value->first) = new_key;
// We also clear the rest of the value (value->second), in case it
// is holding a lot of memory. This assumes T has a zero-arg
// constructor!
value->second = T();
}
};
// For operator[].
struct DefaultValue {
std::pair<const Key, T> operator()(const Key& key) {
return std::make_pair(key, T());
}
};
// The actual data
typedef sparse_hashtable<std::pair<const Key, T>, Key, HashFcn, SelectKey,
SetKey, EqualKey, Alloc> ht;
ht rep;
public:
typedef typename ht::key_type key_type;
typedef T data_type;
typedef T mapped_type;
typedef typename ht::value_type value_type;
typedef typename ht::hasher hasher;
typedef typename ht::key_equal key_equal;
typedef Alloc allocator_type;
typedef typename ht::size_type size_type;
typedef typename ht::difference_type difference_type;
typedef typename ht::pointer pointer;
typedef typename ht::const_pointer const_pointer;
typedef typename ht::reference reference;
typedef typename ht::const_reference const_reference;
typedef typename ht::iterator iterator;
typedef typename ht::const_iterator const_iterator;
typedef typename ht::local_iterator local_iterator;
typedef typename ht::const_local_iterator const_local_iterator;
// Iterator functions
iterator begin() { return rep.begin(); }
iterator end() { return rep.end(); }
const_iterator begin() const { return rep.begin(); }
const_iterator end() const { return rep.end(); }
// These come from tr1's unordered_map. For us, a bucket has 0 or 1 elements.
local_iterator begin(size_type i) { return rep.begin(i); }
local_iterator end(size_type i) { return rep.end(i); }
const_local_iterator begin(size_type i) const { return rep.begin(i); }
const_local_iterator end(size_type i) const { return rep.end(i); }
// Accessor functions
allocator_type get_allocator() const { return rep.get_allocator(); }
hasher hash_funct() const { return rep.hash_funct(); }
hasher hash_function() const { return hash_funct(); }
key_equal key_eq() const { return rep.key_eq(); }
// Constructors
explicit sparse_hash_map(size_type expected_max_items_in_table = 0,
const hasher& hf = hasher(),
const key_equal& eql = key_equal(),
const allocator_type& alloc = allocator_type())
: rep(expected_max_items_in_table, hf, eql, SelectKey(), SetKey(), alloc) {
}
template <class InputIterator>
sparse_hash_map(InputIterator f, InputIterator l,
size_type expected_max_items_in_table = 0,
const hasher& hf = hasher(),
const key_equal& eql = key_equal(),
const allocator_type& alloc = allocator_type())
: rep(expected_max_items_in_table, hf, eql, SelectKey(), SetKey(), alloc) {
rep.insert(f, l);
}
// We use the default copy constructor
// We use the default operator=()
// We use the default destructor
void clear() { rep.clear(); }
void swap(sparse_hash_map& hs) { rep.swap(hs.rep); }
// Functions concerning size
size_type size() const { return rep.size(); }
size_type max_size() const { return rep.max_size(); }
bool empty() const { return rep.empty(); }
size_type bucket_count() const { return rep.bucket_count(); }
size_type max_bucket_count() const { return rep.max_bucket_count(); }
// These are tr1 methods. bucket() is the bucket the key is or would be in.
size_type bucket_size(size_type i) const { return rep.bucket_size(i); }
size_type bucket(const key_type& key) const { return rep.bucket(key); }
float load_factor() const {
return size() * 1.0f / bucket_count();
}
float max_load_factor() const {
float shrink, grow;
rep.get_resizing_parameters(&shrink, &grow);
return grow;
}
void max_load_factor(float new_grow) {
float shrink, grow;
rep.get_resizing_parameters(&shrink, &grow);
rep.set_resizing_parameters(shrink, new_grow);
}
// These aren't tr1 methods but perhaps ought to be.
float min_load_factor() const {
float shrink, grow;
rep.get_resizing_parameters(&shrink, &grow);
return shrink;
}
void min_load_factor(float new_shrink) {
float shrink, grow;
rep.get_resizing_parameters(&shrink, &grow);
rep.set_resizing_parameters(new_shrink, grow);
}
// Deprecated; use min_load_factor() or max_load_factor() instead.
void set_resizing_parameters(float shrink, float grow) {
rep.set_resizing_parameters(shrink, grow);
}
void resize(size_type hint) { rep.resize(hint); }
void rehash(size_type hint) { resize(hint); } // the tr1 name
// Lookup routines
iterator find(const key_type& key) { return rep.find(key); }
const_iterator find(const key_type& key) const { return rep.find(key); }
data_type& operator[](const key_type& key) { // This is our value-add!
// If key is in the hashtable, returns find(key)->second,
// otherwise returns insert(value_type(key, T())).first->second.
// Note it does not create an empty T unless the find fails.
return rep.template find_or_insert<DefaultValue>(key).second;
}
size_type count(const key_type& key) const { return rep.count(key); }
std::pair<iterator, iterator> equal_range(const key_type& key) {
return rep.equal_range(key);
}
std::pair<const_iterator, const_iterator> equal_range(const key_type& key)
const {
return rep.equal_range(key);
}
// Insertion routines
std::pair<iterator, bool> insert(const value_type& obj) {
return rep.insert(obj);
}
template <class InputIterator> void insert(InputIterator f, InputIterator l) {
rep.insert(f, l);
}
void insert(const_iterator f, const_iterator l) {
rep.insert(f, l);
}
// Required for std::insert_iterator; the passed-in iterator is ignored.
iterator insert(iterator, const value_type& obj) {
return insert(obj).first;
}
// Deletion routines
// THESE ARE NON-STANDARD! I make you specify an "impossible" key
// value to identify deleted buckets. You can change the key as
// time goes on, or get rid of it entirely to be insert-only.
void set_deleted_key(const key_type& key) {
rep.set_deleted_key(key);
}
void clear_deleted_key() { rep.clear_deleted_key(); }
key_type deleted_key() const { return rep.deleted_key(); }
// These are standard
size_type erase(const key_type& key) { return rep.erase(key); }
void erase(iterator it) { rep.erase(it); }
void erase(iterator f, iterator l) { rep.erase(f, l); }
// Comparison
bool operator==(const sparse_hash_map& hs) const { return rep == hs.rep; }
bool operator!=(const sparse_hash_map& hs) const { return rep != hs.rep; }
// I/O -- this is an add-on for writing metainformation to disk
//
// For maximum flexibility, this does not assume a particular
// file type (though it will probably be a FILE *). We just pass
// the fp through to rep.
// If your keys and values are simple enough, you can pass this
// serializer to serialize()/unserialize(). "Simple enough" means
// value_type is a POD type that contains no pointers. Note,
// however, we don't try to normalize endianness.
typedef typename ht::NopointerSerializer NopointerSerializer;
// serializer: a class providing operator()(OUTPUT*, const value_type&)
// (writing value_type to OUTPUT). You can specify a
// NopointerSerializer object if appropriate (see above).
// fp: either a FILE*, OR an ostream*/subclass_of_ostream*, OR a
// pointer to a class providing size_t Write(const void*, size_t),
// which writes a buffer into a stream (which fp presumably
// owns) and returns the number of bytes successfully written.
// Note basic_ostream<not_char> is not currently supported.
template <typename ValueSerializer, typename OUTPUT>
bool serialize(ValueSerializer serializer, OUTPUT* fp) {
return rep.serialize(serializer, fp);
}
// serializer: a functor providing operator()(INPUT*, value_type*)
// (reading from INPUT and into value_type). You can specify a
// NopointerSerializer object if appropriate (see above).
// fp: either a FILE*, OR an istream*/subclass_of_istream*, OR a
// pointer to a class providing size_t Read(void*, size_t),
// which reads into a buffer from a stream (which fp presumably
// owns) and returns the number of bytes successfully read.
// Note basic_istream<not_char> is not currently supported.
// NOTE: Since value_type is std::pair<const Key, T>, ValueSerializer
// may need to do a const cast in order to fill in the key.
// NOTE: if Key or T are not POD types, the serializer MUST use
// placement-new to initialize their values, rather than a normal
// equals-assignment or similar. (The value_type* passed into the
// serializer points to garbage memory.)
template <typename ValueSerializer, typename INPUT>
bool unserialize(ValueSerializer serializer, INPUT* fp) {
return rep.unserialize(serializer, fp);
}
// The four methods below are DEPRECATED.
// Use serialize() and unserialize() for new code.
template <typename OUTPUT>
bool write_metadata(OUTPUT *fp) { return rep.write_metadata(fp); }
template <typename INPUT>
bool read_metadata(INPUT *fp) { return rep.read_metadata(fp); }
template <typename OUTPUT>
bool write_nopointer_data(OUTPUT *fp) { return rep.write_nopointer_data(fp); }
template <typename INPUT>
bool read_nopointer_data(INPUT *fp) { return rep.read_nopointer_data(fp); }
};
// We need a global swap as well
template <class Key, class T, class HashFcn, class EqualKey, class Alloc>
inline void swap(sparse_hash_map<Key, T, HashFcn, EqualKey, Alloc>& hm1,
sparse_hash_map<Key, T, HashFcn, EqualKey, Alloc>& hm2) {
hm1.swap(hm2);
}
_END_GOOGLE_NAMESPACE_
#endif /* _SPARSE_HASH_MAP_H_ */
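Editor's note: a minimal sketch of the disk-oriented use case the comments above advertise (the include path and output file name are illustrative assumptions):

#include <cstdio>
#include <sparsehash/sparse_hash_map>  // installed name of this header

int main() {
    google::sparse_hash_map<int, int> m;
    m.set_deleted_key(-1);  // required before erase(); the sparse table needs no empty key
    m[1] = 10;              // operator[] inserts a default T on a miss
    m[2] = 20;
    m.erase(1);

    // value_type (std::pair<const int, int>) is a pointer-free POD, so
    // the bundled NopointerSerializer can write the whole table to disk:
    std::FILE* fp = std::fopen("map.bin", "wb");
    m.serialize(google::sparse_hash_map<int, int>::NopointerSerializer(), fp);
    std::fclose(fp);
}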

View File

@ -1,338 +0,0 @@
// Copyright (c) 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// ---
//
// This is just a very thin wrapper over sparsehashtable.h, just
// like sgi stl's stl_hash_set is a very thin wrapper over
// stl_hashtable. Unlike the map wrapper, we define no operator[]
// here: a set has no separate data_type, only the key (which
// doubles as the value).
//
// This is more different from sparse_hash_map than you might think,
// because all iterators for sets are const (you obviously can't
// change the key, and for sets there is no value).
//
// We adhere mostly to the STL semantics for hash-map. One important
// exception is that insert() may invalidate iterators entirely -- STL
// semantics are that insert() may reorder iterators, but they all
// still refer to something valid in the hashtable. Not so for us.
// Likewise, insert() may invalidate pointers into the hashtable.
// (Whether insert invalidates iterators and pointers depends on
// whether it results in a hashtable resize). On the plus side,
// delete() doesn't invalidate iterators or pointers at all, or even
// change the ordering of elements.
//
// Here are a few "power user" tips:
//
// 1) set_deleted_key():
// Unlike STL's hash_map, if you want to use erase() you
// *must* call set_deleted_key() after construction.
//
// 2) resize(0):
// When an item is deleted, its memory isn't freed right
// away. This allows you to iterate over a hashtable,
// and call erase(), without invalidating the iterator.
// To force the memory to be freed, call resize(0).
// For tr1 compatibility, this can also be called as rehash(0).
//
// 3) min_load_factor(0.0)
// Setting the minimum load factor to 0.0 guarantees that
// the hash table will never shrink.
//
// Roughly speaking:
// (1) dense_hash_set: fastest, uses the most memory unless entries are small
// (2) sparse_hash_set: slowest, uses the least memory
// (3) hash_set / unordered_set (STL): in the middle
//
// Typically I use sparse_hash_set when I care about space and/or when
// I need to save the hashtable on disk. I use hash_set otherwise. I
// don't personally use dense_hash_set ever; some people use it for
// small sets with lots of lookups.
//
// - dense_hash_set has, typically, about 78% memory overhead (if your
// data takes up X bytes, the hash_set uses .78X more bytes in overhead).
// - sparse_hash_set has about 4 bits overhead per entry.
// - sparse_hash_set can be 3-7 times slower than the others for lookup and,
// especially, inserts. See time_hash_map.cc for details.
//
// See /usr/(local/)?doc/sparsehash-*/sparse_hash_set.html
// for information about how to use this class.
#ifndef _SPARSE_HASH_SET_H_
#define _SPARSE_HASH_SET_H_
#include <sparsehash/internal/sparseconfig.h>
#include <algorithm> // needed by stl_alloc
#include <functional> // for equal_to<>
#include <memory> // for alloc (which we don't use)
#include <utility> // for pair<>
#include <sparsehash/internal/libc_allocator_with_realloc.h>
#include <sparsehash/internal/sparsehashtable.h> // IWYU pragma: export
#include HASH_FUN_H // for hash<>
_START_GOOGLE_NAMESPACE_
template <class Value,
class HashFcn = SPARSEHASH_HASH<Value>, // defined in sparseconfig.h
class EqualKey = std::equal_to<Value>,
class Alloc = libc_allocator_with_realloc<Value> >
class sparse_hash_set {
private:
// Apparently identity is not stl-standard, so we define our own
struct Identity {
typedef const Value& result_type;
const Value& operator()(const Value& v) const { return v; }
};
struct SetKey {
void operator()(Value* value, const Value& new_key) const {
*value = new_key;
}
};
typedef sparse_hashtable<Value, Value, HashFcn, Identity, SetKey,
EqualKey, Alloc> ht;
ht rep;
public:
typedef typename ht::key_type key_type;
typedef typename ht::value_type value_type;
typedef typename ht::hasher hasher;
typedef typename ht::key_equal key_equal;
typedef Alloc allocator_type;
typedef typename ht::size_type size_type;
typedef typename ht::difference_type difference_type;
typedef typename ht::const_pointer pointer;
typedef typename ht::const_pointer const_pointer;
typedef typename ht::const_reference reference;
typedef typename ht::const_reference const_reference;
typedef typename ht::const_iterator iterator;
typedef typename ht::const_iterator const_iterator;
typedef typename ht::const_local_iterator local_iterator;
typedef typename ht::const_local_iterator const_local_iterator;
// Iterator functions -- recall all iterators are const
iterator begin() const { return rep.begin(); }
iterator end() const { return rep.end(); }
// These come from tr1's unordered_set. For us, a bucket has 0 or 1 elements.
local_iterator begin(size_type i) const { return rep.begin(i); }
local_iterator end(size_type i) const { return rep.end(i); }
// Accessor functions
allocator_type get_allocator() const { return rep.get_allocator(); }
hasher hash_funct() const { return rep.hash_funct(); }
hasher hash_function() const { return hash_funct(); } // tr1 name
key_equal key_eq() const { return rep.key_eq(); }
// Constructors
explicit sparse_hash_set(size_type expected_max_items_in_table = 0,
const hasher& hf = hasher(),
const key_equal& eql = key_equal(),
const allocator_type& alloc = allocator_type())
: rep(expected_max_items_in_table, hf, eql, Identity(), SetKey(), alloc) {
}
template <class InputIterator>
sparse_hash_set(InputIterator f, InputIterator l,
size_type expected_max_items_in_table = 0,
const hasher& hf = hasher(),
const key_equal& eql = key_equal(),
const allocator_type& alloc = allocator_type())
: rep(expected_max_items_in_table, hf, eql, Identity(), SetKey(), alloc) {
rep.insert(f, l);
}
// We use the default copy constructor
// We use the default operator=()
// We use the default destructor
void clear() { rep.clear(); }
void swap(sparse_hash_set& hs) { rep.swap(hs.rep); }
// Functions concerning size
size_type size() const { return rep.size(); }
size_type max_size() const { return rep.max_size(); }
bool empty() const { return rep.empty(); }
size_type bucket_count() const { return rep.bucket_count(); }
size_type max_bucket_count() const { return rep.max_bucket_count(); }
// These are tr1 methods. bucket() is the bucket the key is or would be in.
size_type bucket_size(size_type i) const { return rep.bucket_size(i); }
size_type bucket(const key_type& key) const { return rep.bucket(key); }
float load_factor() const {
return size() * 1.0f / bucket_count();
}
float max_load_factor() const {
float shrink, grow;
rep.get_resizing_parameters(&shrink, &grow);
return grow;
}
void max_load_factor(float new_grow) {
float shrink, grow;
rep.get_resizing_parameters(&shrink, &grow);
rep.set_resizing_parameters(shrink, new_grow);
}
// These aren't tr1 methods but perhaps ought to be.
float min_load_factor() const {
float shrink, grow;
rep.get_resizing_parameters(&shrink, &grow);
return shrink;
}
void min_load_factor(float new_shrink) {
float shrink, grow;
rep.get_resizing_parameters(&shrink, &grow);
rep.set_resizing_parameters(new_shrink, grow);
}
// Deprecated; use min_load_factor() or max_load_factor() instead.
void set_resizing_parameters(float shrink, float grow) {
rep.set_resizing_parameters(shrink, grow);
}
void resize(size_type hint) { rep.resize(hint); }
void rehash(size_type hint) { resize(hint); } // the tr1 name
// Lookup routines
iterator find(const key_type& key) const { return rep.find(key); }
size_type count(const key_type& key) const { return rep.count(key); }
std::pair<iterator, iterator> equal_range(const key_type& key) const {
return rep.equal_range(key);
}
// Insertion routines
std::pair<iterator, bool> insert(const value_type& obj) {
std::pair<typename ht::iterator, bool> p = rep.insert(obj);
return std::pair<iterator, bool>(p.first, p.second); // const to non-const
}
template <class InputIterator> void insert(InputIterator f, InputIterator l) {
rep.insert(f, l);
}
void insert(const_iterator f, const_iterator l) {
rep.insert(f, l);
}
// Required for std::insert_iterator; the passed-in iterator is ignored.
iterator insert(iterator, const value_type& obj) {
return insert(obj).first;
}
// Deletion routines
// THESE ARE NON-STANDARD! I make you specify an "impossible" key
// value to identify deleted buckets. You can change the key as
// time goes on, or get rid of it entirely to be insert-only.
void set_deleted_key(const key_type& key) { rep.set_deleted_key(key); }
void clear_deleted_key() { rep.clear_deleted_key(); }
key_type deleted_key() const { return rep.deleted_key(); }
// These are standard
size_type erase(const key_type& key) { return rep.erase(key); }
void erase(iterator it) { rep.erase(it); }
void erase(iterator f, iterator l) { rep.erase(f, l); }
// Comparison
bool operator==(const sparse_hash_set& hs) const { return rep == hs.rep; }
bool operator!=(const sparse_hash_set& hs) const { return rep != hs.rep; }
// I/O -- this is an add-on for writing metainformation to disk
//
// For maximum flexibility, this does not assume a particular
// file type (though it will probably be a FILE *). We just pass
// the fp through to rep.
// If your keys and values are simple enough, you can pass this
// serializer to serialize()/unserialize(). "Simple enough" means
// value_type is a POD type that contains no pointers. Note,
// however, we don't try to normalize endianness.
typedef typename ht::NopointerSerializer NopointerSerializer;
// serializer: a class providing operator()(OUTPUT*, const value_type&)
// (writing value_type to OUTPUT). You can specify a
// NopointerSerializer object if appropriate (see above).
// fp: either a FILE*, OR an ostream*/subclass_of_ostream*, OR a
// pointer to a class providing size_t Write(const void*, size_t),
// which writes a buffer into a stream (which fp presumably
// owns) and returns the number of bytes successfully written.
// Note basic_ostream<not_char> is not currently supported.
template <typename ValueSerializer, typename OUTPUT>
bool serialize(ValueSerializer serializer, OUTPUT* fp) {
return rep.serialize(serializer, fp);
}
// serializer: a functor providing operator()(INPUT*, value_type*)
// (reading from INPUT and into value_type). You can specify a
// NopointerSerializer object if appropriate (see above).
// fp: either a FILE*, OR an istream*/subclass_of_istream*, OR a
// pointer to a class providing size_t Read(void*, size_t),
// which reads into a buffer from a stream (which fp presumably
// owns) and returns the number of bytes successfully read.
// Note basic_istream<not_char> is not currently supported.
// NOTE: Since value_type is const Key, ValueSerializer
// may need to do a const cast in order to fill in the key.
// NOTE: if Key is not a POD type, the serializer MUST use
// placement-new to initialize its value, rather than a normal
// equals-assignment or similar. (The value_type* passed into
// the serializer points to garbage memory.)
template <typename ValueSerializer, typename INPUT>
bool unserialize(ValueSerializer serializer, INPUT* fp) {
return rep.unserialize(serializer, fp);
}
// The four methods below are DEPRECATED.
// Use serialize() and unserialize() for new code.
template <typename OUTPUT>
bool write_metadata(OUTPUT *fp) { return rep.write_metadata(fp); }
template <typename INPUT>
bool read_metadata(INPUT *fp) { return rep.read_metadata(fp); }
template <typename OUTPUT>
bool write_nopointer_data(OUTPUT *fp) { return rep.write_nopointer_data(fp); }
template <typename INPUT>
bool read_nopointer_data(INPUT *fp) { return rep.read_nopointer_data(fp); }
};
template <class Val, class HashFcn, class EqualKey, class Alloc>
inline void swap(sparse_hash_set<Val, HashFcn, EqualKey, Alloc>& hs1,
sparse_hash_set<Val, HashFcn, EqualKey, Alloc>& hs2) {
hs1.swap(hs2);
}
_END_GOOGLE_NAMESPACE_
#endif /* _SPARSE_HASH_SET_H_ */
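(For orientation, a minimal usage sketch of the non-standard conventions this header documents -- the deleted-key protocol and NopointerSerializer-based serialization. The API calls are the ones declared above; the include path and output file name are assumptions for illustration.)

#include <cstdio>
#include <sparsehash/sparse_hash_set>

int main()
{
    google::sparse_hash_set<int> s;
    s.set_deleted_key(-1);   // required before erase(); pick a value never inserted
    s.insert(42);
    s.insert(7);
    s.erase(7);              // legal only once a deleted key is set

    // int is a pointer-free POD, so the header's NopointerSerializer applies.
    std::FILE * fp = std::fopen("set.bin", "wb");
    bool ok = fp && s.serialize(google::sparse_hash_set<int>::NopointerSerializer(), fp);
    if (fp)
        std::fclose(fp);
    return (s.find(42) != s.end() && ok) ? 0 : 1;
}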

File diff suppressed because it is too large

View File

@ -1,134 +0,0 @@
// Copyright 2005 Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// ----
//
// Template metaprogramming utility functions.
//
// This code is compiled directly on many platforms, including client
// platforms like Windows, Mac, and embedded systems. Before making
// any changes here, make sure that you're not breaking any platforms.
//
//
// The names chosen here reflect those used in tr1 and the boost::mpl
// library; there are similar operations used in the Loki library as
// well. I prefer the boost names for 2 reasons:
// 1. I think that portions of the Boost libraries are more likely to
// be included in the c++ standard.
// 2. It is not impossible that some of the boost libraries will be
// included in our own build in the future.
// Both of these outcomes mean that we may be able to directly replace
// some of these with boost equivalents.
//
#ifndef BASE_TEMPLATE_UTIL_H_
#define BASE_TEMPLATE_UTIL_H_
#include <sparsehash/internal/sparseconfig.h>
_START_GOOGLE_NAMESPACE_
// Types small_ and big_ are guaranteed such that sizeof(small_) <
// sizeof(big_)
typedef char small_;
struct big_ {
char dummy[2];
};
// Identity metafunction.
template <class T>
struct identity_ {
typedef T type;
};
// integral_constant, defined in tr1, is a wrapper for an integer
// value. We don't really need this generality; we could get away
// with hardcoding the integer type to bool. We use the fully
// general integral_constant for compatibility with tr1.
template<class T, T v>
struct integral_constant {
static const T value = v;
typedef T value_type;
typedef integral_constant<T, v> type;
};
template <class T, T v> const T integral_constant<T, v>::value;
// Abbreviations: true_type and false_type are structs that represent boolean
// true and false values. Also define the boost::mpl versions of those names,
// true_ and false_.
typedef integral_constant<bool, true> true_type;
typedef integral_constant<bool, false> false_type;
typedef true_type true_;
typedef false_type false_;
// if_ is a templatized conditional statement.
// if_<cond, A, B> is a compile time evaluation of cond.
// if_<>::type contains A if cond is true, B otherwise.
template<bool cond, typename A, typename B>
struct if_{
typedef A type;
};
template<typename A, typename B>
struct if_<false, A, B> {
typedef B type;
};
// type_equals_ is a template type comparator, similar to Loki IsSameType.
// type_equals_<A, B>::value is true iff "A" is the same type as "B".
//
// New code should prefer base::is_same, defined in base/type_traits.h.
// It is functionally identical, but is_same is the standard spelling.
template<typename A, typename B>
struct type_equals_ : public false_ {
};
template<typename A>
struct type_equals_<A, A> : public true_ {
};
// and_ is a template && operator.
// and_<A, B>::value evaluates "A::value && B::value".
template<typename A, typename B>
struct and_ : public integral_constant<bool, (A::value && B::value)> {
};
// or_ is a template || operator.
// or_<A, B>::value evaluates "A::value || B::value".
template<typename A, typename B>
struct or_ : public integral_constant<bool, (A::value || B::value)> {
};
_END_GOOGLE_NAMESPACE_
#endif // BASE_TEMPLATE_UTIL_H_
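(A compile-time sketch of the utilities above; assumes the library's default google namespace and that this header is installed as <sparsehash/template_util.h>.)

#include <sparsehash/template_util.h>

// if_ selects a type at compile time: int when long is 4 bytes, long long otherwise.
typedef google::if_<sizeof(long) == 4, int, long long>::type counter_t;

// type_equals_, and_ and or_ all expose an integral_constant-style ::value.
static_assert(google::type_equals_<counter_t, counter_t>::value, "identity holds");
static_assert(google::and_<google::true_, google::true_>::value, "conjunction");
static_assert(!google::or_<google::false_, google::false_>::value, "disjunction");

int main() { return 0; }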

View File

@ -1,342 +0,0 @@
// Copyright (c) 2006, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// ----
//
// This code is compiled directly on many platforms, including client
// platforms like Windows, Mac, and embedded systems. Before making
// any changes here, make sure that you're not breaking any platforms.
//
// Define a small subset of tr1 type traits. The traits we define are:
// is_integral
// is_floating_point
// is_pointer
// is_enum
// is_reference
// is_pod
// has_trivial_constructor
// has_trivial_copy
// has_trivial_assign
// has_trivial_destructor
// remove_const
// remove_volatile
// remove_cv
// remove_reference
// add_reference
// remove_pointer
// is_same
// is_convertible
// We can add more type traits as required.
#ifndef BASE_TYPE_TRAITS_H_
#define BASE_TYPE_TRAITS_H_
#include <sparsehash/internal/sparseconfig.h>
#include <utility> // For pair
#include <sparsehash/template_util.h> // For true_type and false_type
_START_GOOGLE_NAMESPACE_
template <class T> struct is_integral;
template <class T> struct is_floating_point;
template <class T> struct is_pointer;
// MSVC can't compile this correctly, and neither can gcc 3.3.5 (at least)
#if !defined(_MSC_VER) && !(defined(__GNUC__) && __GNUC__ <= 3)
// is_enum uses is_convertible, which is not available on MSVC.
template <class T> struct is_enum;
#endif
template <class T> struct is_reference;
template <class T> struct is_pod;
template <class T> struct has_trivial_constructor;
template <class T> struct has_trivial_copy;
template <class T> struct has_trivial_assign;
template <class T> struct has_trivial_destructor;
template <class T> struct remove_const;
template <class T> struct remove_volatile;
template <class T> struct remove_cv;
template <class T> struct remove_reference;
template <class T> struct add_reference;
template <class T> struct remove_pointer;
template <class T, class U> struct is_same;
#if !defined(_MSC_VER) && !(defined(__GNUC__) && __GNUC__ <= 3)
template <class From, class To> struct is_convertible;
#endif
// is_integral is false except for the built-in integer types. A
// cv-qualified type is integral if and only if the underlying type is.
template <class T> struct is_integral : false_type { };
template<> struct is_integral<bool> : true_type { };
template<> struct is_integral<char> : true_type { };
template<> struct is_integral<unsigned char> : true_type { };
template<> struct is_integral<signed char> : true_type { };
#if defined(_MSC_VER)
// wchar_t is not by default a distinct type from unsigned short in
// Microsoft C.
// See http://msdn2.microsoft.com/en-us/library/dh8che7s(VS.80).aspx
template<> struct is_integral<__wchar_t> : true_type { };
#else
template<> struct is_integral<wchar_t> : true_type { };
#endif
template<> struct is_integral<short> : true_type { };
template<> struct is_integral<unsigned short> : true_type { };
template<> struct is_integral<int> : true_type { };
template<> struct is_integral<unsigned int> : true_type { };
template<> struct is_integral<long> : true_type { };
template<> struct is_integral<unsigned long> : true_type { };
#ifdef HAVE_LONG_LONG
template<> struct is_integral<long long> : true_type { };
template<> struct is_integral<unsigned long long> : true_type { };
#endif
template <class T> struct is_integral<const T> : is_integral<T> { };
template <class T> struct is_integral<volatile T> : is_integral<T> { };
template <class T> struct is_integral<const volatile T> : is_integral<T> { };
// is_floating_point is false except for the built-in floating-point types.
// A cv-qualified type is floating-point if and only if the underlying type is.
template <class T> struct is_floating_point : false_type { };
template<> struct is_floating_point<float> : true_type { };
template<> struct is_floating_point<double> : true_type { };
template<> struct is_floating_point<long double> : true_type { };
template <class T> struct is_floating_point<const T>
: is_floating_point<T> { };
template <class T> struct is_floating_point<volatile T>
: is_floating_point<T> { };
template <class T> struct is_floating_point<const volatile T>
: is_floating_point<T> { };
// is_pointer is false except for pointer types. A cv-qualified type (e.g.
// "int* const", as opposed to "int const*") is cv-qualified if and only if
// the underlying type is.
template <class T> struct is_pointer : false_type { };
template <class T> struct is_pointer<T*> : true_type { };
template <class T> struct is_pointer<const T> : is_pointer<T> { };
template <class T> struct is_pointer<volatile T> : is_pointer<T> { };
template <class T> struct is_pointer<const volatile T> : is_pointer<T> { };
#if !defined(_MSC_VER) && !(defined(__GNUC__) && __GNUC__ <= 3)
namespace internal {
template <class T> struct is_class_or_union {
template <class U> static small_ tester(void (U::*)());
template <class U> static big_ tester(...);
static const bool value = sizeof(tester<T>(0)) == sizeof(small_);
};
// is_convertible chokes if the first argument is an array. That's why
// we use add_reference here.
template <bool NotUnum, class T> struct is_enum_impl
: is_convertible<typename add_reference<T>::type, int> { };
template <class T> struct is_enum_impl<true, T> : false_type { };
} // namespace internal
// Specified by TR1 [4.5.1] primary type categories.
// Implementation note:
//
// Each type is either void, integral, floating point, array, pointer,
// reference, member object pointer, member function pointer, enum,
// union or class. Out of these, only integral, floating point, reference,
// class and enum types are potentially convertible to int. Therefore,
// if a type is not a reference, integral, floating point or class and
// is convertible to int, it's an enum. Adding cv-qualification to a type
// does not change whether it's an enum.
//
// Is-convertible-to-int check is done only if all other checks pass,
// because it can't be used with some types (e.g. void or classes with
// inaccessible conversion operators).
template <class T> struct is_enum
: internal::is_enum_impl<
is_same<T, void>::value ||
is_integral<T>::value ||
is_floating_point<T>::value ||
is_reference<T>::value ||
internal::is_class_or_union<T>::value,
T> { };
template <class T> struct is_enum<const T> : is_enum<T> { };
template <class T> struct is_enum<volatile T> : is_enum<T> { };
template <class T> struct is_enum<const volatile T> : is_enum<T> { };
#endif
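// (Illustrative aside, not part of the original header: given the machinery
// above, is_enum reports true exactly for enumeration types, e.g.
//     enum Fruit { APPLE, PEAR };
//     is_enum<Fruit>::value        // true
//     is_enum<int>::value          // false -- integrals are filtered out
// before the is-convertible-to-int probe is ever consulted.)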
// is_reference is false except for reference types.
template<typename T> struct is_reference : false_type {};
template<typename T> struct is_reference<T&> : true_type {};
// We can't get is_pod right without compiler help, so fail conservatively.
// We will assume it's false except for arithmetic types, enumerations,
// pointers and cv-qualified versions thereof. Note that std::pair<T,U>
// is not a POD even if T and U are PODs.
template <class T> struct is_pod
: integral_constant<bool, (is_integral<T>::value ||
is_floating_point<T>::value ||
#if !defined(_MSC_VER) && !(defined(__GNUC__) && __GNUC__ <= 3)
// is_enum is not available on MSVC.
is_enum<T>::value ||
#endif
is_pointer<T>::value)> { };
template <class T> struct is_pod<const T> : is_pod<T> { };
template <class T> struct is_pod<volatile T> : is_pod<T> { };
template <class T> struct is_pod<const volatile T> : is_pod<T> { };
// We can't get has_trivial_constructor right without compiler help, so
// fail conservatively. We will assume it's false except for: (1) types
// for which is_pod is true. (2) std::pair of types with trivial
// constructors. (3) array of a type with a trivial constructor.
// (4) const versions thereof.
template <class T> struct has_trivial_constructor : is_pod<T> { };
template <class T, class U> struct has_trivial_constructor<std::pair<T, U> >
: integral_constant<bool,
(has_trivial_constructor<T>::value &&
has_trivial_constructor<U>::value)> { };
template <class A, int N> struct has_trivial_constructor<A[N]>
: has_trivial_constructor<A> { };
template <class T> struct has_trivial_constructor<const T>
: has_trivial_constructor<T> { };
// We can't get has_trivial_copy right without compiler help, so fail
// conservatively. We will assume it's false except for: (1) types
// for which is_pod is true. (2) std::pair of types with trivial copy
// constructors. (3) array of a type with a trivial copy constructor.
// (4) const versions thereof.
template <class T> struct has_trivial_copy : is_pod<T> { };
template <class T, class U> struct has_trivial_copy<std::pair<T, U> >
: integral_constant<bool,
(has_trivial_copy<T>::value &&
has_trivial_copy<U>::value)> { };
template <class A, int N> struct has_trivial_copy<A[N]>
: has_trivial_copy<A> { };
template <class T> struct has_trivial_copy<const T> : has_trivial_copy<T> { };
// We can't get has_trivial_assign right without compiler help, so fail
// conservatively. We will assume it's false except for: (1) types
// for which is_pod is true. (2) std::pair of types with trivial
// assignment operators. (3) array of a type with a trivial assignment operator.
template <class T> struct has_trivial_assign : is_pod<T> { };
template <class T, class U> struct has_trivial_assign<std::pair<T, U> >
: integral_constant<bool,
(has_trivial_assign<T>::value &&
has_trivial_assign<U>::value)> { };
template <class A, int N> struct has_trivial_assign<A[N]>
: has_trivial_assign<A> { };
// We can't get has_trivial_destructor right without compiler help, so
// fail conservatively. We will assume it's false except for: (1) types
// for which is_pod is true. (2) std::pair of types with trivial
// destructors. (3) array of a type with a trivial destructor.
// (4) const versions thereof.
template <class T> struct has_trivial_destructor : is_pod<T> { };
template <class T, class U> struct has_trivial_destructor<std::pair<T, U> >
: integral_constant<bool,
(has_trivial_destructor<T>::value &&
has_trivial_destructor<U>::value)> { };
template <class A, int N> struct has_trivial_destructor<A[N]>
: has_trivial_destructor<A> { };
template <class T> struct has_trivial_destructor<const T>
: has_trivial_destructor<T> { };
// Specified by TR1 [4.7.1]
template<typename T> struct remove_const { typedef T type; };
template<typename T> struct remove_const<T const> { typedef T type; };
template<typename T> struct remove_volatile { typedef T type; };
template<typename T> struct remove_volatile<T volatile> { typedef T type; };
template<typename T> struct remove_cv {
typedef typename remove_const<typename remove_volatile<T>::type>::type type;
};
// Specified by TR1 [4.7.2] Reference modifications.
template<typename T> struct remove_reference { typedef T type; };
template<typename T> struct remove_reference<T&> { typedef T type; };
template <typename T> struct add_reference { typedef T& type; };
template <typename T> struct add_reference<T&> { typedef T& type; };
// Specified by TR1 [4.7.4] Pointer modifications.
template<typename T> struct remove_pointer { typedef T type; };
template<typename T> struct remove_pointer<T*> { typedef T type; };
template<typename T> struct remove_pointer<T* const> { typedef T type; };
template<typename T> struct remove_pointer<T* volatile> { typedef T type; };
template<typename T> struct remove_pointer<T* const volatile> {
typedef T type; };
// Specified by TR1 [4.6] Relationships between types
template<typename T, typename U> struct is_same : public false_type { };
template<typename T> struct is_same<T, T> : public true_type { };
// Specified by TR1 [4.6] Relationships between types
#if !defined(_MSC_VER) && !(defined(__GNUC__) && __GNUC__ <= 3)
namespace internal {
// This class is an implementation detail for is_convertible, and you
// don't need to know how it works to use is_convertible. For those
// who care: we declare two different functions, one whose argument is
// of type To and one with a variadic argument list. We give them
// return types of different size, so we can use sizeof to trick the
// compiler into telling us which function it would have chosen if we
// had called it with an argument of type From. See Alexandrescu's
// _Modern C++ Design_ for more details on this sort of trick.
template <typename From, typename To>
struct ConvertHelper {
static small_ Test(To);
static big_ Test(...);
static From Create();
};
} // namespace internal
// Inherits from true_type if From is convertible to To, false_type otherwise.
template <typename From, typename To>
struct is_convertible
: integral_constant<bool,
sizeof(internal::ConvertHelper<From, To>::Test(
internal::ConvertHelper<From, To>::Create()))
== sizeof(small_)> {
};
#endif
_END_GOOGLE_NAMESPACE_
// Right now these macros are no-ops, and mostly just document the fact
// these types are PODs, for human use. They may be made more contentful
// later. The typedef is just to make it legal to put a semicolon after
// these macros.
#define DECLARE_POD(TypeName) typedef int Dummy_Type_For_DECLARE_POD
#define DECLARE_NESTED_POD(TypeName) DECLARE_POD(TypeName)
#define PROPAGATE_POD_FROM_TEMPLATE_ARGUMENT(TemplateName) \
typedef int Dummy_Type_For_PROPAGATE_POD_FROM_TEMPLATE_ARGUMENT
#define ENFORCE_POD(TypeName) typedef int Dummy_Type_For_ENFORCE_POD
#endif // BASE_TYPE_TRAITS_H_
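(The point of these conservative traits is to let container code pick a cheap bulk path for POD-like element types. Below is a self-contained sketch of that tag-dispatch pattern; the helper name copy_elements is illustrative, not from the library, and the include path assumes a default sparsehash install.)

#include <cstddef>
#include <cstring>
#include <sparsehash/type_traits.h>

// Tag dispatch on has_trivial_copy: memcpy for trivially copyable elements,
// per-element assignment for everything else.
template <typename T>
void copy_elements(T * dst, const T * src, std::size_t n, google::true_type)
{
    std::memcpy(dst, src, n * sizeof(T));
}

template <typename T>
void copy_elements(T * dst, const T * src, std::size_t n, google::false_type)
{
    for (std::size_t i = 0; i != n; ++i)
        dst[i] = src[i];
}

template <typename T>
void copy_elements(T * dst, const T * src, std::size_t n)
{
    copy_elements(dst, src, n,
                  google::integral_constant<bool, google::has_trivial_copy<T>::value>());
}

int main()
{
    int a[3] = {1, 2, 3};
    int b[3];
    copy_elements(b, a, 3);   // int is POD, so this resolves to the memcpy path
    return b[2] == 3 ? 0 : 1;
}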

View File

@ -0,0 +1,229 @@
# modified copy of contrib/orc/c++/src/CMakeLists.txt
set(LIBRARY_INCLUDE ${ClickHouse_SOURCE_DIR}/contrib/orc/c++/include)
set(LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/orc/c++/src)
set(PROTOBUF_INCLUDE_DIR ${Protobuf_INCLUDE_DIR})
set(PROTOBUF_EXECUTABLE ${Protobuf_PROTOC_EXECUTABLE})
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXX11_FLAGS} ${WARN_FLAGS}")
INCLUDE(CheckCXXSourceCompiles)
CHECK_CXX_SOURCE_COMPILES("
#include<fcntl.h>
#include<unistd.h>
int main(int,char*[]){
int f = open(\"/x/y\", O_RDONLY);
char buf[100];
return pread(f, buf, 100, 1000) == 0;
}"
HAS_PREAD
)
CHECK_CXX_SOURCE_COMPILES("
#include<time.h>
int main(int,char*[]){
struct tm time2020;
return !strptime(\"2020-02-02 12:34:56\", \"%Y-%m-%d %H:%M:%S\", &time2020);
}"
HAS_STRPTIME
)
CHECK_CXX_SOURCE_COMPILES("
#include<string>
int main(int,char* argv[]){
return static_cast<int>(std::stoll(argv[0]));
}"
HAS_STOLL
)
CHECK_CXX_SOURCE_COMPILES("
#include<stdint.h>
#include<stdio.h>
int main(int,char*[]){
int64_t x = 1; printf(\"%lld\",x);
}"
INT64_IS_LL
)
CHECK_CXX_SOURCE_COMPILES("
#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored \"-Wdeprecated\"
#pragma clang diagnostic pop
#elif defined(__GNUC__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored \"-Wdeprecated\"
#pragma GCC diagnostic pop
#elif defined(_MSC_VER)
#pragma warning( push )
#pragma warning( disable : 4996 )
#pragma warning( pop )
#else
unknownCompiler!
#endif
int main(int, char *[]) {}"
HAS_DIAGNOSTIC_PUSH
)
CHECK_CXX_SOURCE_COMPILES("
#include<cmath>
int main(int, char *[]) {
return std::isnan(1.0f);
}"
HAS_STD_ISNAN
)
CHECK_CXX_SOURCE_COMPILES("
#include<mutex>
int main(int, char *[]) {
std::mutex test_mutex;
std::lock_guard<std::mutex> lock_mutex(test_mutex);
}"
HAS_STD_MUTEX
)
CHECK_CXX_SOURCE_COMPILES("
#include<string>
std::string func() {
std::string var = \"test\";
return std::move(var);
}
int main(int, char *[]) {}"
NEEDS_REDUNDANT_MOVE
)
INCLUDE(CheckCXXSourceRuns)
CHECK_CXX_SOURCE_RUNS("
#include<time.h>
int main(int, char *[]) {
time_t t = -14210715; // 1969-07-20 12:34:45
struct tm *ptm = gmtime(&t);
return !(ptm && ptm->tm_year == 69);
}"
HAS_PRE_1970
)
CHECK_CXX_SOURCE_RUNS("
#include<stdlib.h>
#include<time.h>
int main(int, char *[]) {
setenv(\"TZ\", \"America/Los_Angeles\", 1);
tzset();
struct tm time2037;
struct tm time2038;
strptime(\"2037-05-05 12:34:56\", \"%Y-%m-%d %H:%M:%S\", &time2037);
strptime(\"2038-05-05 12:34:56\", \"%Y-%m-%d %H:%M:%S\", &time2038);
return mktime(&time2038) - mktime(&time2037) != 31536000;
}"
HAS_POST_2038
)
set(CMAKE_REQUIRED_INCLUDES ${ZLIB_INCLUDE_DIR})
set(CMAKE_REQUIRED_LIBRARIES zlib)
CHECK_CXX_SOURCE_COMPILES("
#define Z_PREFIX
#include<zlib.h>
z_stream strm;
int main(int, char *[]) {
deflateReset(&strm);
}"
NEEDS_Z_PREFIX
)
configure_file (
"${LIBRARY_DIR}/Adaptor.hh.in"
"${CMAKE_CURRENT_BINARY_DIR}/Adaptor.hh"
)
add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/orc_proto.pb.h ${CMAKE_CURRENT_BINARY_DIR}/orc_proto.pb.cc
COMMAND ${PROTOBUF_EXECUTABLE}
-I${ClickHouse_SOURCE_DIR}/contrib/orc/proto
--cpp_out="${CMAKE_CURRENT_BINARY_DIR}"
"${ClickHouse_SOURCE_DIR}/contrib/orc/proto/orc_proto.proto"
)
set(SOURCE_FILES
"${CMAKE_CURRENT_BINARY_DIR}/Adaptor.hh"
${CMAKE_CURRENT_BINARY_DIR}/orc_proto.pb.h
${LIBRARY_DIR}/io/InputStream.cc
${LIBRARY_DIR}/io/OutputStream.cc
${LIBRARY_DIR}/wrap/orc-proto-wrapper.cc
${LIBRARY_DIR}/Adaptor.cc
${LIBRARY_DIR}/ByteRLE.cc
${LIBRARY_DIR}/ColumnPrinter.cc
${LIBRARY_DIR}/ColumnReader.cc
${LIBRARY_DIR}/ColumnWriter.cc
${LIBRARY_DIR}/Common.cc
${LIBRARY_DIR}/Compression.cc
${LIBRARY_DIR}/Exceptions.cc
${LIBRARY_DIR}/Int128.cc
${LIBRARY_DIR}/LzoDecompressor.cc
${LIBRARY_DIR}/MemoryPool.cc
${LIBRARY_DIR}/OrcFile.cc
${LIBRARY_DIR}/Reader.cc
${LIBRARY_DIR}/RLEv1.cc
${LIBRARY_DIR}/RLEv2.cc
${LIBRARY_DIR}/RLE.cc
${LIBRARY_DIR}/Statistics.cc
${LIBRARY_DIR}/StripeStream.cc
${LIBRARY_DIR}/Timezone.cc
${LIBRARY_DIR}/TypeImpl.cc
${LIBRARY_DIR}/Vector.cc
${LIBRARY_DIR}/Writer.cc
)
if(ORC_CXX_HAS_THREAD_LOCAL AND BUILD_LIBHDFSPP)
set(SOURCE_FILES ${SOURCE_FILES} ${LIBRARY_DIR}/OrcHdfsFile.cc)
endif(ORC_CXX_HAS_THREAD_LOCAL AND BUILD_LIBHDFSPP)
#list(TRANSFORM SOURCE_FILES PREPEND ${LIBRARY_DIR}/)
configure_file (
"${LIBRARY_INCLUDE}/orc/orc-config.hh.in"
"${CMAKE_CURRENT_BINARY_DIR}/orc/orc-config.hh"
)
add_library (orc ${SOURCE_FILES})
target_include_directories (orc
PRIVATE
${LIBRARY_INCLUDE}
${LIBRARY_DIR}
#PUBLIC
${CMAKE_CURRENT_BINARY_DIR}
PRIVATE
${PROTOBUF_INCLUDE_DIR}
${ZLIB_INCLUDE_DIR}
${SNAPPY_INCLUDE_DIR}
${LZ4_INCLUDE_DIR}
${LIBHDFSPP_INCLUDE_DIR}
)
target_link_libraries (orc PRIVATE
${Protobuf_LIBRARY}
${ZLIB_LIBRARIES}
${SNAPPY_LIBRARY}
${LZ4_LIBRARY}
${LIBHDFSPP_LIBRARIES}
)
#install(TARGETS orc DESTINATION lib)
if(ORC_CXX_HAS_THREAD_LOCAL AND BUILD_LIBHDFSPP)
add_definitions(-DBUILD_LIBHDFSPP)
endif(ORC_CXX_HAS_THREAD_LOCAL AND BUILD_LIBHDFSPP)

contrib/sparsehash-c11 vendored Submodule

@ -0,0 +1 @@
Subproject commit cf0bffaa456f23bc4174462a789b90f8b6f5f42f

View File

@ -101,28 +101,6 @@ add_headers_and_sources(clickhouse_common_io src/Common)
add_headers_and_sources(clickhouse_common_io src/Common/HashTable) add_headers_and_sources(clickhouse_common_io src/Common/HashTable)
add_headers_and_sources(clickhouse_common_io src/IO) add_headers_and_sources(clickhouse_common_io src/IO)
add_headers_and_sources(dbms src/Core)
add_headers_and_sources(dbms src/Compression/)
add_headers_and_sources(dbms src/DataStreams)
add_headers_and_sources(dbms src/DataTypes)
add_headers_and_sources(dbms src/Databases)
add_headers_and_sources(dbms src/Interpreters)
add_headers_and_sources(dbms src/Interpreters/ClusterProxy)
add_headers_and_sources(dbms src/Columns)
add_headers_and_sources(dbms src/Storages)
add_headers_and_sources(dbms src/Storages/Distributed)
add_headers_and_sources(dbms src/Storages/MergeTree)
add_headers_and_sources(dbms src/Storages/LiveView)
add_headers_and_sources(dbms src/Client)
add_headers_and_sources(dbms src/Formats)
add_headers_and_sources(dbms src/Processors)
add_headers_and_sources(dbms src/Processors/Executors)
add_headers_and_sources(dbms src/Processors/Formats)
add_headers_and_sources(dbms src/Processors/Formats/Impl)
add_headers_and_sources(dbms src/Processors/Transforms)
add_headers_and_sources(dbms src/Processors/Sources)
add_headers_only(dbms src/Server)
if(USE_RDKAFKA) if(USE_RDKAFKA)
add_headers_and_sources(dbms src/Storages/Kafka) add_headers_and_sources(dbms src/Storages/Kafka)
endif() endif()
@ -160,23 +138,72 @@ if (OS_FREEBSD)
target_compile_definitions (clickhouse_common_io PUBLIC CLOCK_MONOTONIC_COARSE=CLOCK_MONOTONIC_FAST) target_compile_definitions (clickhouse_common_io PUBLIC CLOCK_MONOTONIC_COARSE=CLOCK_MONOTONIC_FAST)
endif () endif ()
if (USE_UNWIND)
target_link_libraries (clickhouse_common_io PRIVATE ${UNWIND_LIBRARIES})
endif ()
add_subdirectory(src/Common/ZooKeeper) add_subdirectory(src/Common/ZooKeeper)
add_subdirectory(src/Common/Config) add_subdirectory(src/Common/Config)
set (all_modules)
macro(add_object_library name common_path)
if (MAKE_STATIC_LIBRARIES OR NOT SPLIT_SHARED_LIBRARIES)
add_glob(dbms_headers RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ${common_path}/*.h)
add_glob(dbms_sources ${common_path}/*.cpp ${common_path}/*.c ${common_path}/*.h)
else ()
list (APPEND all_modules ${name})
add_glob(${name}_headers RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ${common_path}/*.h)
add_glob(${name}_sources ${common_path}/*.cpp ${common_path}/*.c ${common_path}/*.h)
add_library(${name} SHARED ${${name}_sources} ${${name}_headers})
target_link_libraries (${name} PRIVATE -Wl,--unresolved-symbols=ignore-all)
endif ()
endmacro()
add_object_library(clickhouse_core src/Core)
add_object_library(clickhouse_compression src/Compression/)
add_object_library(clickhouse_datastreams src/DataStreams)
add_object_library(clickhouse_datatypes src/DataTypes)
add_object_library(clickhouse_databases src/Databases)
add_object_library(clickhouse_interpreters src/Interpreters)
add_object_library(clickhouse_interpreters_clusterproxy src/Interpreters/ClusterProxy)
add_object_library(clickhouse_columns src/Columns)
add_object_library(clickhouse_storages src/Storages)
add_object_library(clickhouse_storages_distributed src/Storages/Distributed)
add_object_library(clickhouse_storages_mergetree src/Storages/MergeTree)
add_object_library(clickhouse_storages_liveview src/Storages/LiveView)
add_object_library(clickhouse_client src/Client)
add_object_library(clickhouse_formats src/Formats)
add_object_library(clickhouse_processors src/Processors)
add_object_library(clickhouse_processors_executors src/Processors/Executors)
add_object_library(clickhouse_processors_formats src/Processors/Formats)
add_object_library(clickhouse_processors_formats_impl src/Processors/Formats/Impl)
add_object_library(clickhouse_processors_transforms src/Processors/Transforms)
add_object_library(clickhouse_processors_sources src/Processors/Sources)
if (MAKE_STATIC_LIBRARIES OR NOT SPLIT_SHARED_LIBRARIES) if (MAKE_STATIC_LIBRARIES OR NOT SPLIT_SHARED_LIBRARIES)
add_library(dbms ${dbms_headers} ${dbms_sources}) add_library (dbms STATIC ${dbms_headers} ${dbms_sources})
else () set (all_modules dbms)
add_library(dbms SHARED ${dbms_headers} ${dbms_sources}) else()
add_library (dbms SHARED ${dbms_headers} ${dbms_sources})
target_link_libraries (dbms PUBLIC ${all_modules})
list (APPEND all_modules dbms)
# force all split libs to be linked
set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--no-as-needed")
endif () endif ()
macro (dbms_target_include_directories)
foreach (module ${all_modules})
target_include_directories (${module} ${ARGN})
endforeach ()
endmacro ()
macro (dbms_target_link_libraries)
foreach (module ${all_modules})
target_link_libraries (${module} ${ARGN})
endforeach ()
endmacro ()
if (USE_EMBEDDED_COMPILER) if (USE_EMBEDDED_COMPILER)
llvm_libs_all(REQUIRED_LLVM_LIBRARIES) llvm_libs_all(REQUIRED_LLVM_LIBRARIES)
target_link_libraries (dbms PRIVATE ${REQUIRED_LLVM_LIBRARIES}) dbms_target_link_libraries (PRIVATE ${REQUIRED_LLVM_LIBRARIES})
target_include_directories (dbms SYSTEM BEFORE PUBLIC ${LLVM_INCLUDE_DIRS}) dbms_target_include_directories (SYSTEM BEFORE PUBLIC ${LLVM_INCLUDE_DIRS})
endif () endif ()
if (CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE" OR CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" OR CMAKE_BUILD_TYPE_UC STREQUAL "MINSIZEREL") if (CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE" OR CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" OR CMAKE_BUILD_TYPE_UC STREQUAL "MINSIZEREL")
@ -245,9 +272,9 @@ target_link_libraries(clickhouse_common_io
) )
if (USE_RDKAFKA) if (USE_RDKAFKA)
target_link_libraries(dbms PRIVATE ${CPPKAFKA_LIBRARY} ${RDKAFKA_LIBRARY}) dbms_target_link_libraries(PRIVATE ${CPPKAFKA_LIBRARY} ${RDKAFKA_LIBRARY})
if(NOT USE_INTERNAL_RDKAFKA_LIBRARY) if(NOT USE_INTERNAL_RDKAFKA_LIBRARY)
target_include_directories(dbms SYSTEM BEFORE PRIVATE ${RDKAFKA_INCLUDE_DIR}) dbms_target_include_directories(SYSTEM BEFORE PRIVATE ${RDKAFKA_INCLUDE_DIR})
endif() endif()
endif() endif()
@ -264,7 +291,7 @@ if(CPUINFO_LIBRARY)
target_link_libraries(clickhouse_common_io PRIVATE ${CPUINFO_LIBRARY}) target_link_libraries(clickhouse_common_io PRIVATE ${CPUINFO_LIBRARY})
endif() endif()
target_link_libraries (dbms dbms_target_link_libraries (
PRIVATE PRIVATE
clickhouse_parsers clickhouse_parsers
clickhouse_common_config clickhouse_common_config
@ -285,21 +312,24 @@ target_link_libraries (dbms
${Boost_SYSTEM_LIBRARY} ${Boost_SYSTEM_LIBRARY}
) )
target_include_directories(dbms PUBLIC ${CMAKE_CURRENT_BINARY_DIR}/src/Core/include)
target_include_directories(clickhouse_common_io PUBLIC ${CMAKE_CURRENT_BINARY_DIR}/src/Core/include) # uses some includes from core target_include_directories(clickhouse_common_io PUBLIC ${CMAKE_CURRENT_BINARY_DIR}/src/Core/include) # uses some includes from core
target_include_directories(dbms SYSTEM BEFORE PUBLIC ${PDQSORT_INCLUDE_DIR}) dbms_target_include_directories(PUBLIC ${CMAKE_CURRENT_BINARY_DIR}/src/Core/include)
target_include_directories(dbms SYSTEM PUBLIC ${PCG_RANDOM_INCLUDE_DIR})
target_include_directories(clickhouse_common_io SYSTEM PUBLIC ${PCG_RANDOM_INCLUDE_DIR})
dbms_target_include_directories(SYSTEM PUBLIC ${PCG_RANDOM_INCLUDE_DIR})
dbms_target_include_directories(SYSTEM BEFORE PUBLIC ${PDQSORT_INCLUDE_DIR})
if (NOT USE_INTERNAL_LZ4_LIBRARY) if (NOT USE_INTERNAL_LZ4_LIBRARY)
target_include_directories(dbms SYSTEM BEFORE PRIVATE ${LZ4_INCLUDE_DIR}) dbms_target_include_directories(SYSTEM BEFORE PRIVATE ${LZ4_INCLUDE_DIR})
endif () endif ()
if (ZSTD_LIBRARY) if (ZSTD_LIBRARY)
target_link_libraries(dbms PRIVATE ${ZSTD_LIBRARY}) dbms_target_link_libraries(PRIVATE ${ZSTD_LIBRARY})
if (NOT USE_INTERNAL_ZSTD_LIBRARY AND ZSTD_INCLUDE_DIR)
dbms_target_include_directories(SYSTEM BEFORE PRIVATE ${ZSTD_INCLUDE_DIR})
endif ()
endif() endif()
if (NOT USE_INTERNAL_ZSTD_LIBRARY AND ZSTD_INCLUDE_DIR)
target_include_directories(dbms SYSTEM BEFORE PRIVATE ${ZSTD_INCLUDE_DIR})
endif ()
if (NOT USE_INTERNAL_BOOST_LIBRARY) if (NOT USE_INTERNAL_BOOST_LIBRARY)
target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${Boost_INCLUDE_DIRS}) target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${Boost_INCLUDE_DIRS})
@ -307,68 +337,69 @@ endif ()
if (Poco_SQL_FOUND AND NOT USE_INTERNAL_POCO_LIBRARY) if (Poco_SQL_FOUND AND NOT USE_INTERNAL_POCO_LIBRARY)
target_include_directories (clickhouse_common_io SYSTEM PRIVATE ${Poco_SQL_INCLUDE_DIR}) target_include_directories (clickhouse_common_io SYSTEM PRIVATE ${Poco_SQL_INCLUDE_DIR})
target_include_directories (dbms SYSTEM PRIVATE ${Poco_SQL_INCLUDE_DIR}) dbms_target_include_directories (SYSTEM PRIVATE ${Poco_SQL_INCLUDE_DIR})
endif() endif()
if (USE_POCO_SQLODBC) if (USE_POCO_SQLODBC)
target_link_libraries (clickhouse_common_io PRIVATE ${Poco_SQL_LIBRARY}) target_link_libraries (clickhouse_common_io PRIVATE ${Poco_SQL_LIBRARY})
target_link_libraries (dbms PRIVATE ${Poco_SQLODBC_LIBRARY} ${Poco_SQL_LIBRARY}) dbms_target_link_libraries (PRIVATE ${Poco_SQLODBC_LIBRARY} ${Poco_SQL_LIBRARY})
if (NOT USE_INTERNAL_POCO_LIBRARY) if (NOT USE_INTERNAL_POCO_LIBRARY)
target_include_directories (clickhouse_common_io SYSTEM PRIVATE ${ODBC_INCLUDE_DIRS} ${Poco_SQL_INCLUDE_DIR}) target_include_directories (clickhouse_common_io SYSTEM PRIVATE ${ODBC_INCLUDE_DIRS} ${Poco_SQL_INCLUDE_DIR})
target_include_directories (dbms SYSTEM PRIVATE ${ODBC_INCLUDE_DIRS} ${Poco_SQLODBC_INCLUDE_DIR} SYSTEM PUBLIC ${Poco_SQL_INCLUDE_DIR}) dbms_target_include_directories (SYSTEM PRIVATE ${ODBC_INCLUDE_DIRS} ${Poco_SQLODBC_INCLUDE_DIR} SYSTEM PUBLIC ${Poco_SQL_INCLUDE_DIR})
endif() endif()
endif() endif()
if (Poco_Data_FOUND) if (Poco_Data_FOUND)
target_include_directories (clickhouse_common_io SYSTEM PRIVATE ${Poco_Data_INCLUDE_DIR}) target_include_directories (clickhouse_common_io SYSTEM PRIVATE ${Poco_Data_INCLUDE_DIR})
target_include_directories (dbms SYSTEM PRIVATE ${Poco_Data_INCLUDE_DIR}) dbms_target_include_directories (SYSTEM PRIVATE ${Poco_Data_INCLUDE_DIR})
endif() endif()
if (USE_POCO_DATAODBC) if (USE_POCO_DATAODBC)
target_link_libraries (clickhouse_common_io PRIVATE ${Poco_Data_LIBRARY}) target_link_libraries (clickhouse_common_io PRIVATE ${Poco_Data_LIBRARY})
target_link_libraries (dbms PRIVATE ${Poco_DataODBC_LIBRARY}) dbms_target_link_libraries (PRIVATE ${Poco_DataODBC_LIBRARY})
if (NOT USE_INTERNAL_POCO_LIBRARY) if (NOT USE_INTERNAL_POCO_LIBRARY)
target_include_directories (dbms SYSTEM PRIVATE ${ODBC_INCLUDE_DIRS} ${Poco_DataODBC_INCLUDE_DIR}) dbms_target_include_directories (SYSTEM PRIVATE ${ODBC_INCLUDE_DIRS} ${Poco_DataODBC_INCLUDE_DIR})
endif() endif()
endif() endif()
if (USE_POCO_MONGODB) if (USE_POCO_MONGODB)
target_link_libraries (dbms PRIVATE ${Poco_MongoDB_LIBRARY}) dbms_target_link_libraries (PRIVATE ${Poco_MongoDB_LIBRARY})
endif() endif()
if (USE_POCO_NETSSL) if (USE_POCO_NETSSL)
target_link_libraries (clickhouse_common_io PRIVATE ${Poco_NetSSL_LIBRARY} ${Poco_Crypto_LIBRARY}) target_link_libraries (clickhouse_common_io PRIVATE ${Poco_NetSSL_LIBRARY} ${Poco_Crypto_LIBRARY})
target_link_libraries (dbms PRIVATE ${Poco_NetSSL_LIBRARY} ${Poco_Crypto_LIBRARY}) dbms_target_link_libraries (PRIVATE ${Poco_NetSSL_LIBRARY} ${Poco_Crypto_LIBRARY})
endif() endif()
target_link_libraries (dbms PRIVATE ${Poco_Foundation_LIBRARY}) dbms_target_link_libraries (PRIVATE ${Poco_Foundation_LIBRARY})
if (USE_ICU) if (USE_ICU)
target_link_libraries (dbms PRIVATE ${ICU_LIBRARIES}) dbms_target_link_libraries (PRIVATE ${ICU_LIBRARIES})
target_include_directories (dbms SYSTEM PRIVATE ${ICU_INCLUDE_DIRS}) dbms_target_include_directories (SYSTEM PRIVATE ${ICU_INCLUDE_DIRS})
endif () endif ()
if (USE_CAPNP) if (USE_CAPNP)
target_link_libraries (dbms PRIVATE ${CAPNP_LIBRARIES}) dbms_target_link_libraries (PRIVATE ${CAPNP_LIBRARIES})
endif () endif ()
if (USE_PARQUET) if (USE_PARQUET)
target_link_libraries(dbms PRIVATE ${PARQUET_LIBRARY}) dbms_target_link_libraries(PRIVATE ${PARQUET_LIBRARY})
if (NOT USE_INTERNAL_PARQUET_LIBRARY OR USE_INTERNAL_PARQUET_LIBRARY_NATIVE_CMAKE) if (NOT USE_INTERNAL_PARQUET_LIBRARY OR USE_INTERNAL_PARQUET_LIBRARY_NATIVE_CMAKE)
target_include_directories (dbms SYSTEM BEFORE PRIVATE ${PARQUET_INCLUDE_DIR} ${ARROW_INCLUDE_DIR}) dbms_target_include_directories (SYSTEM BEFORE PRIVATE ${PARQUET_INCLUDE_DIR} ${ARROW_INCLUDE_DIR})
endif () endif ()
endif () endif ()
if(OPENSSL_CRYPTO_LIBRARY) if (OPENSSL_CRYPTO_LIBRARY)
target_link_libraries(dbms PRIVATE ${OPENSSL_CRYPTO_LIBRARY}) dbms_target_link_libraries (PRIVATE ${OPENSSL_CRYPTO_LIBRARY})
target_link_libraries (clickhouse_common_io PRIVATE ${OPENSSL_CRYPTO_LIBRARY})
endif () endif ()
target_include_directories (dbms SYSTEM BEFORE PRIVATE ${DIVIDE_INCLUDE_DIR}) dbms_target_include_directories (SYSTEM BEFORE PRIVATE ${DIVIDE_INCLUDE_DIR})
target_include_directories (dbms SYSTEM BEFORE PRIVATE ${SPARCEHASH_INCLUDE_DIR}) dbms_target_include_directories (SYSTEM BEFORE PRIVATE ${SPARSEHASH_INCLUDE_DIR})
if (USE_PROTOBUF) if (USE_PROTOBUF)
target_link_libraries (dbms PRIVATE ${Protobuf_LIBRARY}) dbms_target_link_libraries (PRIVATE ${Protobuf_LIBRARY})
target_include_directories (dbms SYSTEM BEFORE PRIVATE ${Protobuf_INCLUDE_DIR}) dbms_target_include_directories (SYSTEM BEFORE PRIVATE ${Protobuf_INCLUDE_DIR})
endif () endif ()
if (USE_HDFS) if (USE_HDFS)
@ -382,13 +413,23 @@ if (USE_BROTLI)
endif() endif()
if (USE_JEMALLOC) if (USE_JEMALLOC)
target_include_directories (dbms SYSTEM BEFORE PRIVATE ${JEMALLOC_INCLUDE_DIR}) # used in Interpreters/AsynchronousMetrics.cpp dbms_target_include_directories (SYSTEM BEFORE PRIVATE ${JEMALLOC_INCLUDE_DIR}) # used in Interpreters/AsynchronousMetrics.cpp
target_include_directories (clickhouse_common_io SYSTEM BEFORE PRIVATE ${JEMALLOC_INCLUDE_DIR}) # new_delete.cpp target_include_directories (clickhouse_common_io SYSTEM BEFORE PRIVATE ${JEMALLOC_INCLUDE_DIR}) # new_delete.cpp
# common/memory.h
if (MAKE_STATIC_LIBRARIES OR NOT SPLIT_SHARED_LIBRARIES)
# skip if we have bundled build, since jemalloc is static in this case
elseif (${JEMALLOC_LIBRARIES} MATCHES "${CMAKE_STATIC_LIBRARY_SUFFIX}$")
# if the library is static we do not need to link with it,
# since in this case it will be in libs/libcommon,
# and we do not want to link with jemalloc multiple times.
else()
target_link_libraries(clickhouse_common_io PRIVATE ${JEMALLOC_LIBRARIES})
endif()
endif () endif ()
target_include_directories (dbms PUBLIC ${DBMS_INCLUDE_DIR} PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/src/Formats/include) dbms_target_include_directories (PUBLIC ${DBMS_INCLUDE_DIR} PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/src/Formats/include)
target_include_directories (clickhouse_common_io PUBLIC ${DBMS_INCLUDE_DIR}) target_include_directories (clickhouse_common_io PUBLIC ${DBMS_INCLUDE_DIR})
target_include_directories (clickhouse_common_io SYSTEM PUBLIC ${PCG_RANDOM_INCLUDE_DIR})
target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${DOUBLE_CONVERSION_INCLUDE_DIR}) target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${DOUBLE_CONVERSION_INCLUDE_DIR})
# also for copy_headers.sh: # also for copy_headers.sh:

View File

@ -24,9 +24,9 @@ configure_file (config_tools.h.in ${CMAKE_CURRENT_BINARY_DIR}/config_tools.h)
macro(clickhouse_target_link_split_lib target name) macro(clickhouse_target_link_split_lib target name)
if(NOT CLICKHOUSE_ONE_SHARED) if(NOT CLICKHOUSE_ONE_SHARED)
target_link_libraries(${target} PRIVATE clickhouse-${name}-lib) target_link_libraries(${target} PRIVATE clickhouse-${name}-lib ${MALLOC_LIBRARIES})
else() else()
target_link_libraries(${target} PRIVATE clickhouse-lib) target_link_libraries(${target} PRIVATE clickhouse-lib ${MALLOC_LIBRARIES})
endif() endif()
endmacro() endmacro()
@ -111,7 +111,7 @@ if (CLICKHOUSE_SPLIT_BINARY)
install(PROGRAMS clickhouse-split-helper DESTINATION ${CMAKE_INSTALL_BINDIR} RENAME clickhouse COMPONENT clickhouse) install(PROGRAMS clickhouse-split-helper DESTINATION ${CMAKE_INSTALL_BINDIR} RENAME clickhouse COMPONENT clickhouse)
else () else ()
add_executable (clickhouse main.cpp) add_executable (clickhouse main.cpp)
target_link_libraries (clickhouse PRIVATE clickhouse_common_io string_utils) target_link_libraries (clickhouse PRIVATE clickhouse_common_io string_utils ${MALLOC_LIBRARIES})
target_include_directories (clickhouse BEFORE PRIVATE ${COMMON_INCLUDE_DIR}) target_include_directories (clickhouse BEFORE PRIVATE ${COMMON_INCLUDE_DIR})
target_include_directories (clickhouse PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) target_include_directories (clickhouse PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
@ -205,6 +205,9 @@ else ()
add_custom_target (clickhouse-bundle ALL DEPENDS ${CLICKHOUSE_BUNDLE}) add_custom_target (clickhouse-bundle ALL DEPENDS ${CLICKHOUSE_BUNDLE})
if (USE_GDB_ADD_INDEX)
add_custom_command(TARGET clickhouse POST_BUILD COMMAND ${GDB_ADD_INDEX_EXE} clickhouse COMMENT "Adding .gdb-index to clickhouse" VERBATIM)
endif()
endif () endif ()
if (TARGET clickhouse-server AND TARGET copy-headers) if (TARGET clickhouse-server AND TARGET copy-headers)

View File

@ -4,7 +4,11 @@ set(CLICKHOUSE_CLIENT_SOURCES
) )
set(CLICKHOUSE_CLIENT_LINK PRIVATE clickhouse_common_config clickhouse_functions clickhouse_aggregate_functions clickhouse_common_io clickhouse_parsers string_utils ${LINE_EDITING_LIBS} ${Boost_PROGRAM_OPTIONS_LIBRARY}) set(CLICKHOUSE_CLIENT_LINK PRIVATE clickhouse_common_config clickhouse_functions clickhouse_aggregate_functions clickhouse_common_io clickhouse_parsers string_utils ${LINE_EDITING_LIBS} ${Boost_PROGRAM_OPTIONS_LIBRARY})
set(CLICKHOUSE_CLIENT_INCLUDE SYSTEM PRIVATE ${READLINE_INCLUDE_DIR} PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/include) set(CLICKHOUSE_CLIENT_INCLUDE PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/include)
if (READLINE_INCLUDE_DIR)
set(CLICKHOUSE_CLIENT_INCLUDE ${CLICKHOUSE_CLIENT_INCLUDE} SYSTEM PRIVATE ${READLINE_INCLUDE_DIR})
endif ()
include(CheckSymbolExists) include(CheckSymbolExists)
check_symbol_exists(readpassphrase readpassphrase.h HAVE_READPASSPHRASE) check_symbol_exists(readpassphrase readpassphrase.h HAVE_READPASSPHRASE)

View File

@ -106,6 +106,7 @@ namespace ErrorCodes
extern const int CANNOT_SET_SIGNAL_HANDLER; extern const int CANNOT_SET_SIGNAL_HANDLER;
extern const int CANNOT_READLINE; extern const int CANNOT_READLINE;
extern const int SYSTEM_ERROR; extern const int SYSTEM_ERROR;
extern const int INVALID_USAGE_OF_INPUT;
} }
@ -562,9 +563,17 @@ private:
if (is_interactive) if (is_interactive)
{ {
std::cout << "Connected to " << server_name std::cout << "Connected to " << server_name
<< " server version " << server_version << " server version " << server_version
<< " revision " << server_revision << " revision " << server_revision
<< "." << std::endl << std::endl; << "." << std::endl << std::endl;
if (std::make_tuple(VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH)
< std::make_tuple(server_version_major, server_version_minor, server_version_patch))
{
std::cout << "ClickHouse client version is older than ClickHouse server. "
<< "It may lack support for new features."
<< std::endl << std::endl;
}
} }
} }
@ -843,9 +852,17 @@ private:
connection->forceConnected(connection_parameters.timeouts); connection->forceConnected(connection_parameters.timeouts);
/// INSERT query for which data transfer is needed (not an INSERT SELECT) is processed separately. ASTPtr input_function;
if (insert && !insert->select) if (insert && insert->select)
insert->tryFindInputFunction(input_function);
/// INSERT query for which data transfer is needed (not an INSERT SELECT or input()) is processed separately.
if (insert && (!insert->select || input_function))
{
if (input_function && insert->format.empty())
throw Exception("FORMAT must be specified for function input()", ErrorCodes::INVALID_USAGE_OF_INPUT);
processInsertQuery(); processInsertQuery();
}
else else
processOrdinaryQuery(); processOrdinaryQuery();
} }

View File

@ -8,7 +8,6 @@
#include <vector> #include <vector>
#include <algorithm> #include <algorithm>
#include <ext/singleton.h>
#include <common/readline_use.h> #include <common/readline_use.h>
#include <Common/typeid_cast.h> #include <Common/typeid_cast.h>
@ -25,7 +24,7 @@ namespace ErrorCodes
extern const int UNKNOWN_PACKET_FROM_SERVER; extern const int UNKNOWN_PACKET_FROM_SERVER;
} }
class Suggest : public ext::singleton<Suggest> class Suggest : private boost::noncopyable
{ {
private: private:
/// The vector will be filled with completion words from the server and sorted. /// The vector will be filled with completion words from the server and sorted.
@ -161,6 +160,12 @@ private:
} }
public: public:
static Suggest & instance()
{
static Suggest instance;
return instance;
}
/// More old server versions cannot execute the query above. /// More old server versions cannot execute the query above.
static constexpr int MIN_SERVER_REVISION = 54406; static constexpr int MIN_SERVER_REVISION = 54406;

View File

@ -1,5 +1,5 @@
set(CLICKHOUSE_COPIER_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/ClusterCopier.cpp) set(CLICKHOUSE_COPIER_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/ClusterCopier.cpp)
set(CLICKHOUSE_COPIER_LINK PRIVATE clickhouse_common_zookeeper clickhouse_parsers clickhouse_functions clickhouse_table_functions clickhouse_aggregate_functions clickhouse_dictionaries string_utils PUBLIC daemon) set(CLICKHOUSE_COPIER_LINK PRIVATE clickhouse_common_zookeeper clickhouse_parsers clickhouse_functions clickhouse_table_functions clickhouse_aggregate_functions clickhouse_dictionaries string_utils ${Poco_XML_LIBRARY} PUBLIC daemon)
set(CLICKHOUSE_COPIER_INCLUDE SYSTEM PRIVATE ${PCG_RANDOM_INCLUDE_DIR}) set(CLICKHOUSE_COPIER_INCLUDE SYSTEM PRIVATE ${PCG_RANDOM_INCLUDE_DIR})
clickhouse_program_add(copier) clickhouse_program_add(copier)

View File

@ -35,14 +35,17 @@ clickhouse_program_add_library(odbc-bridge)
# clickhouse-odbc-bridge is always a separate binary. # clickhouse-odbc-bridge is always a separate binary.
# Reason: it must not export symbols from SSL, mariadb-client, etc. to not break ABI compatibility with ODBC drivers. # Reason: it must not export symbols from SSL, mariadb-client, etc. to not break ABI compatibility with ODBC drivers.
# For this reason, we are disabling the -rdynamic linker flag. But we do it in a strange way: set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-export-dynamic")
SET(CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS "")
add_executable(clickhouse-odbc-bridge odbc-bridge.cpp) add_executable(clickhouse-odbc-bridge odbc-bridge.cpp)
set_target_properties(clickhouse-odbc-bridge PROPERTIES RUNTIME_OUTPUT_DIRECTORY ..) set_target_properties(clickhouse-odbc-bridge PROPERTIES RUNTIME_OUTPUT_DIRECTORY ..)
clickhouse_program_link_split_binary(odbc-bridge) clickhouse_program_link_split_binary(odbc-bridge)
if (USE_GDB_ADD_INDEX)
add_custom_command(TARGET clickhouse-odbc-bridge POST_BUILD COMMAND ${GDB_ADD_INDEX_EXE} ../clickhouse-odbc-bridge COMMENT "Adding .gdb-index to clickhouse-odbc-bridge" VERBATIM)
endif()
install(TARGETS clickhouse-odbc-bridge RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) install(TARGETS clickhouse-odbc-bridge RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
if(ENABLE_TESTS) if(ENABLE_TESTS)

View File

@ -15,6 +15,7 @@
#include <common/logger_useful.h> #include <common/logger_useful.h>
#include <ext/scope_guard.h> #include <ext/scope_guard.h>
#include <ext/range.h> #include <ext/range.h>
#include <Common/SensitiveDataMasker.h>
namespace DB namespace DB
{ {
@ -165,8 +166,7 @@ int ODBCBridge::main(const std::vector<std::string> & /*args*/)
if (config().has("query_masking_rules")) if (config().has("query_masking_rules"))
{ {
context->setSensitiveDataMasker(std::make_unique<SensitiveDataMasker>(config(), "query_masking_rules")); SensitiveDataMasker::setInstance(std::make_unique<SensitiveDataMasker>(config(), "query_masking_rules"));
setLoggerSensitiveDataMasker(logger(), context->getSensitiveDataMasker());
} }
auto server = Poco::Net::HTTPServer( auto server = Poco::Net::HTTPServer(

View File

@ -31,9 +31,10 @@ void StopConditionsSet::loadFromConfig(const ConfigurationPtr & stop_conditions_
else if (key == "average_speed_not_changing_for_ms") else if (key == "average_speed_not_changing_for_ms")
average_speed_not_changing_for_ms.value = stop_conditions_view->getUInt64(key); average_speed_not_changing_for_ms.value = stop_conditions_view->getUInt64(key);
else else
throw Exception("Met unkown stop condition: " + key, ErrorCodes::LOGICAL_ERROR); throw Exception("Met unknown stop condition: " + key, ErrorCodes::LOGICAL_ERROR);
++initialized_count;
} }
++initialized_count;
} }
void StopConditionsSet::reset() void StopConditionsSet::reset()

View File

@ -21,6 +21,7 @@ MetricsTransmitter::MetricsTransmitter(
{ {
interval_seconds = config.getInt(config_name + ".interval", 60); interval_seconds = config.getInt(config_name + ".interval", 60);
send_events = config.getBool(config_name + ".events", true); send_events = config.getBool(config_name + ".events", true);
send_events_cumulative = config.getBool(config_name + ".events_cumulative", false);
send_metrics = config.getBool(config_name + ".metrics", true); send_metrics = config.getBool(config_name + ".metrics", true);
send_asynchronous_metrics = config.getBool(config_name + ".asynchronous_metrics", true); send_asynchronous_metrics = config.getBool(config_name + ".asynchronous_metrics", true);
} }
@ -95,6 +96,16 @@ void MetricsTransmitter::transmit(std::vector<ProfileEvents::Count> & prev_count
} }
} }
if (send_events_cumulative)
{
for (size_t i = 0, end = ProfileEvents::end(); i < end; ++i)
{
const auto counter = ProfileEvents::global_counters[i].load(std::memory_order_relaxed);
std::string key{ProfileEvents::getName(static_cast<ProfileEvents::Event>(i))};
key_vals.emplace_back(profile_events_cumulative_path_prefix + key, counter);
}
}
if (send_metrics) if (send_metrics)
{ {
for (size_t i = 0, end = CurrentMetrics::end(); i < end; ++i) for (size_t i = 0, end = CurrentMetrics::end(); i < end; ++i)

View File

@ -24,7 +24,8 @@ class AsynchronousMetrics;
/** Automatically sends /** Automatically sends
* - difference of ProfileEvents; * - delta values of ProfileEvents;
* - cumulative values of ProfileEvents;
* - values of CurrentMetrics; * - values of CurrentMetrics;
* - values of AsynchronousMetrics; * - values of AsynchronousMetrics;
* to Graphite at beginning of every minute. * to Graphite at beginning of every minute.
@ -44,6 +45,7 @@ private:
std::string config_name; std::string config_name;
UInt32 interval_seconds; UInt32 interval_seconds;
bool send_events; bool send_events;
bool send_events_cumulative;
bool send_metrics; bool send_metrics;
bool send_asynchronous_metrics; bool send_asynchronous_metrics;
@ -53,6 +55,7 @@ private:
ThreadFromGlobalPool thread{&MetricsTransmitter::run, this}; ThreadFromGlobalPool thread{&MetricsTransmitter::run, this};
static inline constexpr auto profile_events_path_prefix = "ClickHouse.ProfileEvents."; static inline constexpr auto profile_events_path_prefix = "ClickHouse.ProfileEvents.";
static inline constexpr auto profile_events_cumulative_path_prefix = "ClickHouse.ProfileEventsCumulative.";
static inline constexpr auto current_metrics_path_prefix = "ClickHouse.Metrics."; static inline constexpr auto current_metrics_path_prefix = "ClickHouse.Metrics.";
static inline constexpr auto asynchronous_metrics_path_prefix = "ClickHouse.AsynchronousMetrics."; static inline constexpr auto asynchronous_metrics_path_prefix = "ClickHouse.AsynchronousMetrics.";
}; };
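
The header now documents both forms: the per-interval delta is sent under ClickHouse.ProfileEvents.*, the raw monotonic counter under ClickHouse.ProfileEventsCumulative.*. A standalone sketch of the difference, with made-up counter values:

#include <cstdint>
#include <iostream>

int main()
{
    /// A ProfileEvents counter only grows. Suppose two consecutive transmit()
    /// calls observe these raw values:
    uint64_t prev_counter = 1200;  /// at the previous transmission
    uint64_t counter = 1950;       /// now

    std::cout << "ClickHouse.ProfileEvents.<name>:           " << counter - prev_counter << '\n';  /// 750, delta per interval
    std::cout << "ClickHouse.ProfileEventsCumulative.<name>: " << counter << '\n';                 /// 1950, raw counter
}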

View File

@ -1,3 +1,5 @@
#include <Common/config.h>
#if USE_POCO_NETSSL
#include "MySQLHandler.h" #include "MySQLHandler.h"
#include <limits> #include <limits>
@ -293,10 +295,12 @@ void MySQLHandler::comQuery(ReadBuffer & payload)
should_replace = true; should_replace = true;
} }
executeQuery(should_replace ? empty_select : payload, *out, true, connection_context, set_content_type, nullptr); Context query_context = connection_context;
executeQuery(should_replace ? empty_select : payload, *out, true, query_context, set_content_type, nullptr);
if (!with_output) if (!with_output)
packet_sender->sendPacket(OK_Packet(0x00, client_capability_flags, 0, 0, 0), true); packet_sender->sendPacket(OK_Packet(0x00, client_capability_flags, 0, 0, 0), true);
} }
} }
#endif

View File

@ -1,4 +1,6 @@
#pragma once #pragma once
#include <Common/config.h>
#if USE_POCO_NETSSL
#include <Poco/Net/TCPServerConnection.h> #include <Poco/Net/TCPServerConnection.h>
#include <Poco/Net/SecureStreamSocket.h> #include <Poco/Net/SecureStreamSocket.h>
@ -56,3 +58,4 @@ private:
}; };
} }
#endif

View File

@ -1,3 +1,5 @@
#include <Common/config.h>
#if USE_POCO_NETSSL
#include <Common/OpenSSLHelpers.h> #include <Common/OpenSSLHelpers.h>
#include <Poco/Crypto/X509Certificate.h> #include <Poco/Crypto/X509Certificate.h>
#include <Poco/Net/SSLManager.h> #include <Poco/Net/SSLManager.h>
@ -122,3 +124,4 @@ Poco::Net::TCPServerConnection * MySQLHandlerFactory::createConnection(const Poc
} }
} }
#endif

View File

@ -1,5 +1,8 @@
#pragma once #pragma once
#include <Common/config.h>
#if USE_POCO_NETSSL
#include <Poco/Net/TCPServerConnectionFactory.h> #include <Poco/Net/TCPServerConnectionFactory.h>
#include <atomic> #include <atomic>
#include <openssl/rsa.h> #include <openssl/rsa.h>
@ -37,3 +40,4 @@ public:
}; };
} }
#endif

View File

@ -55,6 +55,7 @@
#include "TCPHandlerFactory.h" #include "TCPHandlerFactory.h"
#include "Common/config_version.h" #include "Common/config_version.h"
#include "MySQLHandlerFactory.h" #include "MySQLHandlerFactory.h"
#include <Common/SensitiveDataMasker.h>
#if defined(__linux__) #if defined(__linux__)
@ -87,6 +88,7 @@ namespace ErrorCodes
extern const int FAILED_TO_GETPWUID; extern const int FAILED_TO_GETPWUID;
extern const int MISMATCHING_USERS_FOR_PROCESS_AND_DATA; extern const int MISMATCHING_USERS_FOR_PROCESS_AND_DATA;
extern const int NETWORK_ERROR; extern const int NETWORK_ERROR;
extern const int PATH_ACCESS_DENIED;
} }
@ -269,6 +271,15 @@ int Server::main(const std::vector<std::string> & /*args*/)
Poco::File(path + "data/" + default_database).createDirectories(); Poco::File(path + "data/" + default_database).createDirectories();
Poco::File(path + "metadata/" + default_database).createDirectories(); Poco::File(path + "metadata/" + default_database).createDirectories();
/// Check that we have read and write access to all data paths
auto disk_selector = global_context->getDiskSelector();
for (const auto & [name, disk] : disk_selector.getDisksMap())
{
Poco::File disk_path(disk->getPath());
if (!disk_path.canRead() || !disk_path.canWrite())
throw Exception("There is no RW access to disk " + name + " (" + disk->getPath() + ")", ErrorCodes::PATH_ACCESS_DENIED);
}
StatusFile status{path + "status"}; StatusFile status{path + "status"};
SCOPE_EXIT({ SCOPE_EXIT({
@ -279,8 +290,6 @@ int Server::main(const std::vector<std::string> & /*args*/)
*/ */
LOG_INFO(log, "Shutting down storages."); LOG_INFO(log, "Shutting down storages.");
// global_context is the owner of sensitive_data_masker, which will be destoyed after global_context->shutdown() call
setLoggerSensitiveDataMasker(logger(), nullptr);
global_context->shutdown(); global_context->shutdown();
LOG_DEBUG(log, "Shutted down storages."); LOG_DEBUG(log, "Shutted down storages.");
@ -414,7 +423,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
if (config().has("query_masking_rules")) if (config().has("query_masking_rules"))
{ {
global_context->setSensitiveDataMasker(std::make_unique<SensitiveDataMasker>(config(), "query_masking_rules")); SensitiveDataMasker::setInstance(std::make_unique<SensitiveDataMasker>(config(), "query_masking_rules"));
} }
auto main_config_reloader = std::make_unique<ConfigReloader>(config_path, auto main_config_reloader = std::make_unique<ConfigReloader>(config_path,
@ -426,10 +435,6 @@ int Server::main(const std::vector<std::string> & /*args*/)
{ {
setTextLog(global_context->getTextLog()); setTextLog(global_context->getTextLog());
buildLoggers(*config, logger()); buildLoggers(*config, logger());
if (auto masker = global_context->getSensitiveDataMasker())
{
setLoggerSensitiveDataMasker(logger(), masker);
}
global_context->setClustersConfig(config); global_context->setClustersConfig(config);
global_context->setMacros(std::make_unique<Macros>(*config, "macros")); global_context->setMacros(std::make_unique<Macros>(*config, "macros"));
}, },

View File

@ -203,6 +203,43 @@ void TCPHandler::runImpl()
state.maybe_compressed_in.reset(); /// For more accurate accounting by MemoryTracker. state.maybe_compressed_in.reset(); /// For more accurate accounting by MemoryTracker.
}); });
/// Send structure of columns to client for function input()
query_context->setInputInitializer([this] (Context & context, const StoragePtr & input_storage)
{
if (&context != &query_context.value())
throw Exception("Unexpected context in Input initializer", ErrorCodes::LOGICAL_ERROR);
state.need_receive_data_for_input = true;
/// Send ColumnsDescription for input storage.
if (client_revision >= DBMS_MIN_REVISION_WITH_COLUMN_DEFAULTS_METADATA
&& query_context->getSettingsRef().input_format_defaults_for_omitted_fields)
{
sendTableColumns(input_storage->getColumns());
}
/// Send block to the client - input storage structure.
state.input_header = input_storage->getSampleBlock();
sendData(state.input_header);
});
query_context->setInputBlocksReaderCallback([&connection_settings, this] (Context & context) -> Block
{
if (&context != &query_context.value())
throw Exception("Unexpected context in InputBlocksReader", ErrorCodes::LOGICAL_ERROR);
size_t poll_interval;
int receive_timeout;
std::tie(poll_interval, receive_timeout) = getReadTimeouts(connection_settings);
if (!readDataNext(poll_interval, receive_timeout))
{
state.block_in.reset();
state.maybe_compressed_in.reset();
return Block();
}
return state.block_for_input;
});
customizeContext(*query_context); customizeContext(*query_context);
bool may_have_embedded_data = client_revision >= DBMS_MIN_REVISION_WITH_CLIENT_SUPPORT_EMBEDDED_DATA; bool may_have_embedded_data = client_revision >= DBMS_MIN_REVISION_WITH_CLIENT_SUPPORT_EMBEDDED_DATA;
@ -218,6 +255,13 @@ void TCPHandler::runImpl()
/// Does the request require receive data from client? /// Does the request require receive data from client?
if (state.need_receive_data_for_insert) if (state.need_receive_data_for_insert)
processInsertQuery(connection_settings); processInsertQuery(connection_settings);
else if (state.need_receive_data_for_input)
{
/// It is a special case for input(): all work for reading data from the client is done in callbacks.
/// state.io.in is a NullAndDoCopyBlockInputStream, so read it once.
state.io.in->read();
state.io.onFinish();
}
else if (state.io.pipeline.initialized()) else if (state.io.pipeline.initialized())
processOrdinaryQueryWithProcessors(query_context->getSettingsRef().max_threads); processOrdinaryQueryWithProcessors(query_context->getSettingsRef().max_threads);
else else
@ -324,7 +368,50 @@ void TCPHandler::runImpl()
} }
void TCPHandler::readData(const Settings & connection_settings) bool TCPHandler::readDataNext(const size_t & poll_interval, const int & receive_timeout)
{
Stopwatch watch(CLOCK_MONOTONIC_COARSE);
/// We are waiting for a packet from the client. Thus, every `POLL_INTERVAL` seconds check whether we need to shut down.
while (true)
{
if (static_cast<ReadBufferFromPocoSocket &>(*in).poll(poll_interval))
break;
/// Do we need to shut down?
if (server.isCancelled())
return false;
/** Have we waited for data for too long?
* If we periodically poll, the receive_timeout of the socket itself does not work.
* Therefore, an additional check is added.
*/
double elapsed = watch.elapsedSeconds();
if (elapsed > receive_timeout)
{
std::stringstream ss;
ss << "Timeout exceeded while receiving data from client.";
ss << " Waited for " << static_cast<size_t>(elapsed) << " seconds,";
ss << " timeout is " << receive_timeout << " seconds.";
throw Exception(ss.str(), ErrorCodes::SOCKET_TIMEOUT);
}
}
/// If client disconnected.
if (in->eof())
return false;
/// We accept and process data. When there is no more data, we leave.
if (!receivePacket())
return false;
sendLogs();
return true;
}
std::tuple<size_t, int> TCPHandler::getReadTimeouts(const Settings & connection_settings)
{ {
const auto receive_timeout = query_context->getSettingsRef().receive_timeout.value; const auto receive_timeout = query_context->getSettingsRef().receive_timeout.value;
@ -334,48 +421,21 @@ void TCPHandler::readData(const Settings & connection_settings)
constexpr size_t min_poll_interval = 5000; // 5 ms constexpr size_t min_poll_interval = 5000; // 5 ms
size_t poll_interval = std::max(min_poll_interval, std::min(default_poll_interval, current_poll_interval)); size_t poll_interval = std::max(min_poll_interval, std::min(default_poll_interval, current_poll_interval));
return std::make_tuple(poll_interval, receive_timeout.totalSeconds());
}
void TCPHandler::readData(const Settings & connection_settings)
{
size_t poll_interval;
int receive_timeout;
std::tie(poll_interval, receive_timeout) = getReadTimeouts(connection_settings);
sendLogs(); sendLogs();
while (true) while (true)
{ if (!readDataNext(poll_interval, receive_timeout))
Stopwatch watch(CLOCK_MONOTONIC_COARSE);
/// We are waiting for a packet from the client. Thus, every `POLL_INTERVAL` seconds check whether we need to shut down.
while (true)
{
if (static_cast<ReadBufferFromPocoSocket &>(*in).poll(poll_interval))
break;
/// Do we need to shut down?
if (server.isCancelled())
return;
/** Have we waited for data for too long?
* If we periodically poll, the receive_timeout of the socket itself does not work.
* Therefore, an additional check is added.
*/
double elapsed = watch.elapsedSeconds();
if (elapsed > receive_timeout.totalSeconds())
{
std::stringstream ss;
ss << "Timeout exceeded while receiving data from client.";
ss << " Waited for " << static_cast<size_t>(elapsed) << " seconds,";
ss << " timeout is " << receive_timeout.totalSeconds() << " seconds.";
throw Exception(ss.str(), ErrorCodes::SOCKET_TIMEOUT);
}
}
/// If client disconnected.
if (in->eof())
return; return;
/// We accept and process data. And if they are over, then we leave.
if (!receivePacket())
break;
sendLogs();
}
} }
@ -485,6 +545,9 @@ void TCPHandler::processOrdinaryQueryWithProcessors(size_t num_threads)
{ {
auto & pipeline = state.io.pipeline; auto & pipeline = state.io.pipeline;
if (pipeline.getMaxThreads())
num_threads = pipeline.getMaxThreads();
/// Send header-block, to allow client to prepare output format for data to send. /// Send header-block, to allow client to prepare output format for data to send.
{ {
auto & header = pipeline.getHeader(); auto & header = pipeline.getHeader();
@ -534,7 +597,15 @@ void TCPHandler::processOrdinaryQueryWithProcessors(size_t num_threads)
lazy_format->finish(); lazy_format->finish();
lazy_format->clearQueue(); lazy_format->clearQueue();
pool.wait(); try
{
pool.wait();
}
catch (...)
{
/// If an exception was thrown during pipeline execution, skip it here so as not to mask the other exception.
}
pipeline = QueryPipeline() pipeline = QueryPipeline()
); );
@ -901,7 +972,7 @@ bool TCPHandler::receiveData()
{ {
/// If there is an insert request, then the data should be written directly to `state.io.out`. /// If there is an insert request, then the data should be written directly to `state.io.out`.
/// Otherwise, we write the blocks in the temporary `external_table_name` table. /// Otherwise, we write the blocks in the temporary `external_table_name` table.
if (!state.need_receive_data_for_insert) if (!state.need_receive_data_for_insert && !state.need_receive_data_for_input)
{ {
StoragePtr storage; StoragePtr storage;
/// If such a table does not exist, create it. /// If such a table does not exist, create it.
@ -915,7 +986,9 @@ bool TCPHandler::receiveData()
/// The data will be written directly to the table. /// The data will be written directly to the table.
state.io.out = storage->write(ASTPtr(), *query_context); state.io.out = storage->write(ASTPtr(), *query_context);
} }
if (block) if (state.need_receive_data_for_input)
state.block_for_input = block;
else
state.io.out->write(block); state.io.out->write(block);
return true; return true;
} }
@ -938,8 +1011,7 @@ void TCPHandler::receiveUnexpectedData()
auto skip_block_in = std::make_shared<NativeBlockInputStream>( auto skip_block_in = std::make_shared<NativeBlockInputStream>(
*maybe_compressed_in, *maybe_compressed_in,
last_block_in.header, last_block_in.header,
client_revision, client_revision);
!connection_context.getSettingsRef().low_cardinality_allow_in_native_format);
Block skip_block = skip_block_in->read(); Block skip_block = skip_block_in->read();
throw NetException("Unexpected packet Data received from client", ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT); throw NetException("Unexpected packet Data received from client", ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT);
@ -957,6 +1029,8 @@ void TCPHandler::initBlockInput()
Block header; Block header;
if (state.io.out) if (state.io.out)
header = state.io.out->getHeader(); header = state.io.out->getHeader();
else if (state.need_receive_data_for_input)
header = state.input_header;
last_block_in.header = header; last_block_in.header = header;
last_block_in.compression = state.compression; last_block_in.compression = state.compression;
@ -964,8 +1038,7 @@ void TCPHandler::initBlockInput()
state.block_in = std::make_shared<NativeBlockInputStream>( state.block_in = std::make_shared<NativeBlockInputStream>(
*state.maybe_compressed_in, *state.maybe_compressed_in,
header, header,
client_revision, client_revision);
!connection_context.getSettingsRef().low_cardinality_allow_in_native_format);
} }
} }

View File

@ -64,6 +64,13 @@ struct QueryState
/// Request requires data from the client (INSERT, but not INSERT SELECT). /// Request requires data from the client (INSERT, but not INSERT SELECT).
bool need_receive_data_for_insert = false; bool need_receive_data_for_insert = false;
/// Request requires data from client for function input()
bool need_receive_data_for_input = false;
/// Temporary place for the incoming data block for input()
Block block_for_input;
/// Sample block from StorageInput
Block input_header;
/// To output progress, the difference after the previous sending of progress. /// To output progress, the difference after the previous sending of progress.
Progress progress; Progress progress;
@ -147,7 +154,9 @@ private:
bool receivePacket(); bool receivePacket();
void receiveQuery(); void receiveQuery();
bool receiveData(); bool receiveData();
bool readDataNext(const size_t & poll_interval, const int & receive_timeout);
void readData(const Settings & global_settings); void readData(const Settings & global_settings);
std::tuple<size_t, int> getReadTimeouts(const Settings & global_settings);
[[noreturn]] void receiveUnexpectedData(); [[noreturn]] void receiveUnexpectedData();
[[noreturn]] void receiveUnexpectedQuery(); [[noreturn]] void receiveUnexpectedQuery();

View File

@ -258,6 +258,7 @@
<metrics>true</metrics> <metrics>true</metrics>
<events>true</events> <events>true</events>
<events_cumulative>false</events_cumulative>
<asynchronous_metrics>true</asynchronous_metrics> <asynchronous_metrics>true</asynchronous_metrics>
</graphite> </graphite>
<graphite> <graphite>
@ -269,6 +270,7 @@
<metrics>true</metrics> <metrics>true</metrics>
<events>true</events> <events>true</events>
<events_cumulative>false</events_cumulative>
<asynchronous_metrics>false</asynchronous_metrics> <asynchronous_metrics>false</asynchronous_metrics>
</graphite> </graphite>
--> -->
@ -447,7 +449,7 @@
<query_masking_rules> <query_masking_rules>
<rule> <rule>
<name>hide SSN</name> <name>hide SSN</name>
<regexp>(^|\D)\d{3}-\d{2}-\d{4}($|\D)</regexp> <regexp>\b\d{3}-\d{2}-\d{4}\b</regexp>
<replace>000-00-0000</replace> <replace>000-00-0000</replace>
</rule> </rule>
</query_masking_rules> </query_masking_rules>
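
The SSN rule now uses \b word boundaries instead of explicit (^|\D)...($|\D) groups; \b also matches at the string edges and, unlike \D, does not consume the neighbouring character. A sketch of the replacement semantics with RE2 (the regex engine behind query_masking_rules); the sample query is invented:

#include <re2/re2.h>
#include <iostream>

int main()
{
    std::string query = "SELECT * FROM users WHERE ssn = '123-45-6789'";

    /// \b matches between a \w character and a non-\w character (or a string
    /// edge), so the quotes around the SSN are left untouched.
    RE2::GlobalReplace(&query, R"(\b\d{3}-\d{2}-\d{4}\b)", "000-00-0000");

    std::cout << query << '\n';
    /// SELECT * FROM users WHERE ssn = '000-00-0000'
}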

View File

@ -26,4 +26,10 @@ AggregateFunctionCombinatorPtr AggregateFunctionCombinatorFactory::tryFindSuffix
return {}; return {};
} }
AggregateFunctionCombinatorFactory & AggregateFunctionCombinatorFactory::instance()
{
static AggregateFunctionCombinatorFactory ret;
return ret;
}
} }
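
This commit drops ext::singleton in favour of a function-local static ("Meyers singleton"); the same change is repeated below for AggregateFunctionFactory and DNSResolver. The idiom in isolation, as a standard-only sketch:

class FactorySketch
{
public:
    /// Function-local static: constructed on first use, initialization is
    /// thread-safe since C++11, and no CRTP base class is required.
    static FactorySketch & instance()
    {
        static FactorySketch ret;
        return ret;
    }

    FactorySketch(const FactorySketch &) = delete;
    FactorySketch & operator=(const FactorySketch &) = delete;

private:
    FactorySketch() = default;
};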

View File

@ -2,7 +2,6 @@
#include <AggregateFunctions/IAggregateFunctionCombinator.h> #include <AggregateFunctions/IAggregateFunctionCombinator.h>
#include <ext/singleton.h>
#include <string> #include <string>
#include <unordered_map> #include <unordered_map>
@ -13,13 +12,16 @@ namespace DB
/** Create aggregate function combinator by matching suffix in aggregate function name. /** Create aggregate function combinator by matching suffix in aggregate function name.
*/ */
class AggregateFunctionCombinatorFactory final: public ext::singleton<AggregateFunctionCombinatorFactory> class AggregateFunctionCombinatorFactory final: private boost::noncopyable
{ {
private: private:
using Dict = std::unordered_map<std::string, AggregateFunctionCombinatorPtr>; using Dict = std::unordered_map<std::string, AggregateFunctionCombinatorPtr>;
Dict dict; Dict dict;
public: public:
static AggregateFunctionCombinatorFactory & instance();
/// Not thread safe. You must register before using tryGet. /// Not thread safe. You must register before using tryGet.
void registerCombinator(const AggregateFunctionCombinatorPtr & value); void registerCombinator(const AggregateFunctionCombinatorPtr & value);

View File

@ -160,4 +160,10 @@ bool AggregateFunctionFactory::isAggregateFunctionName(const String & name, int
return false; return false;
} }
AggregateFunctionFactory & AggregateFunctionFactory::instance()
{
static AggregateFunctionFactory ret;
return ret;
}
} }

View File

@ -3,7 +3,6 @@
#include <AggregateFunctions/IAggregateFunction.h> #include <AggregateFunctions/IAggregateFunction.h>
#include <Common/IFactoryWithAliases.h> #include <Common/IFactoryWithAliases.h>
#include <ext/singleton.h>
#include <functional> #include <functional>
#include <memory> #include <memory>
@ -30,9 +29,12 @@ using AggregateFunctionCreator = std::function<AggregateFunctionPtr(const String
/** Creates an aggregate function by name. /** Creates an aggregate function by name.
*/ */
class AggregateFunctionFactory final : public ext::singleton<AggregateFunctionFactory>, public IFactoryWithAliases<AggregateFunctionCreator> class AggregateFunctionFactory final : private boost::noncopyable, public IFactoryWithAliases<AggregateFunctionCreator>
{ {
public: public:
static AggregateFunctionFactory & instance();
/// Register a function by its name. /// Register a function by its name.
/// No locking, you must register all functions before usage of get. /// No locking, you must register all functions before usage of get.
void registerFunction( void registerFunction(

View File

@ -63,7 +63,12 @@ public:
roaring_bitmap_add(rb, value); roaring_bitmap_add(rb, value);
} }
UInt64 size() const { return isSmall() ? small.size() : roaring_bitmap_get_cardinality(rb); } UInt64 size() const
{
return isSmall()
? small.size()
: roaring_bitmap_get_cardinality(rb);
}
void merge(const RoaringBitmapWithSmallSet & r1) void merge(const RoaringBitmapWithSmallSet & r1)
{ {
@ -88,10 +93,11 @@ public:
if (is_large) if (is_large)
{ {
toLarge(); std::string s;
UInt32 cardinality; readStringBinary(s, in);
readBinary(cardinality, in); rb = roaring_bitmap_portable_deserialize(s.c_str());
db_roaring_bitmap_add_many(in, rb, cardinality); for (const auto & x : small) // merge from small
roaring_bitmap_add(rb, x.getValue());
} }
else else
small.read(in); small.read(in);
@ -103,9 +109,10 @@ public:
if (isLarge()) if (isLarge())
{ {
UInt32 cardinality = roaring_bitmap_get_cardinality(rb); uint32_t expectedsize = roaring_bitmap_portable_size_in_bytes(rb);
writePODBinary(cardinality, out); std::string s(expectedsize, 0);
db_ra_to_uint32_array(out, &rb->high_low_container); roaring_bitmap_portable_serialize(rb, const_cast<char *>(s.data()));
writeStringBinary(s, out);
} }
else else
small.write(out); small.write(out);
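
Serialization of the large case switches to CRoaring's portable format, so the on-wire bytes no longer depend on ClickHouse's internal container layout. A round-trip sketch against the CRoaring C API, mirroring what write() and read() above do around writeStringBinary/readStringBinary:

#include <roaring/roaring.h>
#include <cassert>
#include <string>

int main()
{
    roaring_bitmap_t * rb = roaring_bitmap_create();
    roaring_bitmap_add(rb, 42);
    roaring_bitmap_add(rb, 100000);

    /// Portable format: ask for the size first, then serialize into the
    /// buffer, exactly as write() does before writeStringBinary().
    std::string s(roaring_bitmap_portable_size_in_bytes(rb), 0);
    roaring_bitmap_portable_serialize(rb, const_cast<char *>(s.data()));

    /// read() does the inverse: the same bytes restore an equal bitmap.
    roaring_bitmap_t * rb2 = roaring_bitmap_portable_deserialize(s.c_str());
    assert(roaring_bitmap_equals(rb, rb2));

    roaring_bitmap_free(rb);
    roaring_bitmap_free(rb2);
}
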
@ -243,13 +250,13 @@ public:
{ {
for (const auto & x : small) for (const auto & x : small)
if (r1.small.find(x.getValue()) != r1.small.end()) if (r1.small.find(x.getValue()) != r1.small.end())
retSize++; ++retSize;
} }
else if (isSmall() && r1.isLarge()) else if (isSmall() && r1.isLarge())
{ {
for (const auto & x : small) for (const auto & x : small)
if (roaring_bitmap_contains(r1.rb, x.getValue())) if (roaring_bitmap_contains(r1.rb, x.getValue()))
retSize++; ++retSize;
} }
else else
{ {
@ -389,8 +396,7 @@ public:
*/ */
UInt8 rb_contains(const UInt32 x) const UInt8 rb_contains(const UInt32 x) const
{ {
return isSmall() ? small.find(x) != small.end() : return isSmall() ? small.find(x) != small.end() : roaring_bitmap_contains(rb, x);
roaring_bitmap_contains(rb, x);
} }
/** /**
@ -458,21 +464,20 @@ public:
/** /**
* Return a new set with the specified range (range_end not included) * Return a new set with the specified range (range_end not included)
*/ */
UInt64 rb_range(UInt32 range_start, UInt32 range_end, RoaringBitmapWithSmallSet& r1) const UInt64 rb_range(UInt32 range_start, UInt32 range_end, RoaringBitmapWithSmallSet & r1) const
{ {
UInt64 count = 0; UInt64 count = 0;
if (range_start >= range_end) if (range_start >= range_end)
return count; return count;
if (isSmall()) if (isSmall())
{ {
std::vector<T> ans;
for (const auto & x : small) for (const auto & x : small)
{ {
T val = x.getValue(); T val = x.getValue();
if ((UInt32)val >= range_start && (UInt32)val < range_end) if (UInt32(val) >= range_start && UInt32(val) < range_end)
{ {
r1.add(val); r1.add(val);
count++; ++count;
} }
} }
} }
@ -481,18 +486,97 @@ public:
roaring_uint32_iterator_t iterator; roaring_uint32_iterator_t iterator;
roaring_init_iterator(rb, &iterator); roaring_init_iterator(rb, &iterator);
roaring_move_uint32_iterator_equalorlarger(&iterator, range_start); roaring_move_uint32_iterator_equalorlarger(&iterator, range_start);
while (iterator.has_value) while (iterator.has_value && UInt32(iterator.current_value) < range_end)
{ {
if ((UInt32)iterator.current_value >= range_end)
break;
r1.add(iterator.current_value); r1.add(iterator.current_value);
roaring_advance_uint32_iterator(&iterator); roaring_advance_uint32_iterator(&iterator);
count++; ++count;
} }
} }
return count; return count;
} }
/**
* Return a new set of the smallest `limit` values that are not less than `range_start`.
*/
UInt64 rb_limit(UInt32 range_start, UInt32 limit, RoaringBitmapWithSmallSet & r1) const
{
UInt64 count = 0;
if (isSmall())
{
std::vector<T> ans;
for (const auto & x : small)
{
T val = x.getValue();
if (UInt32(val) >= range_start)
{
ans.push_back(val);
}
}
sort(ans.begin(), ans.end());
if (limit > ans.size())
limit = ans.size();
for (size_t i = 0; i < limit; ++i)
r1.add(ans[i]);
count = UInt64(limit);
}
else
{
roaring_uint32_iterator_t iterator;
roaring_init_iterator(rb, &iterator);
roaring_move_uint32_iterator_equalorlarger(&iterator, range_start);
while (UInt32(count) < limit && iterator.has_value)
{
r1.add(iterator.current_value);
roaring_advance_uint32_iterator(&iterator);
++count;
}
}
return count;
}
UInt64 rb_min() const
{
UInt64 min_val = UINT32_MAX;
if (isSmall())
{
for (const auto & x : small)
{
T val = x.getValue();
if (UInt64(val) < min_val)
{
min_val = UInt64(val);
}
}
}
else
{
min_val = UInt64(roaring_bitmap_minimum(rb));
}
return min_val;
}
UInt64 rb_max() const
{
UInt64 max_val = 0;
if (isSmall())
{
for (const auto & x : small)
{
T val = x.getValue();
if (UInt64(val) > max_val)
{
max_val = UInt64(val);
}
}
}
else
{
max_val = UInt64(roaring_bitmap_maximum(rb));
}
return max_val;
}
private: private:
/// To read and write the DB Buffer directly, migrate code from CRoaring /// To read and write the DB Buffer directly, migrate code from CRoaring
void db_roaring_bitmap_add_many(DB::ReadBuffer & dbBuf, roaring_bitmap_t * r, size_t n_args) void db_roaring_bitmap_add_many(DB::ReadBuffer & dbBuf, roaring_bitmap_t * r, size_t n_args)
@ -508,8 +592,8 @@ private:
readBinary(val, dbBuf); readBinary(val, dbBuf);
container = containerptr_roaring_bitmap_add(r, val, &typecode, &containerindex); container = containerptr_roaring_bitmap_add(r, val, &typecode, &containerindex);
prev = val; prev = val;
i++; ++i;
for (; i < n_args; i++) for (; i < n_args; ++i)
{ {
readBinary(val, dbBuf); readBinary(val, dbBuf);
if (((prev ^ val) >> 16) == 0) if (((prev ^ val) >> 16) == 0)

View File

@ -10,6 +10,7 @@
#include <Columns/ColumnArray.h> #include <Columns/ColumnArray.h>
#include <Common/HashTable/HashSet.h> #include <Common/HashTable/HashSet.h>
#include <Common/HashTable/HashTableKeyHolder.h>
#include <Common/assert_cast.h> #include <Common/assert_cast.h>
#include <AggregateFunctions/IAggregateFunction.h> #include <AggregateFunctions/IAggregateFunction.h>
@ -132,11 +133,6 @@ struct AggregateFunctionGroupUniqArrayGenericData
Set value; Set value;
}; };
/// Helper function for deserialize and insert for the class AggregateFunctionGroupUniqArrayGeneric
template <bool is_plain_column>
static StringRef getSerializationImpl(const IColumn & column, size_t row_num, Arena & arena);
template <bool is_plain_column> template <bool is_plain_column>
static void deserializeAndInsertImpl(StringRef str, IColumn & data_to); static void deserializeAndInsertImpl(StringRef str, IColumn & data_to);
@ -154,9 +150,18 @@ class AggregateFunctionGroupUniqArrayGeneric
using State = AggregateFunctionGroupUniqArrayGenericData; using State = AggregateFunctionGroupUniqArrayGenericData;
static StringRef getSerialization(const IColumn & column, size_t row_num, Arena & arena) static auto getKeyHolder(const IColumn & column, size_t row_num, Arena & arena)
{ {
return getSerializationImpl<is_plain_column>(column, row_num, arena); if constexpr (is_plain_column)
{
return ArenaKeyHolder{column.getDataAt(row_num), arena};
}
else
{
const char * begin = nullptr;
StringRef serialized = column.serializeValueIntoArena(row_num, arena, begin);
return SerializedKeyHolder{serialized, arena};
}
} }
static void deserializeAndInsert(StringRef str, IColumn & data_to) static void deserializeAndInsert(StringRef str, IColumn & data_to)
@ -209,26 +214,13 @@ public:
void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override
{ {
auto & set = this->data(place).value; auto & set = this->data(place).value;
if (limit_num_elems && set.size() >= max_elems)
return;
bool inserted; bool inserted;
State::Set::iterator it; State::Set::iterator it;
auto key_holder = getKeyHolder(*columns[0], row_num, *arena);
if (limit_num_elems && set.size() >= max_elems) set.emplace(key_holder, it, inserted);
return;
StringRef str_serialized = getSerialization(*columns[0], row_num, *arena);
set.emplace(str_serialized, it, inserted);
if constexpr (!is_plain_column)
{
if (!inserted)
arena->rollback(str_serialized.size);
}
else
{
if (inserted)
it->getValueMutable().data = arena->insert(str_serialized.data, str_serialized.size);
}
} }
void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override
@ -241,15 +233,11 @@ public:
for (auto & rhs_elem : rhs_set) for (auto & rhs_elem : rhs_set)
{ {
if (limit_num_elems && cur_set.size() >= max_elems) if (limit_num_elems && cur_set.size() >= max_elems)
return ; return;
cur_set.emplace(rhs_elem.getValue(), it, inserted);
if (inserted) // We have to copy the keys to our arena.
{ assert(arena != nullptr);
if (it->getValue().size) cur_set.emplace(ArenaKeyHolder{rhs_elem.getValue(), *arena}, it, inserted);
it->getValueMutable().data = arena->insert(it->getValue().data, it->getValue().size);
else
it->getValueMutable().data = nullptr;
}
} }
} }
@ -271,20 +259,6 @@ public:
const char * getHeaderFilePath() const override { return __FILE__; } const char * getHeaderFilePath() const override { return __FILE__; }
}; };
template <>
inline StringRef getSerializationImpl<false>(const IColumn & column, size_t row_num, Arena & arena)
{
const char * begin = nullptr;
return column.serializeValueIntoArena(row_num, arena, begin);
}
template <>
inline StringRef getSerializationImpl<true>(const IColumn & column, size_t row_num, Arena &)
{
return column.getDataAt(row_num);
}
template <> template <>
inline void deserializeAndInsertImpl<false>(StringRef str, IColumn & data_to) inline void deserializeAndInsertImpl<false>(StringRef str, IColumn & data_to)
{ {

View File

@ -238,6 +238,10 @@ void Adam::read(ReadBuffer & buf)
void Adam::merge(const IWeightsUpdater & rhs, Float64 frac, Float64 rhs_frac) void Adam::merge(const IWeightsUpdater & rhs, Float64 frac, Float64 rhs_frac)
{ {
auto & adam_rhs = static_cast<const Adam &>(rhs); auto & adam_rhs = static_cast<const Adam &>(rhs);
if (adam_rhs.average_gradient.empty())
return;
if (average_gradient.empty()) if (average_gradient.empty())
{ {
if (!average_squared_gradient.empty() || if (!average_squared_gradient.empty() ||

View File

@ -31,8 +31,9 @@ namespace ErrorCodes
ConnectionPoolWithFailover::ConnectionPoolWithFailover( ConnectionPoolWithFailover::ConnectionPoolWithFailover(
ConnectionPoolPtrs nested_pools_, ConnectionPoolPtrs nested_pools_,
LoadBalancing load_balancing, LoadBalancing load_balancing,
time_t decrease_error_period_) time_t decrease_error_period_,
: Base(std::move(nested_pools_), decrease_error_period_, &Logger::get("ConnectionPoolWithFailover")) size_t max_error_cap_)
: Base(std::move(nested_pools_), decrease_error_period_, max_error_cap_, &Logger::get("ConnectionPoolWithFailover"))
, default_load_balancing(load_balancing) , default_load_balancing(load_balancing)
{ {
const std::string & local_hostname = getFQDNOrHostName(); const std::string & local_hostname = getFQDNOrHostName();
@ -73,6 +74,31 @@ IConnectionPool::Entry ConnectionPoolWithFailover::get(const ConnectionTimeouts
return Base::get(try_get_entry, get_priority); return Base::get(try_get_entry, get_priority);
} }
ConnectionPoolWithFailover::Status ConnectionPoolWithFailover::getStatus() const
{
const Base::PoolStates states = getPoolStates();
const Base::NestedPools pools = nested_pools;
assert(states.size() == pools.size());
ConnectionPoolWithFailover::Status result;
result.reserve(states.size());
const time_t since_last_error_decrease = time(nullptr) - last_error_decrease_time;
for (size_t i = 0; i < states.size(); ++i)
{
const auto rounds_to_zero_errors = states[i].error_count ? bitScanReverse(states[i].error_count) + 1 : 0;
const auto seconds_to_zero_errors = std::max(static_cast<time_t>(0), rounds_to_zero_errors * decrease_error_period - since_last_error_decrease);
result.emplace_back(NestedPoolStatus{
pools[i].get(),
states[i].error_count,
std::chrono::seconds{seconds_to_zero_errors}
});
}
return result;
}
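
getStatus() estimates when a pool's error counter decays to zero: assuming the counter is halved once per decrease_error_period (which is what the bitScanReverse(error_count) + 1 rounds-to-zero computation above implies), an error_count of N needs floor(log2 N) + 1 halvings. The arithmetic on sample numbers (standalone, with a shift loop standing in for bitScanReverse):

#include <algorithm>
#include <cstdint>
#include <iostream>

int main()
{
    const int64_t decrease_error_period = 60;  /// seconds between halvings (example value)
    const int64_t since_last_decrease = 20;    /// seconds already elapsed in the current round
    const uint64_t error_count = 5;            /// errors decay 5 -> 2 -> 1 -> 0, i.e. 3 rounds

    int64_t rounds_to_zero = 0;
    for (uint64_t e = error_count; e != 0; e >>= 1)  /// same as bitScanReverse(error_count) + 1
        ++rounds_to_zero;

    const int64_t seconds_to_zero =
        std::max<int64_t>(0, rounds_to_zero * decrease_error_period - since_last_decrease);

    std::cout << seconds_to_zero << "s until the pool forgets these errors\n";  /// 160s
}
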
std::vector<IConnectionPool::Entry> ConnectionPoolWithFailover::getMany(const ConnectionTimeouts & timeouts, std::vector<IConnectionPool::Entry> ConnectionPoolWithFailover::getMany(const ConnectionTimeouts & timeouts,
const Settings * settings, const Settings * settings,
PoolMode pool_mode) PoolMode pool_mode)

View File

@ -3,6 +3,9 @@
#include <Common/PoolWithFailoverBase.h> #include <Common/PoolWithFailoverBase.h>
#include <Client/ConnectionPool.h> #include <Client/ConnectionPool.h>
#include <chrono>
#include <vector>
namespace DB namespace DB
{ {
@ -34,7 +37,8 @@ public:
ConnectionPoolWithFailover( ConnectionPoolWithFailover(
ConnectionPoolPtrs nested_pools_, ConnectionPoolPtrs nested_pools_,
LoadBalancing load_balancing, LoadBalancing load_balancing,
time_t decrease_error_period_ = DBMS_CONNECTION_POOL_WITH_FAILOVER_DEFAULT_DECREASE_ERROR_PERIOD); time_t decrease_error_period_ = DBMS_CONNECTION_POOL_WITH_FAILOVER_DEFAULT_DECREASE_ERROR_PERIOD,
size_t max_error_cap = DBMS_CONNECTION_POOL_WITH_FAILOVER_MAX_ERROR_COUNT);
using Entry = IConnectionPool::Entry; using Entry = IConnectionPool::Entry;
@ -64,6 +68,16 @@ public:
PoolMode pool_mode, PoolMode pool_mode,
const QualifiedTableName & table_to_check); const QualifiedTableName & table_to_check);
struct NestedPoolStatus
{
const IConnectionPool * pool;
size_t error_count;
std::chrono::seconds estimated_recovery_time;
};
using Status = std::vector<NestedPoolStatus>;
Status getStatus() const;
private: private:
/// Get the values of relevant settings and call Base::getMany() /// Get the values of relevant settings and call Base::getMany()
std::vector<TryResult> getManyImpl( std::vector<TryResult> getManyImpl(

View File

@ -156,6 +156,24 @@ void ColumnNullable::insertFrom(const IColumn & src, size_t n)
getNullMapData().push_back(src_concrete.getNullMapData()[n]); getNullMapData().push_back(src_concrete.getNullMapData()[n]);
} }
void ColumnNullable::insertFromNotNullable(const IColumn & src, size_t n)
{
getNestedColumn().insertFrom(src, n);
getNullMapData().push_back(0);
}
void ColumnNullable::insertRangeFromNotNullable(const IColumn & src, size_t start, size_t length)
{
getNestedColumn().insertRangeFrom(src, start, length);
getNullMapData().resize_fill(getNullMapData().size() + length, 0);
}
void ColumnNullable::insertManyFromNotNullable(const IColumn & src, size_t position, size_t length)
{
for (size_t i = 0; i < length; ++i)
insertFromNotNullable(src, position);
}
void ColumnNullable::popBack(size_t n) void ColumnNullable::popBack(size_t n)
{ {
getNestedColumn().popBack(n); getNestedColumn().popBack(n);

View File

@ -61,6 +61,10 @@ public:
void insert(const Field & x) override; void insert(const Field & x) override;
void insertFrom(const IColumn & src, size_t n) override; void insertFrom(const IColumn & src, size_t n) override;
void insertFromNotNullable(const IColumn & src, size_t n);
void insertRangeFromNotNullable(const IColumn & src, size_t start, size_t length);
void insertManyFromNotNullable(const IColumn & src, size_t position, size_t length);
void insertDefault() override void insertDefault() override
{ {
getNestedColumn().insertDefault(); getNestedColumn().insertDefault();

View File

@ -146,6 +146,13 @@ public:
/// Could be used to concatenate columns. /// Could be used to concatenate columns.
virtual void insertRangeFrom(const IColumn & src, size_t start, size_t length) = 0; virtual void insertRangeFrom(const IColumn & src, size_t start, size_t length) = 0;
/// Appends one element from other column with the same type multiple times.
virtual void insertManyFrom(const IColumn & src, size_t position, size_t length)
{
for (size_t i = 0; i < length; ++i)
insertFrom(src, position);
}
/// Appends data located in specified memory chunk if it is possible (throws an exception if it cannot be implemented). /// Appends data located in specified memory chunk if it is possible (throws an exception if it cannot be implemented).
/// Is used to optimize some computations (in aggregation, for example). /// Is used to optimize some computations (in aggregation, for example).
/// Parameter length could be ignored if column values have fixed size. /// Parameter length could be ignored if column values have fixed size.
@ -157,6 +164,13 @@ public:
/// For example, ColumnNullable(Nested) absolutely ignores values of nested column if it is marked as NULL. /// For example, ColumnNullable(Nested) absolutely ignores values of nested column if it is marked as NULL.
virtual void insertDefault() = 0; virtual void insertDefault() = 0;
/// Appends "default value" multiple times.
virtual void insertManyDefaults(size_t length)
{
for (size_t i = 0; i < length; ++i)
insertDefault();
}
/** Removes last n elements. /** Removes last n elements.
* Is used to support exception-safety of several operations. * Is used to support exception-safety of several operations.
* For example, sometimes insertion should be reverted if we catch an exception during operation processing. * For example, sometimes insertion should be reverted if we catch an exception during operation processing.
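
The defaults above simply loop over insertFrom/insertDefault; a concrete column can override them with one bulk operation. A sketch of what such an override could look like for a hypothetical flat numeric column (not a class from this diff):

#include <cstddef>
#include <cstdint>
#include <vector>

/// Hypothetical flat numeric column, only to illustrate the override:
/// one bulk resize instead of `length` virtual insertDefault() calls.
struct ColumnUInt64Sketch
{
    std::vector<uint64_t> data;

    void insertManyDefaults(size_t length)
    {
        data.resize(data.size() + length, 0);  /// bulk fill with the default value
    }

    void insertManyFrom(const ColumnUInt64Sketch & src, size_t position, size_t length)
    {
        data.resize(data.size() + length, src.data[position]);  /// repeat one value
    }
};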

View File

@ -178,8 +178,13 @@ protected:
// hash tables, it makes sense to pre-fault the pages by passing // hash tables, it makes sense to pre-fault the pages by passing
// MAP_POPULATE to mmap(). This takes some time, but should be faster // MAP_POPULATE to mmap(). This takes some time, but should be faster
// overall than having a hot loop interrupted by page faults. // overall than having a hot loop interrupted by page faults.
// It is only supported on Linux.
#if defined(__linux__)
static constexpr int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS static constexpr int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS
| (mmap_populate ? MAP_POPULATE : 0); | (mmap_populate ? MAP_POPULATE : 0);
#else
static constexpr int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS;
#endif
private: private:
void * allocNoTrack(size_t size, size_t alignment) void * allocNoTrack(size_t size, size_t alignment)

View File

@ -97,13 +97,23 @@ private:
size_t size_after_grow = 0; size_t size_after_grow = 0;
if (head->size() < linear_growth_threshold) if (head->size() < linear_growth_threshold)
size_after_grow = head->size() * growth_factor; {
size_after_grow = std::max(min_next_size, head->size() * growth_factor);
}
else else
size_after_grow = linear_growth_threshold; {
// allocContinue() combined with linear growth results in quadratic
if (size_after_grow < min_next_size) // behavior: we append the data by small amounts, and when it
size_after_grow = min_next_size; // doesn't fit, we create a new chunk and copy all the previous data
// into it. The number of times we do this is directly proportional
// to the total size of data that is going to be serialized. To make
// the copying happen less often, round the next size up to the
// linear_growth_threshold.
size_after_grow = ((min_next_size + linear_growth_threshold - 1)
/ linear_growth_threshold) * linear_growth_threshold;
}
assert(size_after_grow >= min_next_size);
return roundUpToPageSize(size_after_grow); return roundUpToPageSize(size_after_grow);
} }
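
Past the geometric phase, the next size is rounded up to a multiple of linear_growth_threshold, so a sequence of small allocContinue() calls copies the data O(total_size / threshold) times instead of once per append. The rounding expression, checked on sample values:

#include <cstddef>

/// Round min_next_size up to the next multiple of linear_growth_threshold:
/// the same integer idiom as in nextSize() above.
constexpr size_t roundUp(size_t min_next_size, size_t linear_growth_threshold)
{
    return ((min_next_size + linear_growth_threshold - 1)
        / linear_growth_threshold) * linear_growth_threshold;
}

static_assert(roundUp(1, 4096) == 4096, "a small request is rounded up to one threshold");
static_assert(roundUp(4096, 4096) == 4096, "an exact multiple stays as-is");
static_assert(roundUp(4097, 4096) == 8192, "one byte over goes to the next multiple");
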
@ -170,72 +180,78 @@ public:
/** Rollback just performed allocation. /** Rollback just performed allocation.
* Must pass a size no more than was just allocated. * Must pass a size no more than was just allocated.
* Return the resulting head pointer, so that the caller can assert that
* the allocation it intended to roll back was indeed the last one.
*/ */
void rollback(size_t size) void * rollback(size_t size)
{ {
head->pos -= size; head->pos -= size;
ASAN_POISON_MEMORY_REGION(head->pos, size + pad_right); ASAN_POISON_MEMORY_REGION(head->pos, size + pad_right);
return head->pos;
} }
/** Begin or expand allocation of contiguous piece of memory without alignment. /** Begin or expand a contiguous range of memory.
* 'begin' - current begin of piece of memory, if it need to be expanded, or nullptr, if it need to be started. * 'range_start' is the start of range. If nullptr, a new range is
* If there is no space in chunk to expand current piece of memory - then copy all piece to new chunk and change value of 'begin'. * allocated.
* NOTE This method is usable only for latest allocation. For earlier allocations, see 'realloc' method. * If there is no space in the current chunk to expand the range,
* the entire range is copied to a new, bigger memory chunk, and the value
* of 'range_start' is updated.
* If the optional 'start_alignment' is specified, the start of range is
* kept aligned to this value.
*
* NOTE This method is usable only for the last allocation made on this
* Arena. For earlier allocations, see 'realloc' method.
*/ */
char * allocContinue(size_t size, char const *& begin) char * allocContinue(size_t additional_bytes, char const *& range_start,
size_t start_alignment = 0)
{ {
while (unlikely(head->pos + size > head->end)) if (!range_start)
{ {
char * prev_end = head->pos; // Start a new memory range.
addChunk(size); char * result = start_alignment
? alignedAlloc(additional_bytes, start_alignment)
: alloc(additional_bytes);
if (begin) range_start = result;
begin = insert(begin, prev_end - begin); return result;
else
break;
} }
char * res = head->pos; // Extend an existing memory range with 'additional_bytes'.
head->pos += size;
if (!begin) // This method only works for extending the last allocation. For lack of
begin = res; // the original size, check a weaker condition: that 'range_start' is
// at least in the current Chunk.
assert(range_start >= head->begin && range_start < head->end);
ASAN_UNPOISON_MEMORY_REGION(res, size + pad_right); if (head->pos + additional_bytes <= head->end)
return res;
}
char * alignedAllocContinue(size_t size, char const *& begin, size_t alignment)
{
char * res;
do
{ {
void * head_pos = head->pos; // The new size fits into the last chunk, so just alloc the
size_t space = head->end - head->pos; // additional size. We can alloc without alignment here, because it
// only applies to the start of the range, and we don't change it.
return alloc(additional_bytes);
}
res = static_cast<char *>(std::align(alignment, size, head_pos, space)); // New range doesn't fit into this chunk, will copy to a new one.
if (res) //
{ // Note: among other things, this method is used to provide a hack-ish
head->pos = static_cast<char *>(head_pos); // implementation of realloc over Arenas in ArenaAllocators. It wastes a
head->pos += size; // lot of memory -- quadratically so when we reach the linear allocation
break; // threshold. This deficiency is intentionally left as is, and should be
} // solved not by complicating this method, but by rethinking the
// approach to memory management for aggregate function states, so that
// we can provide a proper realloc().
const size_t existing_bytes = head->pos - range_start;
const size_t new_bytes = existing_bytes + additional_bytes;
const char * old_range = range_start;
char * prev_end = head->pos; char * new_range = start_alignment
addChunk(size + alignment); ? alignedAlloc(new_bytes, start_alignment)
: alloc(new_bytes);
if (begin) memcpy(new_range, old_range, existing_bytes);
begin = alignedInsert(begin, prev_end - begin, alignment);
else
break;
} while (true);
if (!begin) range_start = new_range;
begin = res; return new_range + existing_bytes;
ASAN_UNPOISON_MEMORY_REGION(res, size + pad_right);
return res;
} }
/// NOTE Old memory region is wasted. /// NOTE Old memory region is wasted.
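
The rewritten allocContinue() contract: pass nullptr to start a range, keep appending, and expect range_start to be updated whenever the data has to move to a bigger chunk. A toy model of that contract (deliberately not the real Arena, which keeps a chunk list, padding and alignment handling):

#include <cassert>
#include <cstdlib>
#include <cstring>

struct ToyArena
{
    char * begin;
    char * pos;
    char * end;

    explicit ToyArena(size_t chunk_size)
        : begin(static_cast<char *>(std::malloc(chunk_size)))
        , pos(begin), end(begin + chunk_size) {}

    ~ToyArena() { std::free(begin); }

    char * allocContinue(size_t additional, const char *& range_start)
    {
        if (!range_start)
            range_start = pos;  /// start a new range at the current position

        if (pos + additional > end)  /// grow: copy the whole range to a new chunk
        {
            const size_t existing = pos - range_start;
            const size_t new_chunk = 2 * (existing + additional);
            char * fresh = static_cast<char *>(std::malloc(new_chunk));
            std::memcpy(fresh, range_start, existing);
            std::free(begin);
            begin = fresh;
            range_start = fresh;  /// the caller's pointer is updated, as in Arena
            pos = fresh + existing;
            end = fresh + new_chunk;
        }

        char * res = pos;
        pos += additional;
        return res;
    }
};

int main()
{
    ToyArena arena(16);
    const char * range_start = nullptr;

    std::memcpy(arena.allocContinue(8, range_start), "01234567", 8);
    std::memcpy(arena.allocContinue(8, range_start), "89abcdef", 8);
    std::memcpy(arena.allocContinue(8, range_start), "ghijklmn", 8);  /// forces a copy

    assert(std::memcmp(range_start, "0123456789abcdefghijklmn", 24) == 0);
}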

View File

@ -54,7 +54,7 @@ public:
if (data + old_size == arena->head->pos) if (data + old_size == arena->head->pos)
{ {
arena->alignedAllocContinue(new_size - old_size, data, alignment); arena->allocContinue(new_size - old_size, data, alignment);
return reinterpret_cast<void *>(const_cast<char *>(data)); return reinterpret_cast<void *>(const_cast<char *>(data));
} }
else else

View File

@ -1,6 +1,8 @@
#pragma once #pragma once
#include <Common/HashTable/HashTable.h>
#include <Common/HashTable/HashTableKeyHolder.h>
#include <Common/ColumnsHashingImpl.h> #include <Common/ColumnsHashingImpl.h>
#include <Common/Arena.h> #include <Common/Arena.h>
#include <Common/LRUCache.h> #include <Common/LRUCache.h>
@ -57,13 +59,7 @@ struct HashMethodOneNumber
using Base::getHash; /// (const Data & data, size_t row, Arena & pool) -> size_t using Base::getHash; /// (const Data & data, size_t row, Arena & pool) -> size_t
/// Is used for default implementation in HashMethodBase. /// Is used for default implementation in HashMethodBase.
FieldType getKey(size_t row, Arena &) const { return unalignedLoad<FieldType>(vec + row * sizeof(FieldType)); } FieldType getKeyHolder(size_t row, Arena &) const { return unalignedLoad<FieldType>(vec + row * sizeof(FieldType)); }
/// Get StringRef from value which can be inserted into column.
static StringRef getValueRef(const Value & value)
{
return StringRef(reinterpret_cast<const char *>(&value.first), sizeof(value.first));
}
}; };
@ -86,24 +82,22 @@ struct HashMethodString
chars = column_string.getChars().data(); chars = column_string.getChars().data();
} }
auto getKey(ssize_t row, Arena &) const auto getKeyHolder(ssize_t row, [[maybe_unused]] Arena & pool) const
{ {
return StringRef(chars + offsets[row - 1], offsets[row] - offsets[row - 1] - 1); StringRef key(chars + offsets[row - 1], offsets[row] - offsets[row - 1] - 1);
}
static StringRef getValueRef(const Value & value) { return value.first; } if constexpr (place_string_to_arena)
{
return ArenaKeyHolder{key, pool};
}
else
{
return key;
}
}
protected: protected:
friend class columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>; friend class columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>;
static ALWAYS_INLINE void onNewKey([[maybe_unused]] StringRef & key, [[maybe_unused]] Arena & pool)
{
if constexpr (place_string_to_arena)
{
if (key.size)
key.data = pool.insert(key.data, key.size);
}
}
}; };
@ -126,17 +120,22 @@ struct HashMethodFixedString
chars = &column_string.getChars(); chars = &column_string.getChars();
} }
StringRef getKey(size_t row, Arena &) const { return StringRef(&(*chars)[row * n], n); } auto getKeyHolder(size_t row, [[maybe_unused]] Arena & pool) const
{
StringRef key(&(*chars)[row * n], n);
static StringRef getValueRef(const Value & value) { return value.first; } if constexpr (place_string_to_arena)
{
return ArenaKeyHolder{key, pool};
}
else
{
return key;
}
}
protected: protected:
friend class columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>; friend class columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>;
static ALWAYS_INLINE void onNewKey([[maybe_unused]] StringRef & key, [[maybe_unused]] Arena & pool)
{
if constexpr (place_string_to_arena)
key.data = pool.insert(key.data, key.size);
}
}; };
@ -316,10 +315,10 @@ struct HashMethodSingleLowCardinalityColumn : public SingleColumnMethod
} }
} }
/// Get the key from the key columns for insertion into the hash table. /// Get the key holder from the key columns for insertion into the hash table.
ALWAYS_INLINE auto getKey(size_t row, Arena & pool) const ALWAYS_INLINE auto getKeyHolder(size_t row, Arena & pool) const
{ {
return Base::getKey(getIndexAt(row), pool); return Base::getKeyHolder(getIndexAt(row), pool);
} }
template <typename Data> template <typename Data>
@ -347,30 +346,23 @@ struct HashMethodSingleLowCardinalityColumn : public SingleColumnMethod
return EmplaceResult(false); return EmplaceResult(false);
} }
auto key = getKey(row_, pool); auto key_holder = getKeyHolder(row_, pool);
bool inserted = false; bool inserted = false;
typename Data::iterator it; typename Data::iterator it;
if (saved_hash) if (saved_hash)
data.emplace(key, it, inserted, saved_hash[row]); data.emplace(key_holder, it, inserted, saved_hash[row]);
else else
data.emplace(key, it, inserted); data.emplace(key_holder, it, inserted);
visit_cache[row] = VisitValue::Found; visit_cache[row] = VisitValue::Found;
if (inserted)
{
if constexpr (has_mapped)
{
new(&it->getSecond()) Mapped();
Base::onNewKey(it->getFirstMutable(), pool);
}
else
Base::onNewKey(*it, pool);
}
if constexpr (has_mapped) if constexpr (has_mapped)
{ {
if (inserted)
{
new (&it->getSecond()) Mapped();
}
mapped_cache[row] = it->getSecond(); mapped_cache[row] = it->getSecond();
return EmplaceResult(it->getSecond(), mapped_cache[row], inserted); return EmplaceResult(it->getSecond(), mapped_cache[row], inserted);
} }
@ -407,13 +399,13 @@ struct HashMethodSingleLowCardinalityColumn : public SingleColumnMethod
return FindResult(visit_cache[row] == VisitValue::Found); return FindResult(visit_cache[row] == VisitValue::Found);
} }
auto key = getKey(row_, pool); auto key_holder = getKeyHolder(row_, pool);
typename Data::iterator it; typename Data::iterator it;
if (saved_hash) if (saved_hash)
it = data.find(key, saved_hash[row]); it = data.find(*key_holder, saved_hash[row]);
else else
it = data.find(key); it = data.find(*key_holder);
bool found = it != data.end(); bool found = it != data.end();
visit_cache[row] = found ? VisitValue::Found : VisitValue::NotFound; visit_cache[row] = found ? VisitValue::Found : VisitValue::NotFound;
@ -493,7 +485,7 @@ struct HashMethodKeysFixed
} }
} }
ALWAYS_INLINE Key getKey(size_t row, Arena &) const ALWAYS_INLINE Key getKeyHolder(size_t row, Arena &) const
{ {
if constexpr (has_nullable_keys) if constexpr (has_nullable_keys)
{ {
@ -532,12 +524,12 @@ struct HashMethodSerialized
protected: protected:
friend class columns_hashing_impl::HashMethodBase<Self, Value, Mapped, false>; friend class columns_hashing_impl::HashMethodBase<Self, Value, Mapped, false>;
ALWAYS_INLINE StringRef getKey(size_t row, Arena & pool) const ALWAYS_INLINE SerializedKeyHolder getKeyHolder(size_t row, Arena & pool) const
{ {
return serializeKeysToPoolContiguous(row, keys_size, key_columns, pool); return SerializedKeyHolder{
serializeKeysToPoolContiguous(row, keys_size, key_columns, pool),
pool};
} }
static ALWAYS_INLINE void onExistingKey(StringRef & key, Arena & pool) { pool.rollback(key.size); }
}; };
/// For the case when there is one string key. /// For the case when there is one string key.
@ -554,11 +546,9 @@ struct HashMethodHashed
HashMethodHashed(ColumnRawPtrs key_columns_, const Sizes &, const HashMethodContextPtr &) HashMethodHashed(ColumnRawPtrs key_columns_, const Sizes &, const HashMethodContextPtr &)
: key_columns(std::move(key_columns_)) {} : key_columns(std::move(key_columns_)) {}
ALWAYS_INLINE Key getKey(size_t row, Arena &) const { return hash128(row, key_columns.size(), key_columns); } ALWAYS_INLINE Key getKeyHolder(size_t row, Arena &) const
static ALWAYS_INLINE StringRef getValueRef(const Value & value)
{ {
return StringRef(reinterpret_cast<const char *>(&value.first), sizeof(value.first)); return hash128(row, key_columns.size(), key_columns);
} }
}; };

View File

@ -2,6 +2,7 @@
#include <Columns/IColumn.h> #include <Columns/IColumn.h>
#include <Common/assert_cast.h> #include <Common/assert_cast.h>
#include <Common/HashTable/HashTableKeyHolder.h>
#include <Interpreters/AggregationCommon.h> #include <Interpreters/AggregationCommon.h>
@ -117,26 +118,22 @@ public:
template <typename Data> template <typename Data>
ALWAYS_INLINE EmplaceResult emplaceKey(Data & data, size_t row, Arena & pool) ALWAYS_INLINE EmplaceResult emplaceKey(Data & data, size_t row, Arena & pool)
{ {
auto key = static_cast<Derived &>(*this).getKey(row, pool); auto key_holder = static_cast<Derived &>(*this).getKeyHolder(row, pool);
return emplaceKeyImpl(key, data, pool); return emplaceImpl(key_holder, data);
} }
template <typename Data> template <typename Data>
ALWAYS_INLINE FindResult findKey(Data & data, size_t row, Arena & pool) ALWAYS_INLINE FindResult findKey(Data & data, size_t row, Arena & pool)
{ {
auto key = static_cast<Derived &>(*this).getKey(row, pool); auto key_holder = static_cast<Derived &>(*this).getKeyHolder(row, pool);
auto res = findKeyImpl(key, data); return findKeyImpl(keyHolderGetKey(key_holder), data);
static_cast<Derived &>(*this).onExistingKey(key, pool);
return res;
} }
template <typename Data> template <typename Data>
ALWAYS_INLINE size_t getHash(const Data & data, size_t row, Arena & pool) ALWAYS_INLINE size_t getHash(const Data & data, size_t row, Arena & pool)
{ {
auto key = static_cast<Derived &>(*this).getKey(row, pool); auto key_holder = static_cast<Derived &>(*this).getKeyHolder(row, pool);
auto res = data.hash(key); return data.hash(keyHolderGetKey(key_holder));
static_cast<Derived &>(*this).onExistingKey(key, pool);
return res;
} }
protected: protected:
@ -157,20 +154,13 @@ protected:
} }
} }
template <typename Key> template <typename Data, typename KeyHolder>
static ALWAYS_INLINE void onNewKey(Key & /*key*/, Arena & /*pool*/) {} ALWAYS_INLINE EmplaceResult emplaceImpl(KeyHolder & key_holder, Data & data)
template <typename Key>
static ALWAYS_INLINE void onExistingKey(Key & /*key*/, Arena & /*pool*/) {}
template <typename Data, typename Key>
ALWAYS_INLINE EmplaceResult emplaceKeyImpl(Key key, Data & data, Arena & pool)
{ {
if constexpr (Cache::consecutive_keys_optimization) if constexpr (Cache::consecutive_keys_optimization)
{ {
if (cache.found && cache.check(key)) if (cache.found && cache.check(keyHolderGetKey(key_holder)))
{ {
static_cast<Derived &>(*this).onExistingKey(key, pool);
if constexpr (has_mapped) if constexpr (has_mapped)
return EmplaceResult(cache.value.second, cache.value.second, false); return EmplaceResult(cache.value.second, cache.value.second, false);
else else
@ -180,7 +170,7 @@ protected:
typename Data::iterator it; typename Data::iterator it;
bool inserted = false; bool inserted = false;
data.emplace(key, it, inserted); data.emplace(key_holder, it, inserted);
[[maybe_unused]] Mapped * cached = nullptr; [[maybe_unused]] Mapped * cached = nullptr;
if constexpr (has_mapped) if constexpr (has_mapped)
@ -191,13 +181,8 @@ protected:
if constexpr (has_mapped) if constexpr (has_mapped)
{ {
new(&it->getSecond()) Mapped(); new(&it->getSecond()) Mapped();
static_cast<Derived &>(*this).onNewKey(it->getFirstMutable(), pool);
} }
else
static_cast<Derived &>(*this).onNewKey(it->getValueMutable(), pool);
} }
else
static_cast<Derived &>(*this).onExistingKey(key, pool);
if constexpr (consecutive_keys_optimization) if constexpr (consecutive_keys_optimization)
{ {
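
The refactor replaces the onNewKey/onExistingKey callbacks with key holders: emplace() now receives an object that can persist its key into the arena when the key is actually inserted, or discard it (e.g. roll the arena back) when an equal key already exists. A condensed sketch of that contract with simplified types and a stubbed arena; the real hooks live in Common/HashTable/HashTableKeyHolder.h:

#include <cstddef>

struct StringRefSketch { const char * data = nullptr; size_t size = 0; };

struct ArenaSketch
{
    const char * insert(const char * d, size_t) { return d; }  /// stub: copy bytes into the arena
    void rollback(size_t) {}                                   /// stub: undo the latest allocation
};

/// ArenaKeyHolder: the key still points at column memory; it is copied into
/// the arena only when the hash table actually inserts it.
struct ArenaKeyHolderSketch { StringRefSketch key; ArenaSketch & pool; };

inline StringRefSketch keyHolderGetKey(const ArenaKeyHolderSketch & h) { return h.key; }
inline void keyHolderPersistKey(ArenaKeyHolderSketch & h) { h.key.data = h.pool.insert(h.key.data, h.key.size); }
inline void keyHolderDiscardKey(ArenaKeyHolderSketch &) {}  /// nothing was allocated yet

/// SerializedKeyHolder: the key was already serialized into the arena, so an
/// insertion keeps it as-is, while a duplicate rolls the allocation back.
struct SerializedKeyHolderSketch { StringRefSketch key; ArenaSketch & pool; };

inline StringRefSketch keyHolderGetKey(const SerializedKeyHolderSketch & h) { return h.key; }
inline void keyHolderPersistKey(SerializedKeyHolderSketch &) {}
inline void keyHolderDiscardKey(SerializedKeyHolderSketch & h) { h.pool.rollback(h.key.size); }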

View File

@ -223,5 +223,10 @@ void DNSResolver::addToNewHosts(const String & host)
DNSResolver::~DNSResolver() = default; DNSResolver::~DNSResolver() = default;
DNSResolver & DNSResolver::instance()
{
static DNSResolver ret;
return ret;
}
} }

View File

@ -2,9 +2,9 @@
#include <Poco/Net/IPAddress.h> #include <Poco/Net/IPAddress.h>
#include <Poco/Net/SocketAddress.h> #include <Poco/Net/SocketAddress.h>
#include <memory> #include <memory>
#include <ext/singleton.h>
#include <Core/Types.h> #include <Core/Types.h>
#include <Core/Names.h> #include <Core/Names.h>
#include <boost/noncopyable.hpp>
namespace DB namespace DB
@ -13,9 +13,10 @@ namespace DB
/// A singleton implementing DNS names resolving with optional DNS cache /// A singleton implementing DNS names resolving with optional DNS cache
/// The cache is being updated asynchronous in separate thread (see DNSCacheUpdater) /// The cache is being updated asynchronous in separate thread (see DNSCacheUpdater)
/// or it could be updated manually via drop() method. /// or it could be updated manually via drop() method.
class DNSResolver : public ext::singleton<DNSResolver> class DNSResolver : private boost::noncopyable
{ {
public: public:
static DNSResolver & instance();
DNSResolver(const DNSResolver &) = delete; DNSResolver(const DNSResolver &) = delete;
@ -46,8 +47,6 @@ private:
DNSResolver(); DNSResolver();
friend class ext::singleton<DNSResolver>;
struct Impl; struct Impl;
std::unique_ptr<Impl> impl; std::unique_ptr<Impl> impl;

View File

@ -0,0 +1,538 @@
#include <Common/DiskSpaceMonitor.h>
#include <set>
#include <Common/escapeForFileName.h>
#include <Poco/File.h>
namespace DB
{
namespace DiskSpace
{
std::mutex Disk::mutex;
std::filesystem::path getMountPoint(std::filesystem::path absolute_path)
{
if (absolute_path.is_relative())
throw Exception("Path is relative. It's a bug.", ErrorCodes::LOGICAL_ERROR);
absolute_path = std::filesystem::canonical(absolute_path);
const auto get_device_id = [](const std::filesystem::path & p)
{
struct stat st;
if (stat(p.c_str(), &st))
throwFromErrnoWithPath("Cannot stat " + p.string(), p.string(), ErrorCodes::SYSTEM_ERROR);
return st.st_dev;
};
/// If /some/path/to/dir/ and /some/path/to/ have different device ids,
/// then the device that contains /some/path/to/dir/filename is mounted at /some/path/to/dir/
auto device_id = get_device_id(absolute_path);
while (absolute_path.has_relative_path())
{
auto parent = absolute_path.parent_path();
auto parent_device_id = get_device_id(parent);
if (device_id != parent_device_id)
return absolute_path;
absolute_path = parent;
device_id = parent_device_id;
}
return absolute_path;
}
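A minimal usage sketch of getMountPoint(), not part of this commit: the path is hypothetical and the snippet assumes it is compiled inside the ClickHouse tree and linked with the implementation above. If /dev/sdb1 is mounted at /mnt/data, walking up from /mnt/data/parts/all keeps returning the same st_dev until the parent of /mnt/data (that is, /mnt) reports a different device id, so the function returns /mnt/data.

#include <iostream>
#include <Common/DiskSpaceMonitor.h>

int main()
{
    /// Prints the mount point of the filesystem that holds the data directory.
    std::cout << DB::DiskSpace::getMountPoint("/var/lib/clickhouse/data") << '\n';
}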
/// Returns name of filesystem mounted to mount_point
#if !defined(__linux__)
[[noreturn]]
#endif
std::string getFilesystemName([[maybe_unused]] const std::string & mount_point)
{
#if defined(__linux__)
auto mounted_filesystems = setmntent("/etc/mtab", "r");
if (!mounted_filesystems)
throw DB::Exception("Cannot open /etc/mtab to get name of filesystem", ErrorCodes::SYSTEM_ERROR);
mntent fs_info;
constexpr size_t buf_size = 4096; /// The same buffer size that glibc uses for getmntent; it may still turn out to be too small
char buf[buf_size];
while (getmntent_r(mounted_filesystems, &fs_info, buf, buf_size) && fs_info.mnt_dir != mount_point)
;
endmntent(mounted_filesystems);
if (fs_info.mnt_dir != mount_point)
throw DB::Exception("Cannot find name of filesystem by mount point " + mount_point, ErrorCodes::SYSTEM_ERROR);
return fs_info.mnt_fsname;
#else
throw DB::Exception("Supported on linux only", ErrorCodes::NOT_IMPLEMENTED);
#endif
}
ReservationPtr Disk::reserve(UInt64 bytes) const
{
if (!tryReserve(bytes))
return {};
return std::make_unique<Reservation>(bytes, std::static_pointer_cast<const Disk>(shared_from_this()));
}
bool Disk::tryReserve(UInt64 bytes) const
{
std::lock_guard lock(mutex);
if (bytes == 0)
{
LOG_DEBUG(&Logger::get("DiskSpaceMonitor"), "Reserving 0 bytes on disk " << name);
++reservation_count;
return true;
}
auto available_space = getAvailableSpace();
UInt64 unreserved_space = available_space - std::min(available_space, reserved_bytes);
if (unreserved_space >= bytes)
{
LOG_DEBUG(
&Logger::get("DiskSpaceMonitor"),
"Reserving " << bytes << " bytes on disk " << name << " having unreserved " << unreserved_space << " bytes.");
++reservation_count;
reserved_bytes += bytes;
return true;
}
return false;
}
UInt64 Disk::getUnreservedSpace() const
{
std::lock_guard lock(mutex);
auto available_space = getSpaceInformation().getAvailableSpace();
available_space -= std::min(available_space, reserved_bytes);
return available_space;
}
UInt64 Disk::Stat::getTotalSpace() const
{
UInt64 total_size = fs.f_blocks * fs.f_bsize;
if (total_size < keep_free_space_bytes)
return 0;
return total_size - keep_free_space_bytes;
}
UInt64 Disk::Stat::getAvailableSpace() const
{
/// We use f_bavail, because part of the f_bfree space is
/// available to the superuser only, for system purposes
UInt64 total_size = fs.f_bavail * fs.f_bsize;
if (total_size < keep_free_space_bytes)
return 0;
return total_size - keep_free_space_bytes;
}
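A standalone sketch of the f_bavail/f_bfree distinction mentioned above (plain POSIX, no ClickHouse types): f_bfree counts blocks free for the superuser, while f_bavail counts only the blocks available to unprivileged processes, so the two differ by the filesystem's reserved blocks.

#include <sys/statvfs.h>
#include <cstdio>

int main()
{
    struct statvfs fs;
    if (statvfs("/", &fs) != 0)
        return 1;
    unsigned long long free_for_root = static_cast<unsigned long long>(fs.f_bfree) * fs.f_bsize;
    unsigned long long free_for_users = static_cast<unsigned long long>(fs.f_bavail) * fs.f_bsize;
    printf("free (superuser): %llu, free (unprivileged): %llu\n", free_for_root, free_for_users);
    return 0;
}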
Reservation::~Reservation()
{
try
{
std::lock_guard lock(Disk::mutex);
if (disk_ptr->reserved_bytes < size)
{
disk_ptr->reserved_bytes = 0;
LOG_ERROR(&Logger::get("DiskSpaceMonitor"), "Unbalanced reservations size for disk '" + disk_ptr->getName() + "'.");
}
else
{
disk_ptr->reserved_bytes -= size;
}
if (disk_ptr->reservation_count == 0)
LOG_ERROR(&Logger::get("DiskSpaceMonitor"), "Unbalanced reservation count for disk '" + disk_ptr->getName() + "'.");
else
--disk_ptr->reservation_count;
}
catch (...)
{
tryLogCurrentException("~DiskSpaceMonitor");
}
}
void Reservation::update(UInt64 new_size)
{
std::lock_guard lock(Disk::mutex);
disk_ptr->reserved_bytes -= size;
size = new_size;
disk_ptr->reserved_bytes += size;
}
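A lifecycle sketch for reservations, not a test from the tree (the disk name and path are hypothetical): the reserved bytes are accounted for in Disk::reserved_bytes while the Reservation object is alive and are given back in its destructor.

#include <memory>
#include <Common/DiskSpaceMonitor.h>

void reservationLifecycleExample()
{
    auto disk = std::make_shared<const DB::DiskSpace::Disk>("example", "/mnt/data/", 0);

    if (auto reservation = disk->reserve(10 << 20))  /// try to reserve 10 MiB
    {
        reservation->update(20 << 20);  /// grow to 20 MiB; growing performs no free-space check
        /// ... write a part of up to 20 MiB here ...
    }  /// ~Reservation releases the bytes and decrements the reservation count
}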
DiskSelector::DiskSelector(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, const String & default_path)
{
Poco::Util::AbstractConfiguration::Keys keys;
config.keys(config_prefix, keys);
constexpr auto default_disk_name = "default";
bool has_default_disk = false;
for (const auto & disk_name : keys)
{
if (!std::all_of(disk_name.begin(), disk_name.end(), isWordCharASCII))
throw Exception("Disk name can contain only alphanumeric and '_' (" + disk_name + ")",
ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG);
auto disk_config_prefix = config_prefix + "." + disk_name;
bool has_space_ratio = config.has(disk_config_prefix + ".keep_free_space_ratio");
if (config.has(disk_config_prefix + ".keep_free_space_bytes") && has_space_ratio)
throw Exception("Only one of 'keep_free_space_bytes' and 'keep_free_space_ratio' can be specified",
ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG);
UInt64 keep_free_space_bytes = config.getUInt64(disk_config_prefix + ".keep_free_space_bytes", 0);
String path;
if (config.has(disk_config_prefix + ".path"))
path = config.getString(disk_config_prefix + ".path");
if (has_space_ratio)
{
auto ratio = config.getDouble(disk_config_prefix + ".keep_free_space_ratio");
if (ratio < 0 || ratio > 1)
throw Exception("'keep_free_space_ratio' has to be between 0 and 1",
ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG);
String tmp_path = path;
if (tmp_path.empty())
tmp_path = default_path;
// Create tmp disk for getting total disk space.
keep_free_space_bytes = static_cast<UInt64>(Disk("tmp", tmp_path, 0).getTotalSpace() * ratio);
}
if (disk_name == default_disk_name)
{
has_default_disk = true;
if (!path.empty())
throw Exception("\"default\" disk path should be provided in <path> not it <storage_configuration>",
ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG);
disks.emplace(disk_name, std::make_shared<const Disk>(disk_name, default_path, keep_free_space_bytes));
}
else
{
if (path.empty())
throw Exception("Disk path can not be empty. Disk " + disk_name, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG);
if (path.back() != '/')
throw Exception("Disk path must end with /. Disk " + disk_name, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG);
disks.emplace(disk_name, std::make_shared<const Disk>(disk_name, path, keep_free_space_bytes));
}
}
if (!has_default_disk)
disks.emplace(default_disk_name, std::make_shared<const Disk>(default_disk_name, default_path, 0));
}
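For reference, a sketch of the configuration shape this constructor parses (the disk name "big" is hypothetical), together with typical usage:

//   <storage_configuration>
//       <disks>
//           <default/>                  <!-- path is taken from <path> -->
//           <big>
//               <path>/mnt/big/</path>
//               <keep_free_space_bytes>1073741824</keep_free_space_bytes>
//           </big>
//       </disks>
//   </storage_configuration>

#include <Common/DiskSpaceMonitor.h>

DB::DiskSpace::DiskPtr pickBigDisk(const Poco::Util::AbstractConfiguration & config)
{
    DB::DiskSpace::DiskSelector selector(config, "storage_configuration.disks", "/var/lib/clickhouse/");
    return selector["big"];  /// throws UNKNOWN_DISK if <big> is not configured
}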
const DiskPtr & DiskSelector::operator[](const String & name) const
{
auto it = disks.find(name);
if (it == disks.end())
throw Exception("Unknown disk " + name, ErrorCodes::UNKNOWN_DISK);
return it->second;
}
Volume::Volume(
String name_,
const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix,
const DiskSelector & disk_selector)
: name(std::move(name_))
{
Poco::Util::AbstractConfiguration::Keys keys;
config.keys(config_prefix, keys);
Logger * logger = &Logger::get("StorageConfiguration");
for (const auto & disk : keys)
{
if (startsWith(disk, "disk"))
{
auto disk_name = config.getString(config_prefix + "." + disk);
disks.push_back(disk_selector[disk_name]);
}
}
if (disks.empty())
throw Exception("Volume must contain at least one disk.", ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG);
auto has_max_bytes = config.has(config_prefix + ".max_data_part_size_bytes");
auto has_max_ratio = config.has(config_prefix + ".max_data_part_size_ratio");
if (has_max_bytes && has_max_ratio)
throw Exception("Only one of 'max_data_part_size_bytes' and 'max_data_part_size_ratio' should be specified.",
ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG);
if (has_max_bytes)
{
max_data_part_size = config.getUInt64(config_prefix + ".max_data_part_size_bytes", 0);
}
else if (has_max_ratio)
{
auto ratio = config.getDouble(config_prefix + ".max_data_part_size_ratio");
if (ratio < 0)
throw Exception("'max_data_part_size_ratio' have to be not less then 0.",
ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG);
UInt64 sum_size = 0;
std::vector<UInt64> sizes;
for (const auto & disk : disks)
{
sizes.push_back(disk->getTotalSpace());
sum_size += sizes.back();
}
max_data_part_size = static_cast<decltype(max_data_part_size)>(sum_size * ratio / disks.size());
for (size_t i = 0; i < disks.size(); ++i)
if (sizes[i] < max_data_part_size)
LOG_WARNING(logger, "Disk " << disks[i]->getName() << " on volume " << config_prefix <<
" have not enough space (" << sizes[i] <<
") for containing part the size of max_data_part_size (" <<
max_data_part_size << ")");
}
constexpr UInt64 MIN_PART_SIZE = 8u * 1024u * 1024u;
if (max_data_part_size < MIN_PART_SIZE)
LOG_WARNING(logger, "Volume '" << name << "' max_data_part_size is too low ("
<< formatReadableSizeWithBinarySuffix(max_data_part_size) << " < "
<< formatReadableSizeWithBinarySuffix(MIN_PART_SIZE) << ")");
}
ReservationPtr Volume::reserve(UInt64 expected_size) const
{
/// This volume cannot store files whose size is greater than max_data_part_size
if (max_data_part_size != 0 && expected_size > max_data_part_size)
return {};
size_t start_from = last_used.fetch_add(1u, std::memory_order_relaxed);
size_t disks_num = disks.size();
for (size_t i = 0; i < disks_num; ++i)
{
size_t index = (start_from + i) % disks_num;
auto reservation = disks[index]->reserve(expected_size);
if (reservation)
return reservation;
}
return {};
}
UInt64 Volume::getMaxUnreservedFreeSpace() const
{
UInt64 res = 0;
for (const auto & disk : disks)
res = std::max(res, disk->getUnreservedSpace());
return res;
}
StoragePolicy::StoragePolicy(
String name_,
const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix,
const DiskSelector & disks)
: name(std::move(name_))
{
String volumes_prefix = config_prefix + ".volumes";
if (!config.has(volumes_prefix))
throw Exception("StoragePolicy must contain at least one volume (.volumes)", ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG);
Poco::Util::AbstractConfiguration::Keys keys;
config.keys(volumes_prefix, keys);
for (const auto & attr_name : keys)
{
if (!std::all_of(attr_name.begin(), attr_name.end(), isWordCharASCII))
throw Exception("Volume name can contain only alphanumeric and '_' (" + attr_name + ")", ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG);
volumes.push_back(std::make_shared<Volume>(attr_name, config, volumes_prefix + "." + attr_name, disks));
if (volumes_names.find(attr_name) != volumes_names.end())
throw Exception("Volumes names must be unique (" + attr_name + " duplicated)", ErrorCodes::UNKNOWN_POLICY);
volumes_names[attr_name] = volumes.size() - 1;
}
if (volumes.empty())
throw Exception("StoragePolicy must contain at least one volume.", ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG);
/// Check that disks are unique in Policy
std::set<String> disk_names;
for (const auto & volume : volumes)
{
for (const auto & disk : volume->disks)
{
if (disk_names.find(disk->getName()) != disk_names.end())
throw Exception("Duplicate disk '" + disk->getName() + "' in storage policy '" + name + "'", ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG);
disk_names.insert(disk->getName());
}
}
move_factor = config.getDouble(config_prefix + ".move_factor", 0.1);
if (move_factor > 1)
throw Exception("Disk move factor have to be in [0., 1.] interval, but set to " + toString(move_factor),
ErrorCodes::LOGICAL_ERROR);
}
StoragePolicy::StoragePolicy(String name_, Volumes volumes_, double move_factor_)
: volumes(std::move(volumes_))
, name(std::move(name_))
, move_factor(move_factor_)
{
if (volumes.empty())
throw Exception("StoragePolicy must contain at least one Volume.", ErrorCodes::UNKNOWN_POLICY);
if (move_factor > 1)
throw Exception("Disk move factor have to be in [0., 1.] interval, but set to " + toString(move_factor),
ErrorCodes::LOGICAL_ERROR);
for (size_t i = 0; i < volumes.size(); ++i)
{
if (volumes_names.find(volumes[i]->getName()) != volumes_names.end())
throw Exception("Volumes names must be unique (" + volumes[i]->getName() + " duplicated).", ErrorCodes::UNKNOWN_POLICY);
volumes_names[volumes[i]->getName()] = i;
}
}
Disks StoragePolicy::getDisks() const
{
Disks res;
for (const auto & volume : volumes)
for (const auto & disk : volume->disks)
res.push_back(disk);
return res;
}
DiskPtr StoragePolicy::getAnyDisk() const
{
/// StoragePolicy must contain at least one Volume
/// Volume must contain at least one Disk
if (volumes.empty())
throw Exception("StoragePolicy has no volumes. It's a bug.", ErrorCodes::NOT_ENOUGH_SPACE);
if (volumes[0]->disks.empty())
throw Exception("Volume '" + volumes[0]->getName() + "' has no disks. It's a bug.", ErrorCodes::NOT_ENOUGH_SPACE);
return volumes[0]->disks[0];
}
DiskPtr StoragePolicy::getDiskByName(const String & disk_name) const
{
for (auto && volume : volumes)
for (auto && disk : volume->disks)
if (disk->getName() == disk_name)
return disk;
return {};
}
UInt64 StoragePolicy::getMaxUnreservedFreeSpace() const
{
UInt64 res = 0;
for (const auto & volume : volumes)
res = std::max(res, volume->getMaxUnreservedFreeSpace());
return res;
}
ReservationPtr StoragePolicy::reserve(UInt64 expected_size, size_t min_volume_index) const
{
for (size_t i = min_volume_index; i < volumes.size(); ++i)
{
const auto & volume = volumes[i];
auto reservation = volume->reserve(expected_size);
if (reservation)
return reservation;
}
return {};
}
ReservationPtr StoragePolicy::reserve(UInt64 expected_size) const
{
return reserve(expected_size, 0);
}
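A usage sketch of the cascade above (the volume layout is hypothetical): volumes are tried in configuration order starting from min_volume_index, and within each volume Volume::reserve() probes the disks round-robin.

#include <cstdint>
#include <Common/DiskSpaceMonitor.h>

DB::DiskSpace::ReservationPtr reserveForPart(const DB::DiskSpace::StoragePolicy & policy, uint64_t part_size)
{
    /// First try every volume except the first one (say, a small "hot" volume)...
    if (auto reservation = policy.reserve(part_size, /* min_volume_index */ 1))
        return reservation;

    /// ...then fall back to the full cascade, which includes volume 0 as well.
    return policy.reserve(part_size);
}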
ReservationPtr StoragePolicy::makeEmptyReservationOnLargestDisk() const
{
UInt64 max_space = 0;
DiskPtr max_disk;
for (const auto & volume : volumes)
{
for (const auto & disk : volume->disks)
{
auto avail_space = disk->getAvailableSpace();
if (avail_space > max_space)
{
max_space = avail_space;
max_disk = disk;
}
}
}
return max_disk->reserve(0);
}
size_t StoragePolicy::getVolumeIndexByDisk(const DiskPtr & disk_ptr) const
{
for (size_t i = 0; i < volumes.size(); ++i)
{
const auto & volume = volumes[i];
for (const auto & disk : volume->disks)
if (disk->getName() == disk_ptr->getName())
return i;
}
throw Exception("No disk " + disk_ptr->getName() + " in policy " + name, ErrorCodes::UNKNOWN_DISK);
}
StoragePolicySelector::StoragePolicySelector(
const Poco::Util::AbstractConfiguration & config,
const String & config_prefix,
const DiskSelector & disks)
{
Poco::Util::AbstractConfiguration::Keys keys;
config.keys(config_prefix, keys);
for (const auto & name : keys)
{
if (!std::all_of(name.begin(), name.end(), isWordCharASCII))
throw Exception("StoragePolicy name can contain only alphanumeric and '_' (" + name + ")",
ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG);
policies.emplace(name, std::make_shared<StoragePolicy>(name, config, config_prefix + "." + name, disks));
LOG_INFO(&Logger::get("StoragePolicySelector"), "Storage policy " << name << " loaded");
}
constexpr auto default_storage_policy_name = "default";
constexpr auto default_volume_name = "default";
constexpr auto default_disk_name = "default";
/// Add the default policy if it is not specified explicitly
if (policies.find(default_storage_policy_name) == policies.end())
{
auto default_volume = std::make_shared<Volume>(
default_volume_name,
std::vector<DiskPtr>{disks[default_disk_name]},
0);
auto default_policy = std::make_shared<StoragePolicy>(default_storage_policy_name, Volumes{default_volume}, 0.0);
policies.emplace(default_storage_policy_name, default_policy);
}
}
const StoragePolicyPtr & StoragePolicySelector::operator[](const String & name) const
{
auto it = policies.find(name);
if (it == policies.end())
throw Exception("Unknown StoragePolicy " + name, ErrorCodes::UNKNOWN_POLICY);
return it->second;
}
}
}

View File

@ -0,0 +1,359 @@
#pragma once
#include <mutex>
#include <sys/statvfs.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#if defined(__linux__)
#include <cstdio>
#include <mntent.h>
#endif
#include <memory>
#include <filesystem>
#include <boost/noncopyable.hpp>
#include <Poco/Util/AbstractConfiguration.h>
#include <common/logger_useful.h>
#include <Common/Exception.h>
#include <IO/WriteHelpers.h>
#include <Common/formatReadable.h>
#include <Common/CurrentMetrics.h>
namespace CurrentMetrics
{
extern const Metric DiskSpaceReservedForMerge;
}
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int CANNOT_STATVFS;
extern const int NOT_ENOUGH_SPACE;
extern const int SYSTEM_ERROR;
extern const int UNKNOWN_ELEMENT_IN_CONFIG;
extern const int EXCESSIVE_ELEMENT_IN_CONFIG;
extern const int UNKNOWN_POLICY;
extern const int UNKNOWN_DISK;
}
namespace DiskSpace
{
class Reservation;
using ReservationPtr = std::unique_ptr<Reservation>;
/// Returns the mount point of the filesystem where absolute_path (which must exist) is located
std::filesystem::path getMountPoint(std::filesystem::path absolute_path);
/// Returns name of filesystem mounted to mount_point
#if !defined(__linux__)
[[noreturn]]
#endif
std::string getFilesystemName([[maybe_unused]] const std::string & mount_point);
inline struct statvfs getStatVFS(const std::string & path)
{
struct statvfs fs;
if (statvfs(path.c_str(), &fs) != 0)
throwFromErrnoWithPath(
"Could not calculate available disk space (statvfs)", path, ErrorCodes::CANNOT_STATVFS);
return fs;
}
/**
* Provides an interface for space reservation.
*/
class Space : public std::enable_shared_from_this<Space>
{
public:
virtual ReservationPtr reserve(UInt64 bytes) const = 0;
virtual const String & getName() const = 0;
virtual ~Space() = default;
};
using SpacePtr = std::shared_ptr<const Space>;
/** Disk - Smallest space unit.
* path - Path to space. Ends with /
* name - Unique key using for disk space reservation.
*/
class Disk : public Space
{
public:
friend class Reservation;
/// Snapshot of disk space state (free and total space)
class Stat
{
struct statvfs fs{};
UInt64 keep_free_space_bytes;
public:
explicit Stat(const Disk & disk)
{
if (statvfs(disk.path.c_str(), &fs) != 0)
throwFromErrno("Could not calculate available disk space (statvfs)", ErrorCodes::CANNOT_STATVFS);
keep_free_space_bytes = disk.keep_free_space_bytes;
}
/// Total space on disk using information from statvfs
UInt64 getTotalSpace() const;
/// Available space on disk using information from statvfs
UInt64 getAvailableSpace() const;
};
Disk(const String & name_, const String & path_, UInt64 keep_free_space_bytes_)
: name(name_)
, path(path_)
, keep_free_space_bytes(keep_free_space_bytes_)
{
if (path.back() != '/')
throw Exception("Disk path must ends with '/', but '" + path + "' doesn't.", ErrorCodes::LOGICAL_ERROR);
}
/// Reserves bytes on disk, if not possible returns nullptr.
ReservationPtr reserve(UInt64 bytes) const override;
/// Disk name from configuration;
const String & getName() const override { return name; }
/// Path on fs to disk
const String & getPath() const { return path; }
/// Path to clickhouse data directory on this disk
String getClickHouseDataPath() const { return path + "data/"; }
/// Amount of bytes which should be kept free on this disk
UInt64 getKeepingFreeSpace() const { return keep_free_space_bytes; }
/// Snapshot of disk space state (free and total space)
Stat getSpaceInformation() const { return Stat(*this); }
/// Total available space on disk
UInt64 getTotalSpace() const { return getSpaceInformation().getTotalSpace(); }
/// Space currently available on disk, take information from statvfs call
UInt64 getAvailableSpace() const { return getSpaceInformation().getAvailableSpace(); }
/// Currently available (prev method) minus already reserved space
UInt64 getUnreservedSpace() const;
private:
const String name;
const String path;
const UInt64 keep_free_space_bytes;
/// Used for reservation counters modification
static std::mutex mutex;
mutable UInt64 reserved_bytes = 0;
mutable UInt64 reservation_count = 0;
private:
/// Reserves bytes on disk, if not possible returns false
bool tryReserve(UInt64 bytes) const;
};
/// It is not possible to change a disk at runtime.
using DiskPtr = std::shared_ptr<const Disk>;
using Disks = std::vector<DiskPtr>;
/** Information about reserved size on concrete disk.
* Unreserve on destroy. Doesn't reserve bytes in constructor.
*/
class Reservation final : private boost::noncopyable
{
public:
Reservation(UInt64 size_, DiskPtr disk_ptr_)
: size(size_)
, metric_increment(CurrentMetrics::DiskSpaceReservedForMerge, size)
, disk_ptr(disk_ptr_)
{
}
/// Unreserves the space and decrements the reservation count on the disk
~Reservation();
/// Changes amount of reserved space. When new_size is greater than before,
/// availability of free space is not checked.
void update(UInt64 new_size);
/// Get reservation size
UInt64 getSize() const { return size; }
/// Get the disk where the reservation takes place
const DiskPtr & getDisk() const { return disk_ptr; }
private:
UInt64 size;
CurrentMetrics::Increment metric_increment;
DiskPtr disk_ptr;
};
/// Parse .xml configuration and store information about disks
/// Mostly used for introspection.
class DiskSelector
{
public:
DiskSelector(const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix, const String & default_path);
/// Get disk by name
const DiskPtr & operator[](const String & name) const;
/// Get the map of all disks by name
const auto & getDisksMap() const { return disks; }
private:
std::map<String, DiskPtr> disks;
};
/**
* Disks grouped by some (user) criteria. For example,
* - Volume("slow_disks", [d1, d2], 100)
* - Volume("fast_disks", [d3, d4], 200)
* Cannot store parts larger than max_data_part_size.
*/
class Volume : public Space
{
friend class StoragePolicy;
public:
Volume(String name_, std::vector<DiskPtr> disks_, UInt64 max_data_part_size_)
: max_data_part_size(max_data_part_size_)
, disks(std::move(disks_))
, name(std::move(name_))
{
}
Volume(String name_, const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix, const DiskSelector & disk_selector);
/// Uses round-robin to choose a disk for the reservation.
/// Returns valid reservation or nullptr if there is no space left on any disk.
ReservationPtr reserve(UInt64 bytes) const override;
/// Returns the biggest unreserved space across all disks
UInt64 getMaxUnreservedFreeSpace() const;
/// Volume name from config
const String & getName() const override { return name; }
/// Max size of reservation
UInt64 max_data_part_size = 0;
/// Disks in volume
Disks disks;
private:
mutable std::atomic<size_t> last_used = 0;
const String name;
};
using VolumePtr = std::shared_ptr<const Volume>;
using Volumes = std::vector<VolumePtr>;
/**
* Contains all information about volumes configuration for Storage.
* Can determine appropriate Volume and Disk for each reservation.
*/
class StoragePolicy : public Space
{
public:
StoragePolicy(String name_, const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix, const DiskSelector & disks);
StoragePolicy(String name_, Volumes volumes_, double move_factor_);
/// Returns disks ordered by volumes priority
Disks getDisks() const;
/// Returns any disk
/// Used when it's not important, for example for
/// mutation files
DiskPtr getAnyDisk() const;
DiskPtr getDiskByName(const String & disk_name) const;
/// Get free space from most free disk
UInt64 getMaxUnreservedFreeSpace() const;
const String & getName() const override { return name; }
/// Returns valid reservation or null
ReservationPtr reserve(UInt64 bytes) const override;
/// Reserve space on any volume with index >= min_volume_index
ReservationPtr reserve(UInt64 bytes, size_t min_volume_index) const;
/// Find volume index, which contains disk
size_t getVolumeIndexByDisk(const DiskPtr & disk_ptr) const;
/// Reserves 0 bytes on disk with max available space
/// Do not use this function when it is possible to predict size.
ReservationPtr makeEmptyReservationOnLargestDisk() const;
const Volumes & getVolumes() const { return volumes; }
/// Returns number [0., 1.] -- fraction of free space on disk
/// which should be kept with help of background moves
double getMoveFactor() const { return move_factor; }
/// Get volume by index from storage_policy
VolumePtr getVolume(size_t i) const { return (i < volumes_names.size() ? volumes[i] : VolumePtr()); }
VolumePtr getVolumeByName(const String & volume_name) const
{
auto it = volumes_names.find(volume_name);
if (it == volumes_names.end())
return {};
return getVolume(it->second);
}
private:
Volumes volumes;
const String name;
std::map<String, size_t> volumes_names;
/// move_factor from interval [0., 1.]
/// We move something if disk from this policy
/// filled more than total_size * move_factor
double move_factor = 0.1; /// by default move factor is 10%
};
using StoragePolicyPtr = std::shared_ptr<const StoragePolicy>;
/// Parse .xml configuration and store information about policies
/// Mostly used for introspection.
class StoragePolicySelector
{
public:
StoragePolicySelector(const Poco::Util::AbstractConfiguration & config,
const String & config_prefix, const DiskSelector & disks);
/// Policy by name
const StoragePolicyPtr & operator[](const String & name) const;
/// All policies
const std::map<String, StoragePolicyPtr> & getPoliciesMap() const { return policies; }
private:
std::map<String, StoragePolicyPtr> policies;
};
}
}

View File

@ -451,6 +451,12 @@ namespace ErrorCodes
extern const int INVALID_TEMPLATE_FORMAT = 474; extern const int INVALID_TEMPLATE_FORMAT = 474;
extern const int INVALID_WITH_FILL_EXPRESSION = 475; extern const int INVALID_WITH_FILL_EXPRESSION = 475;
extern const int WITH_TIES_WITHOUT_ORDER_BY = 476; extern const int WITH_TIES_WITHOUT_ORDER_BY = 476;
extern const int INVALID_USAGE_OF_INPUT = 477;
extern const int UNKNOWN_POLICY = 478;
extern const int UNKNOWN_DISK = 479;
extern const int UNKNOWN_PROTOCOL = 480;
extern const int PATH_ACCESS_DENIED = 481;
extern const int DICTIONARY_ACCESS_DENIED = 482;
extern const int KEEPER_EXCEPTION = 999; extern const int KEEPER_EXCEPTION = 999;
extern const int POCO_EXCEPTION = 1000; extern const int POCO_EXCEPTION = 1000;

View File

@ -10,7 +10,7 @@
#include <common/demangle.h> #include <common/demangle.h>
#include <Common/config_version.h> #include <Common/config_version.h>
#include <Common/formatReadable.h> #include <Common/formatReadable.h>
#include <Storages/MergeTree/DiskSpaceMonitor.h> #include <Common/DiskSpaceMonitor.h>
#include <filesystem> #include <filesystem>
namespace DB namespace DB
@ -84,16 +84,16 @@ void getNoSpaceLeftInfoMessage(std::filesystem::path path, std::string & msg)
while (!std::filesystem::exists(path) && path.has_relative_path()) while (!std::filesystem::exists(path) && path.has_relative_path())
path = path.parent_path(); path = path.parent_path();
auto fs = DiskSpaceMonitor::getStatVFS(path); auto fs = DiskSpace::getStatVFS(path);
msg += "\nTotal space: " + formatReadableSizeWithBinarySuffix(fs.f_blocks * fs.f_bsize) msg += "\nTotal space: " + formatReadableSizeWithBinarySuffix(fs.f_blocks * fs.f_bsize)
+ "\nAvailable space: " + formatReadableSizeWithBinarySuffix(fs.f_bavail * fs.f_bsize) + "\nAvailable space: " + formatReadableSizeWithBinarySuffix(fs.f_bavail * fs.f_bsize)
+ "\nTotal inodes: " + formatReadableQuantity(fs.f_files) + "\nTotal inodes: " + formatReadableQuantity(fs.f_files)
+ "\nAvailable inodes: " + formatReadableQuantity(fs.f_favail); + "\nAvailable inodes: " + formatReadableQuantity(fs.f_favail);
auto mount_point = DiskSpaceMonitor::getMountPoint(path).string(); auto mount_point = DiskSpace::getMountPoint(path).string();
msg += "\nMount point: " + mount_point; msg += "\nMount point: " + mount_point;
#if defined(__linux__) #if defined(__linux__)
msg += "\nFilesystem: " + DiskSpaceMonitor::getFilesystemName(mount_point); msg += "\nFilesystem: " + DiskSpace::getFilesystemName(mount_point);
#endif #endif
} }

View File

@ -36,7 +36,6 @@ struct FixedClearableHashMapCell
} }
Key key; Key key;
FixedClearableHashMapCell * ptr; FixedClearableHashMapCell * ptr;
Key & getFirstMutable() { return key; }
const Key & getFirst() const { return key; } const Key & getFirst() const { return key; }
Mapped & getSecond() { return ptr->mapped; } Mapped & getSecond() { return ptr->mapped; }
const Mapped & getSecond() const { return *ptr->mapped; } const Mapped & getSecond() const { return *ptr->mapped; }

View File

@ -23,7 +23,6 @@ struct FixedClearableHashTableCell
struct CellExt struct CellExt
{ {
Key key; Key key;
value_type & getValueMutable() { return key; }
const value_type & getValue() const { return key; } const value_type & getValue() const { return key; }
void update(Key && key_, FixedClearableHashTableCell *) { key = key_; } void update(Key && key_, FixedClearableHashTableCell *) { key = key_; }
}; };

View File

@ -39,7 +39,6 @@ struct FixedHashMapCell
Key key; Key key;
FixedHashMapCell * ptr; FixedHashMapCell * ptr;
Key & getFirstMutable() { return key; }
const Key & getFirst() const { return key; } const Key & getFirst() const { return key; }
Mapped & getSecond() { return ptr->mapped; } Mapped & getSecond() { return ptr->mapped; }
const Mapped & getSecond() const { return ptr->mapped; } const Mapped & getSecond() const { return ptr->mapped; }
@ -53,12 +52,53 @@ class FixedHashMap : public FixedHashTable<Key, FixedHashMapCell<Key, Mapped>, A
{ {
public: public:
using Base = FixedHashTable<Key, FixedHashMapCell<Key, Mapped>, Allocator>; using Base = FixedHashTable<Key, FixedHashMapCell<Key, Mapped>, Allocator>;
using Self = FixedHashMap;
using key_type = Key; using key_type = Key;
using mapped_type = Mapped; using mapped_type = Mapped;
using value_type = typename Base::cell_type::value_type; using Cell = typename Base::cell_type;
using value_type = typename Cell::value_type;
using Base::Base; using Base::Base;
template <typename Func>
void ALWAYS_INLINE mergeToViaEmplace(Self & that, Func && func)
{
for (auto it = this->begin(), end = this->end(); it != end; ++it)
{
decltype(it) res_it;
bool inserted;
that.emplace(it->getFirst(), res_it, inserted, it.getHash());
func(res_it->getSecond(), it->getSecond(), inserted);
}
}
template <typename Func>
void ALWAYS_INLINE mergeToViaFind(Self & that, Func && func)
{
for (auto it = this->begin(), end = this->end(); it != end; ++it)
{
decltype(it) res_it = that.find(it->getFirst(), it.getHash());
if (res_it == that.end())
func(it->getSecond(), it->getSecond(), false);
else
func(res_it->getSecond(), it->getSecond(), true);
}
}
template <typename Func>
void forEachValue(Func && func)
{
for (auto & v : *this)
func(v.getFirst(), v.getSecond());
}
template <typename Func>
void forEachMapped(Func && func)
{
for (auto & v : *this)
func(v.getSecond());
}
mapped_type & ALWAYS_INLINE operator[](Key x) mapped_type & ALWAYS_INLINE operator[](Key x)
{ {
typename Base::iterator it; typename Base::iterator it;

View File

@ -28,7 +28,6 @@ struct FixedHashTableCell
{ {
Key key; Key key;
value_type & getValueMutable() { return key; }
const value_type & getValue() const { return key; } const value_type & getValue() const { return key; }
void update(Key && key_, FixedHashTableCell *) { key = key_; } void update(Key && key_, FixedHashTableCell *) { key = key_; }
}; };
@ -262,8 +261,9 @@ public:
iterator end() { return iterator(this, buf + BUFFER_SIZE); } iterator end() { return iterator(this, buf + BUFFER_SIZE); }
protected: public:
void ALWAYS_INLINE emplaceImpl(Key x, iterator & it, bool & inserted) /// The last parameter is unused but exists for compatibility with HashTable interface.
void ALWAYS_INLINE emplace(Key x, iterator & it, bool & inserted, size_t /* hash */ = 0)
{ {
it = iterator(this, &buf[x]); it = iterator(this, &buf[x]);
@ -278,22 +278,16 @@ protected:
++m_size; ++m_size;
} }
public:
std::pair<iterator, bool> ALWAYS_INLINE insert(const value_type & x) std::pair<iterator, bool> ALWAYS_INLINE insert(const value_type & x)
{ {
std::pair<iterator, bool> res; std::pair<iterator, bool> res;
emplaceImpl(Cell::getKey(x), res.first, res.second); emplace(Cell::getKey(x), res.first, res.second);
if (res.second) if (res.second)
res.first.ptr->setMapped(x); res.first.ptr->setMapped(x);
return res; return res;
} }
void ALWAYS_INLINE emplace(Key x, iterator & it, bool & inserted) { emplaceImpl(x, it, inserted); }
void ALWAYS_INLINE emplace(Key x, iterator & it, bool & inserted, size_t) { emplaceImpl(x, it, inserted); }
iterator ALWAYS_INLINE find(Key x) iterator ALWAYS_INLINE find(Key x)
{ {
return !buf[x].isZero(*this) ? iterator(this, &buf[x]) : end(); return !buf[x].isZero(*this) ? iterator(this, &buf[x]) : end();

View File

@ -49,12 +49,10 @@ struct HashMapCell
HashMapCell(const Key & key_, const State &) : value(key_, NoInitTag()) {} HashMapCell(const Key & key_, const State &) : value(key_, NoInitTag()) {}
HashMapCell(const value_type & value_, const State &) : value(value_) {} HashMapCell(const value_type & value_, const State &) : value(value_) {}
Key & getFirstMutable() { return value.first; }
const Key & getFirst() const { return value.first; } const Key & getFirst() const { return value.first; }
Mapped & getSecond() { return value.second; } Mapped & getSecond() { return value.second; }
const Mapped & getSecond() const { return value.second; } const Mapped & getSecond() const { return value.second; }
value_type & getValueMutable() { return value; }
const value_type & getValue() const { return value; } const value_type & getValue() const { return value; }
static const Key & getKey(const value_type & value) { return value.first; } static const Key & getKey(const value_type & value) { return value.first; }
@ -137,12 +135,65 @@ template <
class HashMapTable : public HashTable<Key, Cell, Hash, Grower, Allocator> class HashMapTable : public HashTable<Key, Cell, Hash, Grower, Allocator>
{ {
public: public:
using Self = HashMapTable;
using key_type = Key; using key_type = Key;
using mapped_type = typename Cell::Mapped; using mapped_type = typename Cell::Mapped;
using value_type = typename Cell::value_type; using value_type = typename Cell::value_type;
using HashTable<Key, Cell, Hash, Grower, Allocator>::HashTable; using HashTable<Key, Cell, Hash, Grower, Allocator>::HashTable;
/// Merge every cell's value of current map into the destination map via emplace.
/// Func should have signature void(Mapped & dst, Mapped & src, bool emplaced).
/// Each filled cell in current map will invoke func once. If that map doesn't
/// have a key equal to the given cell's key, a new cell gets emplaced into that map,
/// and func is invoked with the third argument emplaced set to true. Otherwise
/// emplaced is set to false.
template <typename Func>
void ALWAYS_INLINE mergeToViaEmplace(Self & that, Func && func)
{
for (auto it = this->begin(), end = this->end(); it != end; ++it)
{
decltype(it) res_it;
bool inserted;
that.emplace(it->getFirst(), res_it, inserted, it.getHash());
func(res_it->getSecond(), it->getSecond(), inserted);
}
}
/// Merge every cell's value of current map into the destination map via find.
/// Func should have signature void(Mapped & dst, Mapped & src, bool exist).
/// Each filled cell in current map will invoke func once. If that map doesn't
/// have a key equal to the given cell's key, func is invoked with the third argument
/// exist set to false. Otherwise exist is set to true.
template <typename Func>
void ALWAYS_INLINE mergeToViaFind(Self & that, Func && func)
{
for (auto it = this->begin(), end = this->end(); it != end; ++it)
{
decltype(it) res_it = that.find(it->getFirst(), it.getHash());
if (res_it == that.end())
func(it->getSecond(), it->getSecond(), false);
else
func(res_it->getSecond(), it->getSecond(), true);
}
}
/// Call func(const Key &, Mapped &) for each hash map element.
template <typename Func>
void forEachValue(Func && func)
{
for (auto & v : *this)
func(v.getFirst(), v.getSecond());
}
/// Call func(Mapped &) for each hash map element.
template <typename Func>
void forEachMapped(Func && func)
{
for (auto & v : *this)
func(v.getSecond());
}
mapped_type & ALWAYS_INLINE operator[](Key x) mapped_type & ALWAYS_INLINE operator[](Key x)
{ {
typename HashMapTable::iterator it; typename HashMapTable::iterator it;
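A minimal sketch of the merge helpers above, using toy UInt64 counters with the default hash: per-thread counters from src are merged into dst, and because emplace() leaves a newly created mapped value uninitialized, the callback must initialize it on the emplaced branch.

#include <Common/HashTable/HashMap.h>

using Counters = HashMap<UInt64, UInt64>;

void mergeCounters(Counters & src, Counters & dst)
{
    src.mergeToViaEmplace(dst, [](UInt64 & dst_value, UInt64 & src_value, bool emplaced)
    {
        if (emplaced)
            dst_value = src_value;   /// the key was new in dst: initialize the value
        else
            dst_value += src_value;  /// the key already existed: combine the values
    });
}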

View File

@ -21,6 +21,7 @@
#include <IO/VarInt.h> #include <IO/VarInt.h>
#include <Common/HashTable/HashTableAllocator.h> #include <Common/HashTable/HashTableAllocator.h>
#include <Common/HashTable/HashTableKeyHolder.h>
#ifdef DBMS_HASH_MAP_DEBUG_RESIZES #ifdef DBMS_HASH_MAP_DEBUG_RESIZES
#include <iostream> #include <iostream>
@ -97,7 +98,6 @@ struct HashTableCell
HashTableCell(const Key & key_, const State &) : key(key_) {} HashTableCell(const Key & key_, const State &) : key(key_) {}
/// Get what the value_type of the container will be. /// Get what the value_type of the container will be.
value_type & getValueMutable() { return key; }
const value_type & getValue() const { return key; } const value_type & getValue() const { return key; }
/// Get the key. /// Get the key.
@ -224,8 +224,18 @@ private:
public: public:
bool hasZero() const { return has_zero; } bool hasZero() const { return has_zero; }
void setHasZero() { has_zero = true; }
void clearHasZero() { has_zero = false; } void setHasZero()
{
has_zero = true;
new (zeroValue()) Cell();
}
void clearHasZero()
{
has_zero = false;
zeroValue()->~Cell();
}
Cell * zeroValue() { return reinterpret_cast<Cell*>(&zero_value_storage); } Cell * zeroValue() { return reinterpret_cast<Cell*>(&zero_value_storage); }
const Cell * zeroValue() const { return reinterpret_cast<const Cell*>(&zero_value_storage); } const Cell * zeroValue() const { return reinterpret_cast<const Cell*>(&zero_value_storage); }
@ -630,6 +640,8 @@ protected:
/// If the key is zero, insert it into a special place and return true. /// If the key is zero, insert it into a special place and return true.
/// We don't have to persist a zero key, because it's not actually inserted.
/// That's why we just take a Key by value, and not a key holder.
bool ALWAYS_INLINE emplaceIfZero(Key x, iterator & it, bool & inserted, size_t hash_value) bool ALWAYS_INLINE emplaceIfZero(Key x, iterator & it, bool & inserted, size_t hash_value)
{ {
/// If it is claimed that the zero key can not be inserted into the table. /// If it is claimed that the zero key can not be inserted into the table.
@ -655,17 +667,23 @@ protected:
return false; return false;
} }
void ALWAYS_INLINE emplaceNonZeroImpl(size_t place_value, Key x, iterator & it, bool & inserted, size_t hash_value) template <typename KeyHolder>
void ALWAYS_INLINE emplaceNonZeroImpl(size_t place_value, KeyHolder && key_holder,
iterator & it, bool & inserted, size_t hash_value)
{ {
it = iterator(this, &buf[place_value]); it = iterator(this, &buf[place_value]);
if (!buf[place_value].isZero(*this)) if (!buf[place_value].isZero(*this))
{ {
keyHolderDiscardKey(key_holder);
inserted = false; inserted = false;
return; return;
} }
new(&buf[place_value]) Cell(x, *this); keyHolderPersistKey(key_holder);
const auto & key = keyHolderGetKey(key_holder);
new(&buf[place_value]) Cell(key, *this);
buf[place_value].setHash(hash_value); buf[place_value].setHash(hash_value);
inserted = true; inserted = true;
++m_size; ++m_size;
@ -687,19 +705,21 @@ protected:
throw; throw;
} }
it = find(x, hash_value); it = find(keyHolderGetKey(key_holder), hash_value);
} }
} }
/// Only for non-zero keys. Find the right place, insert the key there, if it does not already exist. Set iterator to the cell in output parameter. /// Only for non-zero keys. Find the right place, insert the key there, if it does not already exist. Set iterator to the cell in output parameter.
void ALWAYS_INLINE emplaceNonZero(Key x, iterator & it, bool & inserted, size_t hash_value) template <typename KeyHolder>
void ALWAYS_INLINE emplaceNonZero(KeyHolder && key_holder, iterator & it,
bool & inserted, size_t hash_value)
{ {
size_t place_value = findCell(x, hash_value, grower.place(hash_value)); const auto & key = keyHolderGetKey(key_holder);
emplaceNonZeroImpl(place_value, x, it, inserted, hash_value); size_t place_value = findCell(key, hash_value, grower.place(hash_value));
emplaceNonZeroImpl(place_value, key_holder, it, inserted, hash_value);
} }
public: public:
/// Insert a value. In the case of any more complex values, it is better to use the `emplace` function. /// Insert a value. In the case of any more complex values, it is better to use the `emplace` function.
std::pair<iterator, bool> ALWAYS_INLINE insert(const value_type & x) std::pair<iterator, bool> ALWAYS_INLINE insert(const value_type & x)
@ -708,7 +728,9 @@ public:
size_t hash_value = hash(Cell::getKey(x)); size_t hash_value = hash(Cell::getKey(x));
if (!emplaceIfZero(Cell::getKey(x), res.first, res.second, hash_value)) if (!emplaceIfZero(Cell::getKey(x), res.first, res.second, hash_value))
{
emplaceNonZero(Cell::getKey(x), res.first, res.second, hash_value); emplaceNonZero(Cell::getKey(x), res.first, res.second, hash_value);
}
if (res.second) if (res.second)
res.first.ptr->setMapped(x); res.first.ptr->setMapped(x);
@ -739,19 +761,20 @@ public:
* if (inserted) * if (inserted)
* new(&it->second) Mapped(value); * new(&it->second) Mapped(value);
*/ */
void ALWAYS_INLINE emplace(Key x, iterator & it, bool & inserted) template <typename KeyHolder>
void ALWAYS_INLINE emplace(KeyHolder && key_holder, iterator & it, bool & inserted)
{ {
size_t hash_value = hash(x); const auto & key = keyHolderGetKey(key_holder);
if (!emplaceIfZero(x, it, inserted, hash_value)) emplace(key_holder, it, inserted, hash(key));
emplaceNonZero(x, it, inserted, hash_value);
} }
template <typename KeyHolder>
/// Same, but with a precalculated value of hash function. void ALWAYS_INLINE emplace(KeyHolder && key_holder, iterator & it,
void ALWAYS_INLINE emplace(Key x, iterator & it, bool & inserted, size_t hash_value) bool & inserted, size_t hash_value)
{ {
if (!emplaceIfZero(x, it, inserted, hash_value)) const auto & key = keyHolderGetKey(key_holder);
emplaceNonZero(x, it, inserted, hash_value); if (!emplaceIfZero(key, it, inserted, hash_value))
emplaceNonZero(key_holder, it, inserted, hash_value);
} }
/// Copy the cell from another hash table. It is assumed that the cell is not zero, and also that there was no such key in the table yet. /// Copy the cell from another hash table. It is assumed that the cell is not zero, and also that there was no such key in the table yet.

View File

@ -0,0 +1,130 @@
#pragma once
#include <Common/Arena.h>
/**
* In some aggregation scenarios, when adding a key to the hash table, we
* start with a temporary key object, and if it turns out to be a new key,
* we must make it persistent (e.g. copy to an Arena) and use the resulting
* persistent object as hash table key. This happens only for StringRef keys,
* because other key types are stored by value, but StringRef is a pointer-like
* type: the actual data are stored elsewhere. Even for StringRef, we don't
* make a persistent copy of the key in each of the following cases:
* 1) the aggregation method doesn't use temporary keys, so they're persistent
* from the start;
* 2) the key is already present in the hash table;
* 3) that particular key is stored by value, e.g. a short StringRef key in
* StringHashMap.
*
* In the past, the caller was responsible for making the key persistent after
* it was inserted. emplace() returned whether the key is new or not, so the
* caller only stored new keys (this is case (2) from the above list). However,
* now we are adding a compound hash table for StringRef keys, so case (3)
* appears. The decision about persistence now depends on some properties of
* the key, and the logic of this decision is tied to the particular hash table
* implementation. This means that the hash table user now doesn't have enough
* data and logic to make this decision by itself.
*
* To support these new requirements, we now manage key persistence by passing
* a special key holder to emplace(), which has the functions to make the key
* persistent or to discard it. emplace() then calls these functions at the
* appropriate moments.
*
* This approach has the following benefits:
* - no extra runtime branches in the caller to make the key persistent.
* - no additional data is stored in the hash table itself, which is important
* when it's used in aggregate function states.
* - no overhead when the key memory management isn't needed: we just pass the
* bare key without any wrapper to emplace(), and the default callbacks do
* nothing.
*
* This file defines the default key persistence functions, as well as two
* different key holders and corresponding functions for storing StringRef
* keys to Arena.
*/
/**
* Returns the key. Can return the temporary key initially.
* After the call to keyHolderPersistKey(), must return the persistent key.
*/
template <typename Key>
inline Key & ALWAYS_INLINE keyHolderGetKey(Key && key) { return key; }
/**
* Make the key persistent. keyHolderGetKey() must return the persistent key
* after this call.
*/
template <typename Key>
inline void ALWAYS_INLINE keyHolderPersistKey(Key &&) {}
/**
* Discard the key. Calling keyHolderGetKey() is ill-defined after this.
*/
template <typename Key>
inline void ALWAYS_INLINE keyHolderDiscardKey(Key &&) {}
namespace DB
{
/**
* ArenaKeyHolder is a key holder for hash tables that serializes a StringRef
* key to an Arena.
*/
struct ArenaKeyHolder
{
StringRef key;
Arena & pool;
};
}
inline StringRef & ALWAYS_INLINE keyHolderGetKey(DB::ArenaKeyHolder & holder)
{
return holder.key;
}
inline void ALWAYS_INLINE keyHolderPersistKey(DB::ArenaKeyHolder & holder)
{
// Hash table shouldn't ask us to persist a zero key
assert(holder.key.size > 0);
holder.key.data = holder.pool.insert(holder.key.data, holder.key.size);
}
inline void ALWAYS_INLINE keyHolderDiscardKey(DB::ArenaKeyHolder &)
{
}
namespace DB
{
/**
* SerializedKeyHolder is a key holder for a StringRef key that is already
* serialized to an Arena. The key must be the last allocation in this Arena,
* and is discarded by rolling back the allocation.
*/
struct SerializedKeyHolder
{
StringRef key;
Arena & pool;
};
}
inline StringRef & ALWAYS_INLINE keyHolderGetKey(DB::SerializedKeyHolder & holder)
{
return holder.key;
}
inline void ALWAYS_INLINE keyHolderPersistKey(DB::SerializedKeyHolder &)
{
}
inline void ALWAYS_INLINE keyHolderDiscardKey(DB::SerializedKeyHolder & holder)
{
[[maybe_unused]] void * new_head = holder.pool.rollback(holder.key.size);
assert(new_head == holder.key.data);
holder.key.data = nullptr;
holder.key.size = 0;
}
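A usage sketch of ArenaKeyHolder with HashMap::emplace(), simplified relative to the real aggregation code: the temporary StringRef is copied into the arena by keyHolderPersistKey() only when the key turns out to be new, and is left alone otherwise. The key must be non-empty, because a zero key is never persisted.

#include <Common/Arena.h>
#include <Common/HashTable/HashMap.h>

using StringCounts = HashMap<StringRef, UInt64>;

void count(StringCounts & map, DB::Arena & pool, StringRef temporary_key)
{
    StringCounts::iterator it;
    bool inserted;

    map.emplace(DB::ArenaKeyHolder{temporary_key, pool}, it, inserted);
    if (inserted)
        it->getSecond() = 0;  /// emplace() leaves the mapped value uninitialized

    ++it->getSecond();
}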

View File

@ -22,6 +22,13 @@ public:
using TwoLevelHashTable<Key, Cell, Hash, Grower, Allocator, ImplTable<Key, Cell, Hash, Grower, Allocator>>::TwoLevelHashTable; using TwoLevelHashTable<Key, Cell, Hash, Grower, Allocator, ImplTable<Key, Cell, Hash, Grower, Allocator>>::TwoLevelHashTable;
template <typename Func>
void ALWAYS_INLINE forEachMapped(Func && func)
{
for (auto i = 0u; i < this->NUM_BUCKETS; ++i)
this->impls[i].forEachMapped(func);
}
mapped_type & ALWAYS_INLINE operator[](Key x) mapped_type & ALWAYS_INLINE operator[](Key x)
{ {
typename TwoLevelHashMapTable::iterator it; typename TwoLevelHashMapTable::iterator it;

View File

@ -235,19 +235,22 @@ public:
* if (inserted) * if (inserted)
* new(&it->second) Mapped(value); * new(&it->second) Mapped(value);
*/ */
void ALWAYS_INLINE emplace(Key x, iterator & it, bool & inserted) template <typename KeyHolder>
void ALWAYS_INLINE emplace(KeyHolder && key_holder, iterator & it, bool & inserted)
{ {
size_t hash_value = hash(x); size_t hash_value = hash(keyHolderGetKey(key_holder));
emplace(x, it, inserted, hash_value); emplace(key_holder, it, inserted, hash_value);
} }
/// Same, but with a precalculated value of the hash function. /// Same, but with a precalculated value of the hash function.
void ALWAYS_INLINE emplace(Key x, iterator & it, bool & inserted, size_t hash_value) template <typename KeyHolder>
void ALWAYS_INLINE emplace(KeyHolder && key_holder, iterator & it,
bool & inserted, size_t hash_value)
{ {
size_t buck = getBucketFromHash(hash_value); size_t buck = getBucketFromHash(hash_value);
typename Impl::iterator impl_it; typename Impl::iterator impl_it;
impls[buck].emplace(x, impl_it, inserted, hash_value); impls[buck].emplace(key_holder, impl_it, inserted, hash_value);
it = iterator(this, buck, impl_it); it = iterator(this, buck, impl_it);
} }

View File

@ -0,0 +1,9 @@
#pragma once
#define __msan_unpoison(X, Y)
#if defined(__has_feature)
# if __has_feature(memory_sanitizer)
# undef __msan_unpoison
# include <sanitizer/msan_interface.h>
# endif
#endif

View File

@ -1,3 +1,5 @@
#include <Common/config.h>
#if USE_POCO_NETSSL
#include "OpenSSLHelpers.h" #include "OpenSSLHelpers.h"
#include <ext/scope_guard.h> #include <ext/scope_guard.h>
#include <openssl/err.h> #include <openssl/err.h>
@ -16,3 +18,4 @@ String getOpenSSLErrors()
} }
} }
#endif

View File

@ -1,4 +1,6 @@
#pragma once #pragma once
#include <Common/config.h>
#if USE_POCO_NETSSL
#include <Core/Types.h> #include <Core/Types.h>
@ -10,3 +12,4 @@ namespace DB
String getOpenSSLErrors(); String getOpenSSLErrors();
} }
#endif

View File

@ -56,9 +56,11 @@ public:
PoolWithFailoverBase( PoolWithFailoverBase(
NestedPools nested_pools_, NestedPools nested_pools_,
time_t decrease_error_period_, time_t decrease_error_period_,
size_t max_error_cap_,
Logger * log_) Logger * log_)
: nested_pools(std::move(nested_pools_)) : nested_pools(std::move(nested_pools_))
, decrease_error_period(decrease_error_period_) , decrease_error_period(decrease_error_period_)
, max_error_cap(max_error_cap_)
, shared_pool_states(nested_pools.size()) , shared_pool_states(nested_pools.size())
, log(log_) , log(log_)
{ {
@ -120,12 +122,14 @@ protected:
/// This function returns a copy of pool states to avoid race conditions when modifying shared pool states. /// This function returns a copy of pool states to avoid race conditions when modifying shared pool states.
PoolStates updatePoolStates(); PoolStates updatePoolStates();
PoolStates getPoolStates() const;
NestedPools nested_pools; NestedPools nested_pools;
const time_t decrease_error_period; const time_t decrease_error_period;
const size_t max_error_cap;
std::mutex pool_states_mutex; mutable std::mutex pool_states_mutex;
PoolStates shared_pool_states; PoolStates shared_pool_states;
/// The time when error counts were last decreased. /// The time when error counts were last decreased.
time_t last_error_decrease_time = 0; time_t last_error_decrease_time = 0;
@ -193,7 +197,10 @@ PoolWithFailoverBase<TNestedPool>::getMany(
{ {
std::lock_guard lock(pool_states_mutex); std::lock_guard lock(pool_states_mutex);
for (const ShuffledPool & pool: shuffled_pools) for (const ShuffledPool & pool: shuffled_pools)
shared_pool_states[pool.index].error_count += pool.error_count; {
auto & pool_state = shared_pool_states[pool.index];
pool_state.error_count = std::min(max_error_cap, static_cast<size_t>(pool_state.error_count + pool.error_count));
}
}); });
std::string fail_messages; std::string fail_messages;
@ -236,7 +243,7 @@ PoolWithFailoverBase<TNestedPool>::getMany(
<< (shuffled_pool.error_count + 1) << ", reason: " << fail_message); << (shuffled_pool.error_count + 1) << ", reason: " << fail_message);
ProfileEvents::increment(ProfileEvents::DistributedConnectionFailTry); ProfileEvents::increment(ProfileEvents::DistributedConnectionFailTry);
++shuffled_pool.error_count; shuffled_pool.error_count = std::min(max_error_cap, shuffled_pool.error_count + 1);
if (shuffled_pool.error_count >= max_tries) if (shuffled_pool.error_count >= max_tries)
{ {
@ -297,7 +304,8 @@ void PoolWithFailoverBase<TNestedPool>::reportError(const Entry & entry)
if (nested_pools[i]->contains(entry)) if (nested_pools[i]->contains(entry))
{ {
std::lock_guard lock(pool_states_mutex); std::lock_guard lock(pool_states_mutex);
++shared_pool_states[i].error_count; auto & pool_state = shared_pool_states[i];
pool_state.error_count = std::min(max_error_cap, pool_state.error_count + 1);
return; return;
} }
} }
@ -373,3 +381,11 @@ PoolWithFailoverBase<TNestedPool>::updatePoolStates()
} }
return result; return result;
} }
template <typename TNestedPool>
typename PoolWithFailoverBase<TNestedPool>::PoolStates
PoolWithFailoverBase<TNestedPool>::getPoolStates() const
{
std::lock_guard lock(pool_states_mutex);
return shared_pool_states;
}

View File

@ -30,10 +30,13 @@ namespace
/// Thus upper bound on query_id length should be introduced to avoid buffer overflow in signal handler. /// Thus upper bound on query_id length should be introduced to avoid buffer overflow in signal handler.
constexpr size_t QUERY_ID_MAX_LEN = 1024; constexpr size_t QUERY_ID_MAX_LEN = 1024;
# if !defined(__APPLE__)
thread_local size_t write_trace_iteration = 0; thread_local size_t write_trace_iteration = 0;
#endif
void writeTraceInfo(TimerType timer_type, int /* sig */, siginfo_t * info, void * context) void writeTraceInfo(TimerType timer_type, int /* sig */, siginfo_t * info, void * context)
{ {
# if !defined(__APPLE__)
/// Quickly drop if signal handler is called too frequently. /// Quickly drop if signal handler is called too frequently.
/// Otherwise we may end up infinitely processing signals instead of doing any useful work. /// Otherwise we may end up infinitely processing signals instead of doing any useful work.
++write_trace_iteration; ++write_trace_iteration;
@ -50,6 +53,9 @@ namespace
return; return;
} }
} }
#else
UNUSED(info);
#endif
constexpr size_t buf_size = sizeof(char) + // TraceCollector stop flag constexpr size_t buf_size = sizeof(char) + // TraceCollector stop flag
8 * sizeof(char) + // maximum VarUInt length for string size 8 * sizeof(char) + // maximum VarUInt length for string size

View File

@ -4,8 +4,6 @@
#include <Common/CurrentMetrics.h> #include <Common/CurrentMetrics.h>
#include <Common/ProfileEvents.h> #include <Common/ProfileEvents.h>
#include <cassert>
namespace ProfileEvents namespace ProfileEvents
{ {
@ -35,22 +33,48 @@ namespace ErrorCodes
} }
/** A single-use object that represents lock's ownership
* For the purpose of exception safety guarantees LockHolder is to be used in two steps:
* 1. Create an instance (allocating all the memory needed)
* 2. Associate the instance with the lock (attach to the lock and locking request group)
*/
class RWLockImpl::LockHolderImpl class RWLockImpl::LockHolderImpl
{ {
bool bound{false};
Type lock_type;
String query_id;
CurrentMetrics::Increment active_client_increment;
RWLock parent; RWLock parent;
GroupsContainer::iterator it_group; GroupsContainer::iterator it_group;
ClientsContainer::iterator it_client;
QueryIdToHolder::key_type query_id;
CurrentMetrics::Increment active_client_increment;
LockHolderImpl(RWLock && parent, GroupsContainer::iterator it_group, ClientsContainer::iterator it_client);
public: public:
LockHolderImpl(const LockHolderImpl & other) = delete; LockHolderImpl(const LockHolderImpl & other) = delete;
LockHolderImpl& operator=(const LockHolderImpl & other) = delete;
/// Implicit memory allocation for query_id is done here
LockHolderImpl(const String & query_id_, Type type)
: lock_type{type}, query_id{query_id_},
active_client_increment{
type == Type::Read ? CurrentMetrics::RWLockActiveReaders : CurrentMetrics::RWLockActiveWriters}
{
}
~LockHolderImpl(); ~LockHolderImpl();
private:
/// A separate method which binds the lock holder to the owned lock
/// N.B. It is very important that this method produces no allocations
bool bind_with(RWLock && parent_, GroupsContainer::iterator it_group_) noexcept
{
if (bound)
return false;
it_group = it_group_;
parent = std::move(parent_);
++it_group->refererrs;
bound = true;
return true;
}
friend class RWLockImpl; friend class RWLockImpl;
}; };
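A standalone sketch of the same two-step pattern with toy types (not the real RWLock classes): all allocations happen while constructing the holder, and the final binding step is noexcept, so an exception can never leave the holder half-attached to the lock's state.

#include <iterator>
#include <list>
#include <memory>

struct Group { int referrers = 0; };

struct Holder
{
    std::list<Group>::iterator group;
    bool bound = false;

    bool bindWith(std::list<Group>::iterator it) noexcept  /// performs no allocations
    {
        if (bound)
            return false;
        group = it;
        ++it->referrers;
        bound = true;
        return true;
    }
};

std::shared_ptr<Holder> acquire(std::list<Group> & queue)
{
    auto holder = std::make_shared<Holder>();  /// step 1: allocate everything up front
    queue.emplace_back();                      /// may throw; nothing to roll back yet
    holder->bindWith(std::prev(queue.end()));  /// step 2: nothrow binding
    return holder;
}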
@ -62,29 +86,33 @@ namespace
class QueryLockInfo class QueryLockInfo
{ {
private: private:
std::mutex mutex; mutable std::mutex mutex;
std::map<std::string, size_t> queries; std::map<std::string, size_t> queries;
public: public:
void add(const String & query_id) void add(const String & query_id)
{ {
std::lock_guard lock(mutex); std::lock_guard lock(mutex);
++queries[query_id];
const auto res = queries.emplace(query_id, 1); // may throw
if (!res.second)
++res.first->second;
} }
void remove(const String & query_id) void remove(const String & query_id) noexcept
{ {
std::lock_guard lock(mutex); std::lock_guard lock(mutex);
auto it = queries.find(query_id);
assert(it != queries.end()); const auto query_it = queries.find(query_id);
if (--it->second == 0) if (query_it != queries.cend() && --query_it->second == 0)
queries.erase(it); queries.erase(query_it);
} }
void check(const String & query_id) void check(const String & query_id) const
{ {
std::lock_guard lock(mutex); std::lock_guard lock(mutex);
if (queries.count(query_id))
if (queries.find(query_id) != queries.cend())
throw Exception("Possible deadlock avoided. Client should retry.", ErrorCodes::DEADLOCK_AVOIDED); throw Exception("Possible deadlock avoided. Client should retry.", ErrorCodes::DEADLOCK_AVOIDED);
} }
}; };
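A sketch of the situation this registry guards against, assuming the RWLock API from Common/RWLock.h (two tables; the query ids are hypothetical): whenever a query that already holds some read lock would have to wait for another read lock behind a queued writer, getLock() throws DEADLOCK_AVOIDED instead of risking a cross-table wait cycle.

#include <Common/RWLock.h>

void illustrateDeadlockAvoidance(DB::RWLock & table_a, DB::RWLock & table_b)
{
    auto read_a = table_a->getLock(DB::RWLockImpl::Read, "query_1");

    /// Suppose another connection has queued a write lock on table_b and is waiting.
    /// query_1 would now have to wait behind that writer; since query_1 already
    /// holds a read lock (on table_a), this call throws DEADLOCK_AVOIDED instead
    /// of waiting, and the client is expected to retry.
    auto read_b = table_b->getLock(DB::RWLockImpl::Read, "query_1");
}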
@ -93,8 +121,16 @@ namespace
} }
/** To guarantee that we do not get any piece of our data corrupted:
* 1. Perform all actions that include allocations before changing lock's internal state
* 2. Roll back any changes that make the state inconsistent
*
* Note: "SM" in the commentaries below stands for STATE MODIFICATION
*/
RWLockImpl::LockHolder RWLockImpl::getLock(RWLockImpl::Type type, const String & query_id) RWLockImpl::LockHolder RWLockImpl::getLock(RWLockImpl::Type type, const String & query_id)
{ {
const bool request_has_query_id = query_id != NO_QUERY;
Stopwatch watch(CLOCK_MONOTONIC_COARSE); Stopwatch watch(CLOCK_MONOTONIC_COARSE);
CurrentMetrics::Increment waiting_client_increment((type == Read) ? CurrentMetrics::RWLockWaitingReaders CurrentMetrics::Increment waiting_client_increment((type == Read) ? CurrentMetrics::RWLockWaitingReaders
: CurrentMetrics::RWLockWaitingWriters); : CurrentMetrics::RWLockWaitingWriters);
@@ -106,29 +142,39 @@ RWLockImpl::LockHolder RWLockImpl::getLock(RWLockImpl::Type type, const String &
                                                      : ProfileEvents::RWLockWritersWaitMilliseconds, watch.elapsedMilliseconds());
     };

-    GroupsContainer::iterator it_group;
-    ClientsContainer::iterator it_client;

     /// This object is placed above unique_lock, because it may lock in destructor.
-    LockHolder res;
+    auto lock_holder = std::make_shared<LockHolderImpl>(query_id, type);

     std::unique_lock lock(mutex);

-    /// Check if the same query is acquiring previously acquired lock
-    if (query_id != RWLockImpl::NO_QUERY)
+    /// The FastPath:
+    /// Check if the same query_id already holds the required lock in which case we can proceed without waiting
+    if (request_has_query_id)
     {
-        auto it_query = query_id_to_holder.find(query_id);
-        if (it_query != query_id_to_holder.end())
-            res = it_query->second.lock();
-    }
-
-    if (res)
-    {
-        /// XXX: it means we can't upgrade lock from read to write - with proper waiting!
-        if (type != Read || res->it_group->type != Read)
-            throw Exception("Attempt to acquire exclusive lock recursively", ErrorCodes::LOGICAL_ERROR);
-        else
-            return res;
+        const auto it_query = owner_queries.find(query_id);
+        if (it_query != owner_queries.end())
+        {
+            const auto current_owner_group = queue.begin();
+
+            /// XXX: it means we can't upgrade lock from read to write!
+            if (type == Write)
+                throw Exception(
+                    "RWLockImpl::getLock(): Cannot acquire exclusive lock while RWLock is already locked",
+                    ErrorCodes::LOGICAL_ERROR);
+
+            if (current_owner_group->type == Write)
+                throw Exception(
+                    "RWLockImpl::getLock(): RWLock is already locked in exclusive mode",
+                    ErrorCodes::LOGICAL_ERROR);
+
+            /// N.B. Type is Read here, query_id is not empty and it_query is a valid iterator
+            all_read_locks.add(query_id);                                     /// SM1: may throw on insertion (nothing to roll back)
+            ++it_query->second;                                               /// SM2: nothrow
+            lock_holder->bind_with(shared_from_this(), current_owner_group);  /// SM3: nothrow
+
+            finalize_metrics();
+            return lock_holder;
+        }
     }
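Note: for illustration, the fast path admits a repeated read request from the same query without queueing. A hedged usage sketch, assuming the factory method create() from the existing header and a made-up query id:

    #include <Common/RWLock.h>   // include path assumed from this diff's layout

    using namespace DB;

    void example()
    {
        auto table_lock = RWLockImpl::create();   // factory assumed from the existing header

        /// First read request: slow path, enqueues and may wait
        auto holder_a = table_lock->getLock(RWLockImpl::Read, "query-42");

        /// Second read request with the same query_id: the fast path above finds
        /// "query-42" in owner_queries, bumps the counter and returns without waiting
        auto holder_b = table_lock->getLock(RWLockImpl::Read, "query-42");

        /// Upgrading to Write from the same query is refused:
        /// table_lock->getLock(RWLockImpl::Write, "query-42") would throw LOGICAL_ERROR
    }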
     /** If the query already has any active read lock and tries to acquire another read lock
@@ -148,86 +194,106 @@ RWLockImpl::LockHolder RWLockImpl::getLock(RWLockImpl::Type type, const String &
     if (type == Type::Write || queue.empty() || queue.back().type == Type::Write)
     {
-        if (type == Type::Read && !queue.empty() && queue.back().type == Type::Write && query_id != RWLockImpl::NO_QUERY)
+        if (type == Type::Read && request_has_query_id && !queue.empty())
             all_read_locks.check(query_id);

-        /// Create new group of clients
-        it_group = queue.emplace(queue.end(), type);
+        /// Create a new group of locking requests
+        queue.emplace_back(type);  /// SM1: may throw (nothing to roll back)
     }
-    else
-    {
-        /// Will append myself to last group
-        it_group = std::prev(queue.end());
-
-        if (it_group != queue.begin() && query_id != RWLockImpl::NO_QUERY)
-            all_read_locks.check(query_id);
-    }
+    else if (request_has_query_id && queue.size() > 1)
+        all_read_locks.check(query_id);

-    /// Append myself to the end of chosen group
-    auto & clients = it_group->clients;
-    try
-    {
-        it_client = clients.emplace(clients.end(), type);
-    }
-    catch (...)
-    {
-        /// Remove group if it was the first client in the group and an error occurred
-        if (clients.empty())
-            queue.erase(it_group);
-        throw;
-    }
-
-    res.reset(new LockHolderImpl(shared_from_this(), it_group, it_client));
+    GroupsContainer::iterator it_group = std::prev(queue.end());
+
+    /// We need to reference the associated group before waiting to guarantee
+    /// that this group does not get deleted prematurely
+    ++it_group->refererrs;

     /// Wait a notification until we will be the only in the group.
     it_group->cv.wait(lock, [&] () { return it_group == queue.begin(); });

-    /// Insert myself (weak_ptr to the holder) to queries set to implement recursive lock
-    if (query_id != RWLockImpl::NO_QUERY)
-    {
-        query_id_to_holder.emplace(query_id, res);
+    --it_group->refererrs;

-        if (type == Type::Read)
-            all_read_locks.add(query_id);
+    if (request_has_query_id)
+    {
+        try
+        {
+            if (type == Type::Read)
+                all_read_locks.add(query_id);           /// SM2: may throw on insertion
+                                                        ///      and is safe to roll back unconditionally
+            const auto emplace_res =
+                owner_queries.emplace(query_id, 1);     /// SM3: may throw on insertion
+            if (!emplace_res.second)
+                ++emplace_res.first->second;            /// SM4: nothrow
+        }
+        catch (...)
+        {
+            /// Methods std::list<>::emplace_back() and std::unordered_map<>::emplace() provide strong exception safety
+            /// We only need to roll back the changes to these objects: all_read_locks and the locking queue
+            if (type == Type::Read)
+                all_read_locks.remove(query_id);        /// Rollback(SM2): nothrow
+
+            if (it_group->refererrs == 0)
+            {
+                const auto next = queue.erase(it_group);  /// Rollback(SM1): nothrow
+                if (next != queue.end())
+                    next->cv.notify_all();
+            }
+
+            throw;
+        }
     }

-    res->query_id = query_id;
+    lock_holder->bind_with(shared_from_this(), it_group);  /// SM: nothrow

     finalize_metrics();
-    return res;
+    return lock_holder;
 }
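Note: the shape of this slow path is "mutate strongly exception-safe containers first, roll back in reverse on failure, then perform only nothrow bindings". A condensed, self-contained sketch of the same discipline; registerOwner, owners and queue are illustrative names, not the real types:

    #include <cstddef>
    #include <list>
    #include <string>
    #include <unordered_map>

    /// Two containers must stay mutually consistent. Each emplace gives the strong
    /// exception guarantee, so on failure it is enough to undo the earlier
    /// successful step with operations that cannot throw.
    void registerOwner(std::unordered_map<std::string, std::size_t> & owners,
                       std::list<int> & queue,
                       const std::string & id)
    {
        queue.emplace_back(0);                       // SM1: may throw; nothing to roll back yet
        try
        {
            const auto res = owners.emplace(id, 1);  // SM2: may throw
            if (!res.second)
                ++res.first->second;                 // nothrow
        }
        catch (...)
        {
            queue.pop_back();                        // Rollback(SM1): nothrow
            throw;
        }
    }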
+/** The sequence points of acquiring lock's ownership by an instance of LockHolderImpl:
+  *   1. all_read_locks is updated
+  *   2. owner_queries is updated
+  *   3. request group is updated by LockHolderImpl which in turn becomes "bound"
+  *
+  * If, by the time the destructor of LockHolderImpl is called, the instance has been "bound",
+  * it is guaranteed that all three steps have been executed successfully and the resulting state is consistent.
+  * With the mutex locked, the order of steps to restore the lock's state can be arbitrary.
+  *
+  * We do not employ try-catch: if something bad happens, there is nothing we can do =(
+  */
 RWLockImpl::LockHolderImpl::~LockHolderImpl()
 {
+    if (!bound || parent == nullptr)
+        return;
+
     std::lock_guard lock(parent->mutex);

-    /// Remove weak_ptrs to the holder, since there are no owners of the current lock
-    parent->query_id_to_holder.erase(query_id);
+    /// The associated group must exist (and be the beginning of the queue?)
+    if (parent->queue.empty() || it_group != parent->queue.begin())
+        return;

-    if (*it_client == RWLockImpl::Read && query_id != RWLockImpl::NO_QUERY)
-        all_read_locks.remove(query_id);
-
-    /// Removes myself from client list of our group
-    it_group->clients.erase(it_client);
-
-    /// Remove the group if we were the last client and notify the next group
-    if (it_group->clients.empty())
+    /// If query_id is not empty it must be listed in parent->owner_queries
+    if (query_id != RWLockImpl::NO_QUERY)
     {
-        auto & parent_queue = parent->queue;
-        parent_queue.erase(it_group);
-
-        if (!parent_queue.empty())
-            parent_queue.front().cv.notify_all();
+        const auto owner_it = parent->owner_queries.find(query_id);
+        if (owner_it != parent->owner_queries.end())
+        {
+            if (--owner_it->second == 0)                /// SM: nothrow
+                parent->owner_queries.erase(owner_it);  /// SM: nothrow
+
+            if (lock_type == RWLockImpl::Read)
+                all_read_locks.remove(query_id);        /// SM: nothrow
+        }
+    }
+
+    /// If we are the last remaining referrer, remove the group and notify the next group
+    if (--it_group->refererrs == 0)                     /// SM: nothrow
+    {
+        const auto next = parent->queue.erase(it_group);  /// SM: nothrow
+        if (next != parent->queue.end())
+            next->cv.notify_all();
     }
 }
-RWLockImpl::LockHolderImpl::LockHolderImpl(RWLock && parent_, RWLockImpl::GroupsContainer::iterator it_group_,
-                                           RWLockImpl::ClientsContainer::iterator it_client_)
-    : parent{std::move(parent_)}, it_group{it_group_}, it_client{it_client_},
-      active_client_increment{(*it_client == RWLockImpl::Read) ? CurrentMetrics::RWLockActiveReaders
-                                                               : CurrentMetrics::RWLockActiveWriters}
-{
-}
-
 }
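Note: with per-group reference counting, the last holder to leave a group erases it and wakes the next group, preserving FIFO fairness. A stripped-down sketch of that hand-off; Group and releaseGroup are illustrative, not the real types:

    #include <condition_variable>
    #include <cstddef>
    #include <list>

    struct Group
    {
        std::size_t refs = 0;
        std::condition_variable cv;
    };

    /// Called with the lock's mutex held: drop one reference and, once the group
    /// is empty, erase it and wake the next group in FIFO order.
    void releaseGroup(std::list<Group> & queue, std::list<Group>::iterator it)
    {
        if (--it->refs == 0)
        {
            const auto next = queue.erase(it);   // erasing a list node leaves other iterators valid
            if (next != queue.end())
                next->cv.notify_all();
        }
    }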
View File
@@ -8,6 +8,7 @@
 #include <condition_variable>
 #include <map>
 #include <string>
+#include <unordered_map>

 namespace DB
@ -53,25 +54,24 @@ private:
struct Group; struct Group;
using GroupsContainer = std::list<Group>; using GroupsContainer = std::list<Group>;
using ClientsContainer = std::list<Type>; using OwnerQueryIds = std::unordered_map<String, size_t>;
using QueryIdToHolder = std::map<String, std::weak_ptr<LockHolderImpl>>;
/// Group of clients that should be executed concurrently /// Group of locking requests that should be granted concurrently
/// i.e. a group could contain several readers, but only one writer /// i.e. a group can contain several readers, but only one writer
struct Group struct Group
{ {
// FIXME: there is only redundant |type| information inside |clients|.
const Type type; const Type type;
ClientsContainer clients; size_t refererrs;
std::condition_variable cv; /// all clients of the group wait group condvar std::condition_variable cv; /// all locking requests of the group wait on this condvar
explicit Group(Type type_) : type{type_} {} explicit Group(Type type_) : type{type_}, refererrs{0} {}
}; };
mutable std::mutex mutex;
GroupsContainer queue; GroupsContainer queue;
QueryIdToHolder query_id_to_holder; OwnerQueryIds owner_queries;
mutable std::mutex mutex;
}; };
View File
@@ -24,6 +24,7 @@ namespace DB
 namespace ErrorCodes
 {
     extern const int CANNOT_COMPILE_REGEXP;
+    extern const int LOGICAL_ERROR;
     extern const int NO_ELEMENTS_IN_CONFIG;
     extern const int INVALID_CONFIG_PARAMETER;
 }
@@ -38,7 +39,9 @@ private:
     const RE2 regexp;
     const re2::StringPiece replacement;

+#ifndef NDEBUG
     mutable std::atomic<std::uint64_t> matches_count = 0;
+#endif

 public:
     //* TODO: option with hyperscan? https://software.intel.com/en-us/articles/why-and-how-to-replace-pcre-with-hyperscan
@@ -61,15 +64,37 @@ public:
     uint64_t apply(std::string & data) const
     {
         auto m = RE2::GlobalReplace(&data, regexp, replacement);
+#ifndef NDEBUG
         matches_count += m;
+#endif
         return m;
     }

     const std::string & getName() const { return name; }
     const std::string & getReplacementString() const { return replacement_string; }
+#ifndef NDEBUG
     uint64_t getMatchesCount() const { return matches_count; }
+#endif
 };
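Note: guarding both the counter and every touch point with #ifndef NDEBUG means the member does not exist at all in release builds, so there is no atomic traffic on the hot path. The generic shape of the pattern, with Rule as a made-up class:

    #include <atomic>
    #include <cstdint>

    class Rule
    {
    #ifndef NDEBUG
        mutable std::atomic<std::uint64_t> matches{0};  // the member exists only in debug builds
    #endif

    public:
        void apply() const
        {
    #ifndef NDEBUG
            ++matches;   // compiled out entirely when NDEBUG is defined (release builds)
    #endif
        }
    };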
+std::unique_ptr<SensitiveDataMasker> SensitiveDataMasker::sensitive_data_masker = nullptr;

+void SensitiveDataMasker::setInstance(std::unique_ptr<SensitiveDataMasker> sensitive_data_masker_)
+{
+    if (!sensitive_data_masker_)
+        throw Exception("Logical error: the 'sensitive_data_masker' is not set", ErrorCodes::LOGICAL_ERROR);
+
+    if (sensitive_data_masker_->rulesCount() > 0)
+    {
+        sensitive_data_masker = std::move(sensitive_data_masker_);
+    }
+}
+
+SensitiveDataMasker * SensitiveDataMasker::getInstance()
+{
+    return sensitive_data_masker.get();
+}
 SensitiveDataMasker::SensitiveDataMasker(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix)
 {
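Note: putting the pieces of this file together, a plausible call sequence is: initialize once at startup, then read the raw pointer on the logging path. The include path and the "query_masking_rules" config prefix are assumptions for illustration; setInstance, getInstance and wipeSensitiveData are declared in the header changed later in this diff:

    #include <memory>
    #include <string>

    #include <Common/SensitiveDataMasker.h>          // include path assumed from this diff's layout
    #include <Poco/Util/AbstractConfiguration.h>

    /// At server startup, while still single-threaded
    /// (setInstance is documented in the header as not thread-safe)
    void initMasker(const Poco::Util::AbstractConfiguration & config)
    {
        /// "query_masking_rules" is an assumed config prefix, for illustration only
        DB::SensitiveDataMasker::setInstance(
            std::make_unique<DB::SensitiveDataMasker>(config, "query_masking_rules"));
    }

    /// On the hot logging path: a plain pointer read, no locking
    void maskLine(std::string & log_message)
    {
        if (auto * masker = DB::SensitiveDataMasker::getInstance())
            masker->wipeSensitiveData(log_message);
    }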
View File
@@ -4,7 +4,6 @@
 #include <vector>
 #include <cstdint>

 namespace Poco
 {
 namespace Util
@@ -13,6 +12,32 @@ namespace Util
 }
 }
+/// SensitiveDataMasker allows removing sensitive data from queries using a set of regexp-based rules.
+/// It's used as a singleton via the getInstance method.
+/// Initially it's empty (nullptr); after one-time manual initialization
+/// (done by the setInstance call) it takes the proper value, which
+/// is stored in a unique_ptr.
+/// The singleton looks like the best option here, as
+/// the two users of that object (OwnSplitChannel & Interpreters/executeQuery)
+/// can't own/share the masker properly without synchronization & locks,
+/// and we can't afford taking global locks for each logged line.
+/// I've considered singleton alternatives, but it's unclear who should own the object,
+/// and they introduce unnecessary complexity in the implementation (passing references back and forth):
+///
+/// Context can't own it, as Context is destroyed before the logger,
+/// and the logger lives longer, so logging can still happen after Context destruction.
+/// Resetting the masker in the logger at the moment of
+/// Context destruction can't be done without synchronization / locks in a safe manner.
+///
+/// The logger is Poco-derived and I didn't want to break its interface;
+/// also the logger can be dynamically reconfigured without a server restart,
+/// and it actually recreates OwnSplitChannel when reconfiguration happens,
+/// which makes it quite tricky. So it is a bad candidate for owning the masker too.
 namespace DB
 {
 class SensitiveDataMasker
@@ -20,6 +45,7 @@ class SensitiveDataMasker
 private:
     class MaskingRule;
     std::vector<std::unique_ptr<MaskingRule>> all_masking_rules;
+    static std::unique_ptr<SensitiveDataMasker> sensitive_data_masker;

 public:
     SensitiveDataMasker(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix);
@@ -28,6 +54,11 @@ public:
     /// Returns the number of matched rules.
     size_t wipeSensitiveData(std::string & data) const;

+    /// setInstance is not thread-safe and should be called once in single-thread mode.
+    /// https://github.com/ClickHouse/ClickHouse/pull/6810#discussion_r321183367
+    static void setInstance(std::unique_ptr<SensitiveDataMasker> sensitive_data_masker_);
+    static SensitiveDataMasker * getInstance();
+
     /// Used in tests.
     void addMaskingRule(const std::string & name, const std::string & regexp_string, const std::string & replacement_string);
View File
@@ -6,6 +6,7 @@
 #include <Common/config.h>
 #include <common/SimpleCache.h>
 #include <common/demangle.h>
+#include <Core/Defines.h>

 #include <cstring>
 #include <filesystem>
View File
@@ -361,6 +361,12 @@ const SymbolIndex::Object * SymbolIndex::findObject(const void * address) const
     return find(address, data.objects);
 }

+SymbolIndex & SymbolIndex::instance()
+{
+    static SymbolIndex instance;
+    return instance;
+}
+
 }

 #endif
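Note: instance() above is a Meyers singleton. Since C++11, initialization of a function-local static is guaranteed to run exactly once even under concurrent first calls ("magic statics"), which is what lets this replace the ext::singleton base class removed in the header below. A self-contained sketch of the idiom, with Registry as a made-up class:

    #include <cassert>

    class Registry
    {
        Registry() = default;   // runs on first call to instance(), under a compiler-generated guard

    public:
        Registry(const Registry &) = delete;
        Registry & operator=(const Registry &) = delete;

        static Registry & instance()
        {
            static Registry registry;   // C++11: thread-safe, lazy, initialized exactly once
            return registry;
        }
    };

    int main()
    {
        assert(&Registry::instance() == &Registry::instance());   // always the same object
    }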
View File
@@ -4,8 +4,8 @@
 #include <vector>
 #include <string>
-#include <ext/singleton.h>
 #include <Common/Elf.h>
+#include <boost/noncopyable.hpp>

 namespace DB
@@ -15,13 +15,14 @@ namespace DB
  * Used as a replacement for "dladdr" function which is extremely slow.
  * It works better than "dladdr" because it also allows to search private symbols, that are not participated in shared linking.
  */
-class SymbolIndex : public ext::singleton<SymbolIndex>
+class SymbolIndex : private boost::noncopyable
 {
 protected:
-    friend class ext::singleton<SymbolIndex>;
     SymbolIndex() { update(); }

 public:
+    static SymbolIndex & instance();
+
     struct Symbol
     {
         const void * address_begin;
Some files were not shown because too many files have changed in this diff