Merge branch 'master' into removing-data-streams-folder
Commit bfcbf5abe0

.gitmodules (vendored, 3 changes)
@@ -250,6 +250,9 @@
[submodule "contrib/magic_enum"]
    path = contrib/magic_enum
    url = https://github.com/Neargye/magic_enum
[submodule "contrib/libprotobuf-mutator"]
    path = contrib/libprotobuf-mutator
    url = https://github.com/google/libprotobuf-mutator
[submodule "contrib/sysroot"]
    path = contrib/sysroot
    url = https://github.com/ClickHouse-Extras/sysroot.git

@@ -136,6 +136,21 @@ if (ENABLE_FUZZING)
    message (STATUS "Fuzzing instrumentation enabled")
    set (FUZZER "libfuzzer")
    set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -nostdlib++")
    set (ENABLE_CLICKHOUSE_ODBC_BRIDGE OFF)
    set (ENABLE_LIBRARIES 0)
    set (ENABLE_SSL 1)
    set (USE_INTERNAL_SSL_LIBRARY 1)
    set (USE_UNWIND ON)
    set (ENABLE_EMBEDDED_COMPILER 0)
    set (ENABLE_EXAMPLES 0)
    set (ENABLE_UTILS 0)
    set (ENABLE_THINLTO 0)
    set (ENABLE_TCMALLOC 0)
    set (ENABLE_JEMALLOC 0)
    set (ENABLE_CHECK_HEAVY_BUILDS 1)
    set (GLIBC_COMPATIBILITY OFF)
    set (ENABLE_PROTOBUF ON)
    set (USE_INTERNAL_PROTOBUF_LIBRARY ON)
endif()

# Global libraries

@@ -188,7 +203,7 @@ endif ()
option(ENABLE_TESTS "Provide unit_test_dbms target with Google.Test unit tests" ON)
option(ENABLE_EXAMPLES "Build all example programs in 'examples' subdirectories" OFF)

if (OS_LINUX AND (ARCH_AMD64 OR ARCH_AARCH64) AND NOT UNBUNDLED AND MAKE_STATIC_LIBRARIES AND NOT SPLIT_SHARED_LIBRARIES AND CMAKE_VERSION VERSION_GREATER "3.9.0")
if (OS_LINUX AND (ARCH_AMD64 OR ARCH_AARCH64) AND NOT UNBUNDLED AND MAKE_STATIC_LIBRARIES AND NOT SPLIT_SHARED_LIBRARIES AND NOT USE_MUSL)
    # Only for Linux, x86_64 or aarch64.
    option(GLIBC_COMPATIBILITY "Enable compatibility with older glibc libraries." ON)
elseif(GLIBC_COMPATIBILITY)
@@ -203,10 +218,6 @@ if (GLIBC_COMPATIBILITY)
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -include ${CMAKE_CURRENT_SOURCE_DIR}/base/glibc-compatibility/glibc-compat-2.32.h")
endif()

if (NOT CMAKE_VERSION VERSION_GREATER "3.9.0")
    message (WARNING "CMake version must be greater than 3.9.0 for production builds.")
endif ()

# Make sure the final executable has symbols exported
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -rdynamic")

@@ -582,6 +593,7 @@ include (cmake/find/cassandra.cmake)
include (cmake/find/sentry.cmake)
include (cmake/find/stats.cmake)
include (cmake/find/datasketches.cmake)
include (cmake/find/libprotobuf-mutator.cmake)

set (USE_INTERNAL_CITYHASH_LIBRARY ON CACHE INTERNAL "")
find_contrib_lib(cityhash)

@@ -5,6 +5,10 @@

#include <string.h>
#include <unistd.h>
#include <sys/select.h>
#include <sys/time.h>
#include <sys/types.h>


#ifdef OS_LINUX
/// We can detect if code is linked with one or another readline variants or open the library dynamically.

@@ -6,7 +6,7 @@

#include <base/defines.h>

#if defined(__linux__) && !defined(THREAD_SANITIZER)
#if defined(__linux__) && !defined(THREAD_SANITIZER) && !defined(USE_MUSL)
#define USE_PHDR_CACHE 1
#endif

cmake/find/libprotobuf-mutator.cmake (Normal file, 11 lines)
@@ -0,0 +1,11 @@
option(USE_LIBPROTOBUF_MUTATOR "Enable libprotobuf-mutator" ${ENABLE_FUZZING})

if (NOT USE_LIBPROTOBUF_MUTATOR)
    return()
endif()

set(LibProtobufMutator_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libprotobuf-mutator")

if (NOT EXISTS "${LibProtobufMutator_SOURCE_DIR}/README.md")
    message (ERROR "submodule contrib/libprotobuf-mutator is missing. to fix try run: \n git submodule update --init --recursive")
endif()

@@ -14,6 +14,8 @@ endif ()
if (OS_ANDROID)
    # pthread and rt are included in libc
    set (DEFAULT_LIBS "${DEFAULT_LIBS} ${BUILTINS_LIBRARY} ${COVERAGE_OPTION} -lc -lm -ldl")
elseif (USE_MUSL)
    set (DEFAULT_LIBS "${DEFAULT_LIBS} ${BUILTINS_LIBRARY} ${COVERAGE_OPTION} -static -lc")
else ()
    set (DEFAULT_LIBS "${DEFAULT_LIBS} ${BUILTINS_LIBRARY} ${COVERAGE_OPTION} -lc -lm -lrt -lpthread -ldl")
endif ()
@@ -26,7 +28,7 @@ set(CMAKE_C_STANDARD_LIBRARIES ${DEFAULT_LIBS})
# glibc-compatibility library relies to constant version of libc headers
# (because minor changes in function attributes between different glibc versions will introduce incompatibilities)
# This is for x86_64. For other architectures we have separate toolchains.
if (ARCH_AMD64 AND NOT_UNBUNDLED)
if (ARCH_AMD64 AND NOT_UNBUNDLED AND NOT CMAKE_CROSSCOMPILING)
    set(CMAKE_C_STANDARD_INCLUDE_DIRECTORIES ${ClickHouse_SOURCE_DIR}/contrib/libc-headers/x86_64-linux-gnu ${ClickHouse_SOURCE_DIR}/contrib/libc-headers)
    set(CMAKE_CXX_STANDARD_INCLUDE_DIRECTORIES ${ClickHouse_SOURCE_DIR}/contrib/libc-headers/x86_64-linux-gnu ${ClickHouse_SOURCE_DIR}/contrib/libc-headers)
endif ()
@@ -37,8 +39,10 @@ set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED)

if (NOT OS_ANDROID)
    # Our compatibility layer doesn't build under Android, many errors in musl.
    add_subdirectory(base/glibc-compatibility)
    if (NOT USE_MUSL)
        # Our compatibility layer doesn't build under Android, many errors in musl.
        add_subdirectory(base/glibc-compatibility)
    endif ()
    add_subdirectory(base/harmful)
endif ()

cmake/linux/toolchain-x86_64-musl.cmake (Normal file, 35 lines)
@@ -0,0 +1,35 @@
set (CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)

set (CMAKE_SYSTEM_NAME "Linux")
set (CMAKE_SYSTEM_PROCESSOR "x86_64")
set (CMAKE_C_COMPILER_TARGET "x86_64-linux-musl")
set (CMAKE_CXX_COMPILER_TARGET "x86_64-linux-musl")
set (CMAKE_ASM_COMPILER_TARGET "x86_64-linux-musl")

set (TOOLCHAIN_PATH "${CMAKE_CURRENT_LIST_DIR}/../../contrib/sysroot/linux-x86_64-musl")

set (CMAKE_SYSROOT "${TOOLCHAIN_PATH}")

find_program (LLVM_AR_PATH NAMES "llvm-ar" "llvm-ar-13" "llvm-ar-12" "llvm-ar-11" "llvm-ar-10" "llvm-ar-9" "llvm-ar-8")
find_program (LLVM_RANLIB_PATH NAMES "llvm-ranlib" "llvm-ranlib-13" "llvm-ranlib-12" "llvm-ranlib-11" "llvm-ranlib-10" "llvm-ranlib-9")

set (CMAKE_AR "${LLVM_AR_PATH}" CACHE FILEPATH "" FORCE)
set (CMAKE_RANLIB "${LLVM_RANLIB_PATH}" CACHE FILEPATH "" FORCE)

set (CMAKE_C_FLAGS_INIT "${CMAKE_C_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")
set (CMAKE_CXX_FLAGS_INIT "${CMAKE_CXX_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")
set (CMAKE_ASM_FLAGS_INIT "${CMAKE_ASM_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")

set (LINKER_NAME "ld.lld" CACHE STRING "" FORCE)

set (CMAKE_EXE_LINKER_FLAGS_INIT "-fuse-ld=lld")
set (CMAKE_SHARED_LINKER_FLAGS_INIT "-fuse-ld=lld")

set (HAS_PRE_1970_EXITCODE "0" CACHE STRING "Result from TRY_RUN" FORCE)
set (HAS_PRE_1970_EXITCODE__TRYRUN_OUTPUT "" CACHE STRING "Output from TRY_RUN" FORCE)

set (HAS_POST_2038_EXITCODE "0" CACHE STRING "Result from TRY_RUN" FORCE)
set (HAS_POST_2038_EXITCODE__TRYRUN_OUTPUT "" CACHE STRING "Output from TRY_RUN" FORCE)

set (USE_MUSL 1)
add_definitions(-DUSE_MUSL=1)

contrib/CMakeLists.txt (vendored, 4 changes)
@@ -49,6 +49,10 @@ add_subdirectory (replxx-cmake)
add_subdirectory (unixodbc-cmake)
add_subdirectory (nanodbc-cmake)

if (ENABLE_FUZZING)
    add_subdirectory (libprotobuf-mutator-cmake)
endif()

if (USE_YAML_CPP)
    add_subdirectory (yaml-cpp-cmake)
endif()

contrib/fastops (vendored, 2 changes)
@@ -1 +1 @@
Subproject commit 012b777df9e2d145a24800a6c8c3d4a0249bb09e
Subproject commit 1460583af7d13c0e980ce46aec8ee9400314669a

@@ -18,8 +18,10 @@
 * Define overrides for non-standard allocator-related functions if they are
 * present on the system.
 */
#define JEMALLOC_OVERRIDE_MEMALIGN
#define JEMALLOC_OVERRIDE_VALLOC
#if !defined(USE_MUSL)
#define JEMALLOC_OVERRIDE_MEMALIGN
#define JEMALLOC_OVERRIDE_VALLOC
#endif

/*
 * At least Linux omits the "const" in:

@@ -1,6 +1,6 @@
// OSX does not have this for system alloc functions, so you will get
// "exception specification in declaration" error.
#if defined(__APPLE__) || defined(__FreeBSD__)
#if defined(__APPLE__) || defined(__FreeBSD__) || defined(USE_MUSL)
# undef JEMALLOC_NOTHROW
# define JEMALLOC_NOTHROW

@@ -13,12 +13,14 @@
 * Define overrides for non-standard allocator-related functions if they are
 * present on the system.
 */
#define JEMALLOC_OVERRIDE___LIBC_CALLOC
#define JEMALLOC_OVERRIDE___LIBC_FREE
#define JEMALLOC_OVERRIDE___LIBC_MALLOC
#define JEMALLOC_OVERRIDE___LIBC_MEMALIGN
#define JEMALLOC_OVERRIDE___LIBC_REALLOC
#define JEMALLOC_OVERRIDE___LIBC_VALLOC
#if !defined(USE_MUSL)
#define JEMALLOC_OVERRIDE___LIBC_CALLOC
#define JEMALLOC_OVERRIDE___LIBC_FREE
#define JEMALLOC_OVERRIDE___LIBC_MALLOC
#define JEMALLOC_OVERRIDE___LIBC_MEMALIGN
#define JEMALLOC_OVERRIDE___LIBC_REALLOC
#define JEMALLOC_OVERRIDE___LIBC_VALLOC
#endif
/* #undef JEMALLOC_OVERRIDE___POSIX_MEMALIGN */

/*

@@ -56,6 +56,10 @@ if (USE_UNWIND)
    target_compile_definitions(cxx PUBLIC -DSTD_EXCEPTION_HAS_STACK_TRACE=1)
endif ()

if (USE_MUSL)
    target_compile_definitions(cxx PUBLIC -D_LIBCPP_HAS_MUSL_LIBC=1)
endif ()

# Override the deduced attribute support that causes error.
if (OS_DARWIN AND COMPILER_GCC)
    add_compile_definitions(_LIBCPP_INIT_PRIORITY_MAX)

contrib/libprotobuf-mutator (vendored Submodule, 1 change)
@@ -0,0 +1 @@
Subproject commit ffd86a32874e5c08a143019aad1aaf0907294c9f

contrib/libprotobuf-mutator-cmake/CMakeLists.txt (Normal file, 14 lines)
@@ -0,0 +1,14 @@
set(LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/libprotobuf-mutator)

add_library(protobuf-mutator
    ${LIBRARY_DIR}/src/libfuzzer/libfuzzer_macro.cc
    ${LIBRARY_DIR}/src/libfuzzer/libfuzzer_mutator.cc
    ${LIBRARY_DIR}/src/binary_format.cc
    ${LIBRARY_DIR}/src/mutator.cc
    ${LIBRARY_DIR}/src/text_format.cc
    ${LIBRARY_DIR}/src/utf8_fix.cc)

target_include_directories(protobuf-mutator BEFORE PRIVATE "${LIBRARY_DIR}")
target_include_directories(protobuf-mutator BEFORE PRIVATE "${ClickHouse_SOURCE_DIR}/contrib/protobuf/src")

target_link_libraries(protobuf-mutator ${Protobuf_LIBRARY})

@@ -98,7 +98,9 @@
#define HAVE_BCOPY 1

/* Define to 1 if you have the <bits/types.h> header file. */
#define HAVE_BITS_TYPES_H 1
#if !defined(USE_MUSL)
#define HAVE_BITS_TYPES_H 1
#endif

/* Define to 1 if you have the `chroot' function. */
#define HAVE_CHROOT 1

contrib/sysroot (vendored, 2 changes)
@@ -1 +1 @@
Subproject commit 002415524b5d14124bb8a61a3ce7ac65774f5479
Subproject commit e4663925b73beb57dd29154844c8d50441146753

@@ -47,13 +47,17 @@ then
fi

URL="https://builds.clickhouse.com/master/${DIR}/clickhouse"
echo
echo "Will download ${URL}"
echo
curl -O "${URL}" && chmod a+x clickhouse &&
echo
echo "Successfully downloaded the ClickHouse binary, you can run it as:
    ./clickhouse"

if [ "${OS}" = "Linux" ]
then
    echo
    echo "You can also install it:
    sudo ./clickhouse install"
fi

@@ -10,7 +10,7 @@ Columns:
- `[]` — All users share the same quota.
- `['user_name']` — Connections with the same user name share the same quota.
- `['ip_address']` — Connections from the same IP share the same quota.
- `['client_key']` — Connections with the same key share the same quota. A key must be explicitly provided by a client. When using [clickhouse-client](../../interfaces/cli.md), pass a key value in the `--quota-key` parameter, or use the `quota_key` parameter in the client configuration file. When using HTTP interface, use the `X-ClickHouse-Quota` header.
- `['client_key']` — Connections with the same key share the same quota. A key must be explicitly provided by a client. When using [clickhouse-client](../../interfaces/cli.md), pass a key value in the `--quota_key` parameter, or use the `quota_key` parameter in the client configuration file. When using HTTP interface, use the `X-ClickHouse-Quota` header.
- `['user_name', 'client_key']` — Connections with the same `client_key` share the same quota. If a key isn’t provided by a client, the quota is tracked for `user_name`.
- `['client_key', 'ip_address']` — Connections with the same `client_key` share the same quota. If a key isn’t provided by a client, the quota is tracked for `ip_address`.
- `durations` ([Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Time interval lengths in seconds.
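
For context, the columns described above can be inspected directly on a running server. This is a minimal sketch, not specific to this commit; `system.quotas` is a standard system table:

``` sql
-- List each quota together with the keys it is tracked by and its interval lengths.
SELECT name, keys, durations
FROM system.quotas;
```

A client then attaches its key with `--quota_key` (or the `X-ClickHouse-Quota` HTTP header) so that its usage is accounted against the matching quota.
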
@@ -155,6 +155,60 @@ Configuration example:
LAYOUT(COMPLEX_KEY_HASHED())
```

### complex_key_sparse_hashed {#complex-key-sparse-hashed}

This type of storage is for use with composite [keys](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md). Similar to `sparse_hashed`.

Configuration example:

``` xml
<layout>
  <complex_key_sparse_hashed />
</layout>
```

``` sql
LAYOUT(COMPLEX_KEY_SPARSE_HASHED())
```

### hashed_array {#dicts-external_dicts_dict_layout-hashed-array}

The dictionary is completely stored in memory. Each attribute is stored in an array. The key attribute is stored in the form of a hash table, where the value is an index into the attribute arrays. The dictionary can contain any number of elements with any identifiers. In practice, the number of keys can reach tens of millions of items.

All types of sources are supported. When updating, data (from a file or from a table) is read in its entirety.

Configuration example:

``` xml
<layout>
  <hashed_array>
  </hashed_array>
</layout>
```

or

``` sql
LAYOUT(HASHED_ARRAY())
```
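
To make the new layout concrete, here is a hedged end-to-end sketch using DDL instead of XML; the dictionary and table names are invented for illustration, and the source clause assumes a local ClickHouse table:

``` sql
-- Hypothetical dictionary using the hashed_array layout introduced above.
CREATE DICTIONARY example_dict
(
    id UInt64,
    value String
)
PRIMARY KEY id
SOURCE(CLICKHOUSE(TABLE 'example_source'))
LIFETIME(MIN 300 MAX 600)
LAYOUT(HASHED_ARRAY());
```
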
### complex_key_hashed_array {#complex-key-hashed-array}

This type of storage is for use with composite [keys](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md). Similar to `hashed_array`.

Configuration example:

``` xml
<layout>
  <complex_key_hashed_array />
</layout>
```

``` sql
LAYOUT(COMPLEX_KEY_HASHED_ARRAY())
```

### range_hashed {#range-hashed}

The dictionary is stored in memory in the form of a hash table with an ordered array of ranges and their corresponding values.

@@ -11,7 +11,7 @@
- `[]` — All users share the same quota.
- `['user_name']` — Connections with the same user name share the same quota.
- `['ip_address']` — Connections from the same IP address share the same quota.
- `['client_key']` — Connections with the same key share the same quota. The key must be explicitly provided by the client. When using [clickhouse-client](../../interfaces/cli.md), pass the key value in the `--quota-key` parameter, or use the `quota_key` parameter in the client configuration file. When using the HTTP interface, use the `X-ClickHouse-Quota` header.
- `['client_key']` — Connections with the same key share the same quota. The key must be explicitly provided by the client. When using [clickhouse-client](../../interfaces/cli.md), pass the key value in the `--quota_key` parameter, or use the `quota_key` parameter in the client configuration file. When using the HTTP interface, use the `X-ClickHouse-Quota` header.
- `['user_name', 'client_key']` — Connections with the same key share the same quota. If the client does not provide a key, the quota is tracked for `user_name`.
- `['client_key', 'ip_address']` — Connections with the same key share the same quota. If the client does not provide a key, the quota is tracked for `ip_address`.
- `durations` ([Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Lengths of the time intervals for calculating resource consumption, in seconds.

@@ -21,8 +21,6 @@
- [`sumMap`](../../sql-reference/aggregate-functions/reference/summap.md#agg_functions-summap)
- [`minMap`](../../sql-reference/aggregate-functions/reference/minmap.md#agg_functions-minmap)
- [`maxMap`](../../sql-reference/aggregate-functions/reference/maxmap.md#agg_functions-maxmap)
- [`argMin`](../../sql-reference/aggregate-functions/reference/argmin.md)
- [`argMax`](../../sql-reference/aggregate-functions/reference/argmax.md)

!!! note "Note"
    Values of `SimpleAggregateFunction(func, Type)` are displayed and stored the same way as `Type`, so the [-Merge](../../sql-reference/aggregate-functions/combinators.md#aggregate_functions_combinators-merge) and [-State](../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-state) combinators are not required.
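
As a brief illustration of that note, a minimal sketch (table and column names are invented) showing that such a column is stored and read back as a plain value of the underlying type:

``` sql
-- The total is kept as an ordinary UInt64; no -State/-Merge combinators are involved.
CREATE TABLE simple_agg_example
(
    id UInt64,
    total SimpleAggregateFunction(sum, UInt64)
)
ENGINE = AggregatingMergeTree
ORDER BY id;

SELECT id, sum(total) FROM simple_agg_example GROUP BY id;
```
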
@@ -516,6 +516,7 @@ void ClientBase::receiveResult(ASTPtr parsed_query)
    const size_t poll_interval
        = std::max(min_poll_interval, std::min<size_t>(receive_timeout.totalMicroseconds(), default_poll_interval));

    bool break_on_timeout = connection->getConnectionType() != IServerConnection::Type::LOCAL;
    while (true)
    {
        Stopwatch receive_watch(CLOCK_MONOTONIC_COARSE);
@@ -546,7 +547,7 @@ void ClientBase::receiveResult(ASTPtr parsed_query)
        else
        {
            double elapsed = receive_watch.elapsedSeconds();
            if (elapsed > receive_timeout.totalSeconds())
            if (break_on_timeout && elapsed > receive_timeout.totalSeconds())
            {
                std::cout << "Timeout exceeded while receiving data from server."
                    << " Waited for " << static_cast<size_t>(elapsed) << " seconds,"

@@ -60,6 +60,8 @@ public:

    ~Connection() override;

    IServerConnection::Type getConnectionType() const override { return IServerConnection::Type::SERVER; }

    static ServerConnectionPtr createConnection(const ConnectionParameters & parameters, ContextPtr context);

    /// Set throttler of network traffic. One throttler could be used for multiple connections to limit total traffic.

@@ -56,6 +56,14 @@ class IServerConnection : boost::noncopyable
public:
    virtual ~IServerConnection() = default;

    enum class Type
    {
        SERVER,
        LOCAL
    };

    virtual Type getConnectionType() const = 0;

    virtual void setDefaultDatabase(const String & database) = 0;

    virtual void getServerVersion(

@@ -60,15 +60,15 @@ void LocalConnection::updateProgress(const Progress & value)

void LocalConnection::sendQuery(
    const ConnectionTimeouts &,
    const String & query_,
    const String & query_id_,
    UInt64,
    const String & query,
    const String & query_id,
    UInt64 stage,
    const Settings *,
    const ClientInfo *,
    bool)
{
    query_context = session.makeQueryContext();
    query_context->setCurrentQueryId(query_id_);
    query_context->setCurrentQueryId(query_id);
    if (send_progress)
        query_context->setProgressCallback([this] (const Progress & value) { return this->updateProgress(value); });

@@ -77,8 +77,9 @@ void LocalConnection::sendQuery(
    state.reset();
    state.emplace();

    state->query_id = query_id_;
    state->query = query_;
    state->query_id = query_id;
    state->query = query;
    state->stage = QueryProcessingStage::Enum(stage);

    if (send_progress)
        state->after_send_progress.restart();

@@ -56,6 +56,8 @@ public:

    ~LocalConnection() override;

    IServerConnection::Type getConnectionType() const override { return IServerConnection::Type::LOCAL; }

    static ServerConnectionPtr createConnection(const ConnectionParameters & connection_parameters, ContextPtr current_context, bool send_progress = false);

    void setDefaultDatabase(const String & database) override;
@@ -76,7 +78,7 @@ public:
    void sendQuery(
        const ConnectionTimeouts & timeouts,
        const String & query,
        const String & query_id_/* = "" */,
        const String & query_id/* = "" */,
        UInt64 stage/* = QueryProcessingStage::Complete */,
        const Settings * settings/* = nullptr */,
        const ClientInfo * client_info/* = nullptr */,

@@ -124,11 +124,13 @@ QueryProfilerBase<ProfilerImpl>::QueryProfilerBase(const UInt64 thread_id, const
    sev.sigev_notify = SIGEV_THREAD_ID;
    sev.sigev_signo = pause_signal;

# if defined(OS_FREEBSD)
#if defined(OS_FREEBSD)
    sev._sigev_un._threadid = thread_id;
# else
#elif defined(USE_MUSL)
    sev.sigev_notify_thread_id = thread_id;
#else
    sev._sigev_un._tid = thread_id;
# endif
#endif
    if (timer_create(clock_type, &sev, &timer_id))
    {
        /// In Google Cloud Run, the function "timer_create" is implemented incorrectly as of 2020-01-25.

@@ -17,7 +17,9 @@ extern "C"
    void *aligned_alloc(size_t alignment, size_t size);
    void *valloc(size_t size);
    void *memalign(size_t alignment, size_t size);
#if !defined(USE_MUSL)
    void *pvalloc(size_t size);
#endif
}
#pragma GCC diagnostic pop

@@ -39,6 +41,8 @@ static void dummyFunctionForInterposing()
    ignore(aligned_alloc(0, 0)); // -V575 NOLINT
    ignore(valloc(0)); // -V575 NOLINT
    ignore(memalign(0, 0)); // -V575 NOLINT
#if !defined(USE_MUSL)
    ignore(pvalloc(0)); // -V575 NOLINT
#endif
}
#endif

@@ -10,6 +10,7 @@ if (CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE" OR CMAKE_BUILD_TYPE_UC STREQUAL "RELW
    set_source_files_properties(
        FlatDictionary.cpp
        HashedDictionary.cpp
        HashedArrayDictionary.cpp
        CacheDictionary.cpp
        RangeHashedDictionary.cpp
        DirectDictionary.cpp

src/Dictionaries/HashedArrayDictionary.cpp (Normal file, 691 lines)
@@ -0,0 +1,691 @@
|
||||
#include "HashedArrayDictionary.h"
|
||||
|
||||
#include <Core/Defines.h>
|
||||
#include <DataTypes/DataTypesDecimal.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <Columns/ColumnNullable.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
|
||||
#include <Dictionaries/DictionarySource.h>
|
||||
#include <Dictionaries/DictionaryFactory.h>
|
||||
#include <Dictionaries/HierarchyDictionariesUtils.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
extern const int DICTIONARY_IS_EMPTY;
|
||||
extern const int UNSUPPORTED_METHOD;
|
||||
}
|
||||
|
||||
template <DictionaryKeyType dictionary_key_type>
|
||||
HashedArrayDictionary<dictionary_key_type>::HashedArrayDictionary(
|
||||
const StorageID & dict_id_,
|
||||
const DictionaryStructure & dict_struct_,
|
||||
DictionarySourcePtr source_ptr_,
|
||||
const HashedArrayDictionaryStorageConfiguration & configuration_,
|
||||
BlockPtr update_field_loaded_block_)
|
||||
: IDictionary(dict_id_)
|
||||
, dict_struct(dict_struct_)
|
||||
, source_ptr(std::move(source_ptr_))
|
||||
, configuration(configuration_)
|
||||
, update_field_loaded_block(std::move(update_field_loaded_block_))
|
||||
{
|
||||
createAttributes();
|
||||
loadData();
|
||||
calculateBytesAllocated();
|
||||
}
|
||||
|
||||
template <DictionaryKeyType dictionary_key_type>
|
||||
ColumnPtr HashedArrayDictionary<dictionary_key_type>::getColumn(
|
||||
const std::string & attribute_name,
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types [[maybe_unused]],
|
||||
const ColumnPtr & default_values_column) const
|
||||
{
|
||||
if (dictionary_key_type == DictionaryKeyType::Complex)
|
||||
dict_struct.validateKeyTypes(key_types);
|
||||
|
||||
ColumnPtr result;
|
||||
|
||||
DictionaryKeysArenaHolder<dictionary_key_type> arena_holder;
|
||||
DictionaryKeysExtractor<dictionary_key_type> extractor(key_columns, arena_holder.getComplexKeyArena());
|
||||
|
||||
const size_t size = extractor.getKeysSize();
|
||||
|
||||
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
|
||||
const size_t attribute_index = dict_struct.attribute_name_to_index.find(attribute_name)->second;
|
||||
auto & attribute = attributes[attribute_index];
|
||||
|
||||
bool is_attribute_nullable = attribute.is_index_null.has_value();
|
||||
|
||||
ColumnUInt8::MutablePtr col_null_map_to;
|
||||
ColumnUInt8::Container * vec_null_map_to = nullptr;
|
||||
if (attribute.is_index_null)
|
||||
{
|
||||
col_null_map_to = ColumnUInt8::create(size, false);
|
||||
vec_null_map_to = &col_null_map_to->getData();
|
||||
}
|
||||
|
||||
auto type_call = [&](const auto & dictionary_attribute_type)
|
||||
{
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
using ValueType = DictionaryValueType<AttributeType>;
|
||||
using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
|
||||
|
||||
DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(dictionary_attribute.null_value, default_values_column);
|
||||
|
||||
auto column = ColumnProvider::getColumn(dictionary_attribute, size);
|
||||
|
||||
if constexpr (std::is_same_v<ValueType, Array>)
|
||||
{
|
||||
auto * out = column.get();
|
||||
|
||||
getItemsImpl<ValueType, false>(
|
||||
attribute,
|
||||
extractor,
|
||||
[&](const size_t, const Array & value, bool) { out->insert(value); },
|
||||
default_value_extractor);
|
||||
}
|
||||
else if constexpr (std::is_same_v<ValueType, StringRef>)
|
||||
{
|
||||
auto * out = column.get();
|
||||
|
||||
if (is_attribute_nullable)
|
||||
getItemsImpl<ValueType, true>(
|
||||
attribute,
|
||||
extractor,
|
||||
[&](size_t row, const StringRef value, bool is_null)
|
||||
{
|
||||
(*vec_null_map_to)[row] = is_null;
|
||||
out->insertData(value.data, value.size);
|
||||
},
|
||||
default_value_extractor);
|
||||
else
|
||||
getItemsImpl<ValueType, false>(
|
||||
attribute,
|
||||
extractor,
|
||||
[&](size_t, const StringRef value, bool) { out->insertData(value.data, value.size); },
|
||||
default_value_extractor);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto & out = column->getData();
|
||||
|
||||
if (is_attribute_nullable)
|
||||
getItemsImpl<ValueType, true>(
|
||||
attribute,
|
||||
extractor,
|
||||
[&](size_t row, const auto value, bool is_null)
|
||||
{
|
||||
(*vec_null_map_to)[row] = is_null;
|
||||
out[row] = value;
|
||||
},
|
||||
default_value_extractor);
|
||||
else
|
||||
getItemsImpl<ValueType, false>(
|
||||
attribute,
|
||||
extractor,
|
||||
[&](size_t row, const auto value, bool) { out[row] = value; },
|
||||
default_value_extractor);
|
||||
}
|
||||
|
||||
result = std::move(column);
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
|
||||
if (is_attribute_nullable)
|
||||
result = ColumnNullable::create(std::move(result), std::move(col_null_map_to));
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
template <DictionaryKeyType dictionary_key_type>
|
||||
ColumnUInt8::Ptr HashedArrayDictionary<dictionary_key_type>::hasKeys(const Columns & key_columns, const DataTypes & key_types) const
|
||||
{
|
||||
if (dictionary_key_type == DictionaryKeyType::Complex)
|
||||
dict_struct.validateKeyTypes(key_types);
|
||||
|
||||
DictionaryKeysArenaHolder<dictionary_key_type> arena_holder;
|
||||
DictionaryKeysExtractor<dictionary_key_type> extractor(key_columns, arena_holder.getComplexKeyArena());
|
||||
|
||||
size_t keys_size = extractor.getKeysSize();
|
||||
|
||||
auto result = ColumnUInt8::create(keys_size, false);
|
||||
auto & out = result->getData();
|
||||
|
||||
if (attributes.empty())
|
||||
{
|
||||
query_count.fetch_add(keys_size, std::memory_order_relaxed);
|
||||
return result;
|
||||
}
|
||||
|
||||
size_t keys_found = 0;
|
||||
|
||||
for (size_t requested_key_index = 0; requested_key_index < keys_size; ++requested_key_index)
|
||||
{
|
||||
auto requested_key = extractor.extractCurrentKey();
|
||||
|
||||
out[requested_key_index] = key_attribute.container.find(requested_key) != key_attribute.container.end();
|
||||
|
||||
keys_found += out[requested_key_index];
|
||||
extractor.rollbackCurrentKey();
|
||||
}
|
||||
|
||||
query_count.fetch_add(keys_size, std::memory_order_relaxed);
|
||||
found_count.fetch_add(keys_found, std::memory_order_relaxed);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
template <DictionaryKeyType dictionary_key_type>
|
||||
ColumnPtr HashedArrayDictionary<dictionary_key_type>::getHierarchy(ColumnPtr key_column [[maybe_unused]], const DataTypePtr &) const
|
||||
{
|
||||
if constexpr (dictionary_key_type == DictionaryKeyType::Simple)
|
||||
{
|
||||
PaddedPODArray<UInt64> keys_backup_storage;
|
||||
const auto & keys = getColumnVectorData(this, key_column, keys_backup_storage);
|
||||
|
||||
size_t hierarchical_attribute_index = *dict_struct.hierarchical_attribute_index;
|
||||
|
||||
const auto & dictionary_attribute = dict_struct.attributes[hierarchical_attribute_index];
|
||||
const auto & hierarchical_attribute = attributes[hierarchical_attribute_index];
|
||||
|
||||
const auto & key_attribute_container = key_attribute.container;
|
||||
|
||||
const UInt64 null_value = dictionary_attribute.null_value.template get<UInt64>();
|
||||
const AttributeContainerType<UInt64> & parent_keys_container = std::get<AttributeContainerType<UInt64>>(hierarchical_attribute.container);
|
||||
|
||||
auto is_key_valid_func = [&](auto & key) { return key_attribute_container.find(key) != key_attribute_container.end(); };
|
||||
|
||||
size_t keys_found = 0;
|
||||
|
||||
auto get_parent_func = [&](auto & hierarchy_key)
|
||||
{
|
||||
std::optional<UInt64> result;
|
||||
|
||||
auto it = key_attribute_container.find(hierarchy_key);
|
||||
|
||||
if (it != key_attribute_container.end())
|
||||
result = parent_keys_container[it->getMapped()];
|
||||
|
||||
keys_found += result.has_value();
|
||||
|
||||
return result;
|
||||
};
|
||||
|
||||
auto dictionary_hierarchy_array = getKeysHierarchyArray(keys, null_value, is_key_valid_func, get_parent_func);
|
||||
|
||||
query_count.fetch_add(keys.size(), std::memory_order_relaxed);
|
||||
found_count.fetch_add(keys_found, std::memory_order_relaxed);
|
||||
|
||||
return dictionary_hierarchy_array;
|
||||
}
|
||||
else
|
||||
{
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
template <DictionaryKeyType dictionary_key_type>
|
||||
ColumnUInt8::Ptr HashedArrayDictionary<dictionary_key_type>::isInHierarchy(
|
||||
ColumnPtr key_column [[maybe_unused]],
|
||||
ColumnPtr in_key_column [[maybe_unused]],
|
||||
const DataTypePtr &) const
|
||||
{
|
||||
if constexpr (dictionary_key_type == DictionaryKeyType::Simple)
|
||||
{
|
||||
PaddedPODArray<UInt64> keys_backup_storage;
|
||||
const auto & keys = getColumnVectorData(this, key_column, keys_backup_storage);
|
||||
|
||||
PaddedPODArray<UInt64> keys_in_backup_storage;
|
||||
const auto & keys_in = getColumnVectorData(this, in_key_column, keys_in_backup_storage);
|
||||
|
||||
size_t hierarchical_attribute_index = *dict_struct.hierarchical_attribute_index;
|
||||
|
||||
const auto & dictionary_attribute = dict_struct.attributes[hierarchical_attribute_index];
|
||||
auto & hierarchical_attribute = attributes[hierarchical_attribute_index];
|
||||
|
||||
const auto & key_attribute_container = key_attribute.container;
|
||||
|
||||
const UInt64 null_value = dictionary_attribute.null_value.template get<UInt64>();
|
||||
const AttributeContainerType<UInt64> & parent_keys_container = std::get<AttributeContainerType<UInt64>>(hierarchical_attribute.container);
|
||||
|
||||
auto is_key_valid_func = [&](auto & key) { return key_attribute_container.find(key) != key_attribute_container.end(); };
|
||||
|
||||
size_t keys_found = 0;
|
||||
|
||||
auto get_parent_func = [&](auto & hierarchy_key)
|
||||
{
|
||||
std::optional<UInt64> result;
|
||||
|
||||
auto it = key_attribute_container.find(hierarchy_key);
|
||||
|
||||
if (it != key_attribute_container.end())
|
||||
result = parent_keys_container[it->getMapped()];
|
||||
|
||||
keys_found += result.has_value();
|
||||
|
||||
return result;
|
||||
};
|
||||
|
||||
auto result = getKeysIsInHierarchyColumn(keys, keys_in, null_value, is_key_valid_func, get_parent_func);
|
||||
|
||||
query_count.fetch_add(keys.size(), std::memory_order_relaxed);
|
||||
found_count.fetch_add(keys_found, std::memory_order_relaxed);
|
||||
|
||||
return result;
|
||||
}
|
||||
else
|
||||
{
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
template <DictionaryKeyType dictionary_key_type>
|
||||
ColumnPtr HashedArrayDictionary<dictionary_key_type>::getDescendants(
|
||||
ColumnPtr key_column [[maybe_unused]],
|
||||
const DataTypePtr &,
|
||||
size_t level [[maybe_unused]]) const
|
||||
{
|
||||
if constexpr (dictionary_key_type == DictionaryKeyType::Simple)
|
||||
{
|
||||
PaddedPODArray<UInt64> keys_backup;
|
||||
const auto & keys = getColumnVectorData(this, key_column, keys_backup);
|
||||
|
||||
size_t hierarchical_attribute_index = *dict_struct.hierarchical_attribute_index;
|
||||
|
||||
const auto & hierarchical_attribute = attributes[hierarchical_attribute_index];
|
||||
const AttributeContainerType<UInt64> & parent_keys_container = std::get<AttributeContainerType<UInt64>>(hierarchical_attribute.container);
|
||||
|
||||
const auto & key_attribute_container = key_attribute.container;
|
||||
|
||||
HashMap<size_t, UInt64> index_to_key;
|
||||
index_to_key.reserve(key_attribute.container.size());
|
||||
|
||||
for (auto & [key, value] : key_attribute_container)
|
||||
index_to_key[value] = key;
|
||||
|
||||
HashMap<UInt64, PaddedPODArray<UInt64>> parent_to_child;
|
||||
|
||||
for (size_t i = 0; i < parent_keys_container.size(); ++i)
|
||||
{
|
||||
const auto * it = index_to_key.find(i);
|
||||
if (it == index_to_key.end())
|
||||
continue;
|
||||
|
||||
auto parent_key = it->getMapped();
|
||||
auto child_key = parent_keys_container[i];
|
||||
parent_to_child[parent_key].emplace_back(child_key);
|
||||
}
|
||||
|
||||
size_t keys_found = 0;
|
||||
auto result = getKeysDescendantsArray(keys, parent_to_child, level, keys_found);
|
||||
|
||||
query_count.fetch_add(keys.size(), std::memory_order_relaxed);
|
||||
found_count.fetch_add(keys_found, std::memory_order_relaxed);
|
||||
|
||||
return result;
|
||||
}
|
||||
else
|
||||
{
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
template <DictionaryKeyType dictionary_key_type>
|
||||
void HashedArrayDictionary<dictionary_key_type>::createAttributes()
|
||||
{
|
||||
const auto size = dict_struct.attributes.size();
|
||||
attributes.reserve(size);
|
||||
|
||||
for (const auto & dictionary_attribute : dict_struct.attributes)
|
||||
{
|
||||
auto type_call = [&, this](const auto & dictionary_attribute_type)
|
||||
{
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
using ValueType = DictionaryValueType<AttributeType>;
|
||||
|
||||
auto is_index_null = dictionary_attribute.is_nullable ? std::make_optional<std::vector<bool>>() : std::optional<std::vector<bool>>{};
|
||||
std::unique_ptr<Arena> string_arena = std::is_same_v<AttributeType, String> ? std::make_unique<Arena>() : nullptr;
|
||||
Attribute attribute{dictionary_attribute.underlying_type, AttributeContainerType<ValueType>(), std::move(is_index_null), std::move(string_arena)};
|
||||
attributes.emplace_back(std::move(attribute));
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(dictionary_attribute.underlying_type, type_call);
|
||||
}
|
||||
}
|
||||
|
||||
template <DictionaryKeyType dictionary_key_type>
|
||||
void HashedArrayDictionary<dictionary_key_type>::updateData()
|
||||
{
|
||||
if (!update_field_loaded_block || update_field_loaded_block->rows() == 0)
|
||||
{
|
||||
QueryPipeline pipeline(source_ptr->loadUpdatedAll());
|
||||
|
||||
PullingPipelineExecutor executor(pipeline);
|
||||
Block block;
|
||||
while (executor.pull(block))
|
||||
{
|
||||
/// We are using this to keep saved data if input stream consists of multiple blocks
|
||||
if (!update_field_loaded_block)
|
||||
update_field_loaded_block = std::make_shared<DB::Block>(block.cloneEmpty());
|
||||
|
||||
for (size_t attribute_index = 0; attribute_index < block.columns(); ++attribute_index)
|
||||
{
|
||||
const IColumn & update_column = *block.getByPosition(attribute_index).column.get();
|
||||
MutableColumnPtr saved_column = update_field_loaded_block->getByPosition(attribute_index).column->assumeMutable();
|
||||
saved_column->insertRangeFrom(update_column, 0, update_column.size());
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
auto pipe = source_ptr->loadUpdatedAll();
|
||||
mergeBlockWithPipe<dictionary_key_type>(
|
||||
dict_struct.getKeysSize(),
|
||||
*update_field_loaded_block,
|
||||
std::move(pipe));
|
||||
}
|
||||
|
||||
if (update_field_loaded_block)
|
||||
{
|
||||
resize(update_field_loaded_block->rows());
|
||||
blockToAttributes(*update_field_loaded_block.get());
|
||||
}
|
||||
}
|
||||
|
||||
template <DictionaryKeyType dictionary_key_type>
|
||||
void HashedArrayDictionary<dictionary_key_type>::blockToAttributes(const Block & block [[maybe_unused]])
|
||||
{
|
||||
size_t skip_keys_size_offset = dict_struct.getKeysSize();
|
||||
|
||||
Columns key_columns;
|
||||
key_columns.reserve(skip_keys_size_offset);
|
||||
|
||||
/// Split into keys columns and attribute columns
|
||||
for (size_t i = 0; i < skip_keys_size_offset; ++i)
|
||||
key_columns.emplace_back(block.safeGetByPosition(i).column);
|
||||
|
||||
DictionaryKeysArenaHolder<dictionary_key_type> arena_holder;
|
||||
DictionaryKeysExtractor<dictionary_key_type> keys_extractor(key_columns, arena_holder.getComplexKeyArena());
|
||||
const size_t keys_size = keys_extractor.getKeysSize();
|
||||
|
||||
Field column_value_to_insert;
|
||||
|
||||
for (size_t key_index = 0; key_index < keys_size; ++key_index)
|
||||
{
|
||||
auto key = keys_extractor.extractCurrentKey();
|
||||
|
||||
auto it = key_attribute.container.find(key);
|
||||
|
||||
if (it != key_attribute.container.end())
|
||||
{
|
||||
keys_extractor.rollbackCurrentKey();
|
||||
continue;
|
||||
}
|
||||
|
||||
if constexpr (std::is_same_v<KeyType, StringRef>)
|
||||
key = copyKeyInArena(key);
|
||||
|
||||
key_attribute.container.insert({key, element_count});
|
||||
|
||||
for (size_t attribute_index = 0; attribute_index < attributes.size(); ++attribute_index)
|
||||
{
|
||||
const IColumn & attribute_column = *block.safeGetByPosition(skip_keys_size_offset + attribute_index).column;
|
||||
auto & attribute = attributes[attribute_index];
|
||||
bool attribute_is_nullable = attribute.is_index_null.has_value();
|
||||
|
||||
attribute_column.get(key_index, column_value_to_insert);
|
||||
|
||||
auto type_call = [&](const auto & dictionary_attribute_type)
|
||||
{
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
using AttributeValueType = DictionaryValueType<AttributeType>;
|
||||
|
||||
auto & attribute_container = std::get<AttributeContainerType<AttributeValueType>>(attribute.container);
|
||||
attribute_container.emplace_back();
|
||||
|
||||
if (attribute_is_nullable)
|
||||
{
|
||||
attribute.is_index_null->emplace_back();
|
||||
|
||||
if (column_value_to_insert.isNull())
|
||||
{
|
||||
(*attribute.is_index_null).back() = true;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if constexpr (std::is_same_v<AttributeValueType, StringRef>)
|
||||
{
|
||||
String & value_to_insert = column_value_to_insert.get<String>();
|
||||
size_t value_to_insert_size = value_to_insert.size();
|
||||
|
||||
const char * string_in_arena = attribute.string_arena->insert(value_to_insert.data(), value_to_insert_size);
|
||||
|
||||
StringRef string_in_arena_reference = StringRef{string_in_arena, value_to_insert_size};
|
||||
attribute_container.back() = string_in_arena_reference;
|
||||
}
|
||||
else
|
||||
{
|
||||
auto value_to_insert = column_value_to_insert.get<NearestFieldType<AttributeValueType>>();
|
||||
attribute_container.back() = value_to_insert;
|
||||
}
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
}
|
||||
|
||||
++element_count;
|
||||
keys_extractor.rollbackCurrentKey();
|
||||
}
|
||||
}
|
||||
|
||||
template <DictionaryKeyType dictionary_key_type>
|
||||
void HashedArrayDictionary<dictionary_key_type>::resize(size_t added_rows)
|
||||
{
|
||||
if (unlikely(!added_rows))
|
||||
return;
|
||||
|
||||
key_attribute.container.reserve(added_rows);
|
||||
}
|
||||
|
||||
template <DictionaryKeyType dictionary_key_type>
|
||||
template <typename AttributeType, bool is_nullable, typename ValueSetter, typename DefaultValueExtractor>
|
||||
void HashedArrayDictionary<dictionary_key_type>::getItemsImpl(
|
||||
const Attribute & attribute,
|
||||
DictionaryKeysExtractor<dictionary_key_type> & keys_extractor,
|
||||
ValueSetter && set_value [[maybe_unused]],
|
||||
DefaultValueExtractor & default_value_extractor) const
|
||||
{
|
||||
const auto & key_attribute_container = key_attribute.container;
|
||||
const auto & attribute_container = std::get<AttributeContainerType<AttributeType>>(attribute.container);
|
||||
const size_t keys_size = keys_extractor.getKeysSize();
|
||||
|
||||
size_t keys_found = 0;
|
||||
|
||||
for (size_t key_index = 0; key_index < keys_size; ++key_index)
|
||||
{
|
||||
auto key = keys_extractor.extractCurrentKey();
|
||||
|
||||
const auto it = key_attribute_container.find(key);
|
||||
|
||||
if (it != key_attribute_container.end())
|
||||
{
|
||||
size_t element_index = it->getMapped();
|
||||
|
||||
const auto & element = attribute_container[element_index];
|
||||
|
||||
if constexpr (is_nullable)
|
||||
set_value(key_index, element, (*attribute.is_index_null)[element_index]);
|
||||
else
|
||||
set_value(key_index, element, false);
|
||||
|
||||
++keys_found;
|
||||
}
|
||||
else
|
||||
{
|
||||
if constexpr (is_nullable)
|
||||
set_value(key_index, default_value_extractor[key_index], default_value_extractor.isNullAt(key_index));
|
||||
else
|
||||
set_value(key_index, default_value_extractor[key_index], false);
|
||||
}
|
||||
|
||||
keys_extractor.rollbackCurrentKey();
|
||||
}
|
||||
|
||||
query_count.fetch_add(keys_size, std::memory_order_relaxed);
|
||||
found_count.fetch_add(keys_found, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
template <DictionaryKeyType dictionary_key_type>
|
||||
StringRef HashedArrayDictionary<dictionary_key_type>::copyKeyInArena(StringRef key)
|
||||
{
|
||||
size_t key_size = key.size;
|
||||
char * place_for_key = complex_key_arena.alloc(key_size);
|
||||
memcpy(reinterpret_cast<void *>(place_for_key), reinterpret_cast<const void *>(key.data), key_size);
|
||||
StringRef updated_key{place_for_key, key_size};
|
||||
return updated_key;
|
||||
}
|
||||
|
||||
template <DictionaryKeyType dictionary_key_type>
|
||||
void HashedArrayDictionary<dictionary_key_type>::loadData()
|
||||
{
|
||||
if (!source_ptr->hasUpdateField())
|
||||
{
|
||||
QueryPipeline pipeline;
|
||||
pipeline = QueryPipeline(source_ptr->loadAll());
|
||||
|
||||
PullingPipelineExecutor executor(pipeline);
|
||||
Block block;
|
||||
while (executor.pull(block))
|
||||
{
|
||||
resize(block.rows());
|
||||
blockToAttributes(block);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
updateData();
|
||||
}
|
||||
|
||||
if (configuration.require_nonempty && 0 == element_count)
|
||||
throw Exception(ErrorCodes::DICTIONARY_IS_EMPTY,
|
||||
"{}: dictionary source is empty and 'require_nonempty' property is set.",
|
||||
full_name);
|
||||
}
|
||||
|
||||
template <DictionaryKeyType dictionary_key_type>
|
||||
void HashedArrayDictionary<dictionary_key_type>::calculateBytesAllocated()
|
||||
{
|
||||
bytes_allocated += attributes.size() * sizeof(attributes.front());
|
||||
|
||||
bytes_allocated += key_attribute.container.size();
|
||||
|
||||
for (auto & attribute : attributes)
|
||||
{
|
||||
auto type_call = [&](const auto & dictionary_attribute_type)
|
||||
{
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
using ValueType = DictionaryValueType<AttributeType>;
|
||||
|
||||
const auto & container = std::get<AttributeContainerType<ValueType>>(attribute.container);
|
||||
bytes_allocated += sizeof(AttributeContainerType<ValueType>);
|
||||
|
||||
if constexpr (std::is_same_v<ValueType, Array>)
|
||||
{
|
||||
/// It is not accurate calculations
|
||||
bytes_allocated += sizeof(Array) * container.size();
|
||||
}
|
||||
else
|
||||
{
|
||||
bytes_allocated += container.allocated_bytes();
|
||||
}
|
||||
|
||||
bucket_count = container.capacity();
|
||||
|
||||
if constexpr (std::is_same_v<ValueType, StringRef>)
|
||||
bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
|
||||
if (attribute.string_arena)
|
||||
bytes_allocated += attribute.string_arena->size();
|
||||
|
||||
if (attribute.is_index_null.has_value())
|
||||
bytes_allocated += (*attribute.is_index_null).size();
|
||||
}
|
||||
|
||||
bytes_allocated += complex_key_arena.size();
|
||||
|
||||
if (update_field_loaded_block)
|
||||
bytes_allocated += update_field_loaded_block->allocatedBytes();
|
||||
}
|
||||
|
||||
template <DictionaryKeyType dictionary_key_type>
|
||||
Pipe HashedArrayDictionary<dictionary_key_type>::read(const Names & column_names, size_t max_block_size) const
|
||||
{
|
||||
PaddedPODArray<HashedArrayDictionary::KeyType> keys;
|
||||
keys.reserve(key_attribute.container.size());
|
||||
|
||||
for (auto & [key, _] : key_attribute.container)
|
||||
keys.emplace_back(key);
|
||||
|
||||
return Pipe(std::make_shared<DictionarySource>(DictionarySourceData(shared_from_this(), std::move(keys), column_names), max_block_size));
|
||||
}
|
||||
|
||||
template class HashedArrayDictionary<DictionaryKeyType::Simple>;
|
||||
template class HashedArrayDictionary<DictionaryKeyType::Complex>;
|
||||
|
||||
void registerDictionaryArrayHashed(DictionaryFactory & factory)
|
||||
{
|
||||
auto create_layout = [](const std::string & full_name,
|
||||
const DictionaryStructure & dict_struct,
|
||||
const Poco::Util::AbstractConfiguration & config,
|
||||
const std::string & config_prefix,
|
||||
DictionarySourcePtr source_ptr,
|
||||
DictionaryKeyType dictionary_key_type) -> DictionaryPtr
|
||||
{
|
||||
if (dictionary_key_type == DictionaryKeyType::Simple && dict_struct.key)
|
||||
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "'key' is not supported for simple key hashed array dictionary");
|
||||
else if (dictionary_key_type == DictionaryKeyType::Complex && dict_struct.id)
|
||||
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "'id' is not supported for complex key hashed array dictionary");
|
||||
|
||||
if (dict_struct.range_min || dict_struct.range_max)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"{}: elements .structure.range_min and .structure.range_max should be defined only "
|
||||
"for a dictionary of layout 'range_hashed'",
|
||||
full_name);
|
||||
|
||||
const auto dict_id = StorageID::fromDictionaryConfig(config, config_prefix);
|
||||
const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"};
|
||||
const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false);
|
||||
|
||||
HashedArrayDictionaryStorageConfiguration configuration{require_nonempty, dict_lifetime};
|
||||
|
||||
if (dictionary_key_type == DictionaryKeyType::Simple)
|
||||
return std::make_unique<HashedArrayDictionary<DictionaryKeyType::Simple>>(dict_id, dict_struct, std::move(source_ptr), configuration);
|
||||
else
|
||||
return std::make_unique<HashedArrayDictionary<DictionaryKeyType::Complex>>(dict_id, dict_struct, std::move(source_ptr), configuration);
|
||||
};
|
||||
|
||||
using namespace std::placeholders;
|
||||
|
||||
factory.registerLayout("hashed_array",
|
||||
[=](auto && a, auto && b, auto && c, auto && d, DictionarySourcePtr e, ContextPtr /* global_context */, bool /*created_from_ddl*/){ return create_layout(a, b, c, d, std::move(e), DictionaryKeyType::Simple); }, false);
|
||||
factory.registerLayout("complex_key_hashed_array",
|
||||
[=](auto && a, auto && b, auto && c, auto && d, DictionarySourcePtr e, ContextPtr /* global_context */, bool /*created_from_ddl*/){ return create_layout(a, b, c, d, std::move(e), DictionaryKeyType::Complex); }, true);
|
||||
}
|
||||
|
||||
}
|
src/Dictionaries/HashedArrayDictionary.h (Normal file, 211 lines)
@@ -0,0 +1,211 @@
|
||||
#pragma once
|
||||
|
||||
#include <atomic>
|
||||
#include <memory>
|
||||
#include <variant>
|
||||
#include <optional>
|
||||
|
||||
#include <Common/SparseHashMap.h>
|
||||
|
||||
#include <Common/HashTable/HashMap.h>
|
||||
#include <Common/HashTable/HashSet.h>
|
||||
#include <Core/Block.h>
|
||||
|
||||
#include <Dictionaries/DictionaryStructure.h>
|
||||
#include <Dictionaries/IDictionary.h>
|
||||
#include <Dictionaries/IDictionarySource.h>
|
||||
#include <Dictionaries/DictionaryHelpers.h>
|
||||
|
||||
/** This dictionary stores all attributes in arrays.
|
||||
* Key is stored in hash table and value is index into attribute array.
|
||||
*/
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
struct HashedArrayDictionaryStorageConfiguration
|
||||
{
|
||||
const bool require_nonempty;
|
||||
const DictionaryLifetime lifetime;
|
||||
};
|
||||
|
||||
template <DictionaryKeyType dictionary_key_type>
|
||||
class HashedArrayDictionary final : public IDictionary
|
||||
{
|
||||
public:
|
||||
using KeyType = std::conditional_t<dictionary_key_type == DictionaryKeyType::Simple, UInt64, StringRef>;
|
||||
|
||||
HashedArrayDictionary(
|
||||
const StorageID & dict_id_,
|
||||
const DictionaryStructure & dict_struct_,
|
||||
DictionarySourcePtr source_ptr_,
|
||||
const HashedArrayDictionaryStorageConfiguration & configuration_,
|
||||
BlockPtr update_field_loaded_block_ = nullptr);
|
||||
|
||||
std::string getTypeName() const override
|
||||
{
|
||||
if constexpr (dictionary_key_type == DictionaryKeyType::Simple)
|
||||
return "HashedArray";
|
||||
else
|
||||
return "ComplexHashedArray";
|
||||
}
|
||||
|
||||
size_t getBytesAllocated() const override { return bytes_allocated; }
|
||||
|
||||
size_t getQueryCount() const override { return query_count.load(std::memory_order_relaxed); }
|
||||
|
||||
double getFoundRate() const override
|
||||
{
|
||||
size_t queries = query_count.load(std::memory_order_relaxed);
|
||||
if (!queries)
|
||||
return 0;
|
||||
return static_cast<double>(found_count.load(std::memory_order_relaxed)) / queries;
|
||||
}
|
||||
|
||||
double getHitRate() const override { return 1.0; }
|
||||
|
||||
size_t getElementCount() const override { return element_count; }
|
||||
|
||||
double getLoadFactor() const override { return static_cast<double>(element_count) / bucket_count; }
|
||||
|
||||
std::shared_ptr<const IExternalLoadable> clone() const override
|
||||
{
|
||||
        return std::make_shared<HashedArrayDictionary<dictionary_key_type>>(getDictionaryID(), dict_struct, source_ptr->clone(), configuration, update_field_loaded_block);
    }

    const IDictionarySource * getSource() const override { return source_ptr.get(); }

    const DictionaryLifetime & getLifetime() const override { return configuration.lifetime; }

    const DictionaryStructure & getStructure() const override { return dict_struct; }

    bool isInjective(const std::string & attribute_name) const override
    {
        return dict_struct.getAttribute(attribute_name).injective;
    }

    DictionaryKeyType getKeyType() const override { return dictionary_key_type; }

    ColumnPtr getColumn(
        const std::string& attribute_name,
        const DataTypePtr & result_type,
        const Columns & key_columns,
        const DataTypes & key_types,
        const ColumnPtr & default_values_column) const override;

    ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;

    bool hasHierarchy() const override { return dictionary_key_type == DictionaryKeyType::Simple && dict_struct.hierarchical_attribute_index.has_value(); }

    ColumnPtr getHierarchy(ColumnPtr key_column, const DataTypePtr & hierarchy_attribute_type) const override;

    ColumnUInt8::Ptr isInHierarchy(
        ColumnPtr key_column,
        ColumnPtr in_key_column,
        const DataTypePtr & key_type) const override;

    ColumnPtr getDescendants(
        ColumnPtr key_column,
        const DataTypePtr & key_type,
        size_t level) const override;

    Pipe read(const Names & column_names, size_t max_block_size) const override;

private:
    using KeyContainerType = std::conditional_t<
        dictionary_key_type == DictionaryKeyType::Simple,
        HashMap<UInt64, size_t>,
        HashMapWithSavedHash<StringRef, size_t, DefaultHash<StringRef>>>;

    template <typename Value>
    using AttributeContainerType = std::conditional_t<std::is_same_v<Value, Array>, std::vector<Value>, PaddedPODArray<Value>>;

    struct Attribute final
    {
        AttributeUnderlyingType type;

        std::variant<
            AttributeContainerType<UInt8>,
            AttributeContainerType<UInt16>,
            AttributeContainerType<UInt32>,
            AttributeContainerType<UInt64>,
            AttributeContainerType<UInt128>,
            AttributeContainerType<UInt256>,
            AttributeContainerType<Int8>,
            AttributeContainerType<Int16>,
            AttributeContainerType<Int32>,
            AttributeContainerType<Int64>,
            AttributeContainerType<Int128>,
            AttributeContainerType<Int256>,
            AttributeContainerType<Decimal32>,
            AttributeContainerType<Decimal64>,
            AttributeContainerType<Decimal128>,
            AttributeContainerType<Decimal256>,
            AttributeContainerType<Float32>,
            AttributeContainerType<Float64>,
            AttributeContainerType<UUID>,
            AttributeContainerType<StringRef>,
            AttributeContainerType<Array>>
            container;

        std::optional<std::vector<bool>> is_index_null;
        std::unique_ptr<Arena> string_arena;
    };

    struct KeyAttribute final
    {
        KeyContainerType container;
    };

    void createAttributes();

    void blockToAttributes(const Block & block);

    void updateData();

    void loadData();

    void calculateBytesAllocated();

    template <typename AttributeType, bool is_nullable, typename ValueSetter, typename DefaultValueExtractor>
    void getItemsImpl(
        const Attribute & attribute,
        DictionaryKeysExtractor<dictionary_key_type> & keys_extractor,
        ValueSetter && set_value,
        DefaultValueExtractor & default_value_extractor) const;

    template <typename GetContainerFunc>
    void getAttributeContainer(size_t attribute_index, GetContainerFunc && get_container_func);

    template <typename GetContainerFunc>
    void getAttributeContainer(size_t attribute_index, GetContainerFunc && get_container_func) const;

    void resize(size_t added_rows);

    StringRef copyKeyInArena(StringRef key);

    const DictionaryStructure dict_struct;
    const DictionarySourcePtr source_ptr;
    const HashedArrayDictionaryStorageConfiguration configuration;

    std::vector<Attribute> attributes;

    KeyAttribute key_attribute;

    size_t bytes_allocated = 0;
    size_t element_count = 0;
    size_t bucket_count = 0;
    mutable std::atomic<size_t> query_count{0};
    mutable std::atomic<size_t> found_count{0};

    BlockPtr update_field_loaded_block;
    Arena complex_key_arena;
};

extern template class HashedArrayDictionary<DictionaryKeyType::Simple>;
extern template class HashedArrayDictionary<DictionaryKeyType::Complex>;

}
@ -733,8 +733,18 @@ void registerDictionaryHashed(DictionaryFactory & factory)
        const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"};
        const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false);

        const std::string & layout_prefix = sparse ? ".layout.sparse_hashed" : ".layout.hashed";
        const bool preallocate = config.getBool(config_prefix + layout_prefix + ".preallocate", false);
        std::string dictionary_layout_name;

        if (dictionary_key_type == DictionaryKeyType::Simple)
            dictionary_layout_name = "hashed";
        else
            dictionary_layout_name = "complex_key_hashed";

        if (sparse)
            dictionary_layout_name = "sparse_" + dictionary_layout_name;

        const std::string dictionary_layout_prefix = ".layout." + dictionary_layout_name;
        const bool preallocate = config.getBool(config_prefix + dictionary_layout_prefix + ".preallocate", false);

        HashedDictionaryStorageConfiguration configuration{preallocate, require_nonempty, dict_lifetime};

@ -18,6 +18,7 @@
|
||||
#include <Common/MemorySanitizer.h>
|
||||
#include <Common/HashTable/HashMap.h>
|
||||
#include <IO/AIO.h>
|
||||
#include <IO/BufferWithOwnMemory.h>
|
||||
#include <Dictionaries/DictionaryStructure.h>
|
||||
#include <Dictionaries/ICacheDictionaryStorage.h>
|
||||
#include <Dictionaries/DictionaryHelpers.h>
|
||||
|
@ -28,6 +28,7 @@ void registerDictionaryComplexKeyHashed(DictionaryFactory & factory);
|
||||
void registerDictionaryTrie(DictionaryFactory & factory);
|
||||
void registerDictionaryFlat(DictionaryFactory & factory);
|
||||
void registerDictionaryHashed(DictionaryFactory & factory);
|
||||
void registerDictionaryArrayHashed(DictionaryFactory & factory);
|
||||
void registerDictionaryCache(DictionaryFactory & factory);
|
||||
void registerDictionaryPolygon(DictionaryFactory & factory);
|
||||
void registerDictionaryDirect(DictionaryFactory & factory);
|
||||
@ -60,6 +61,7 @@ void registerDictionaries()
|
||||
registerDictionaryTrie(factory);
|
||||
registerDictionaryFlat(factory);
|
||||
registerDictionaryHashed(factory);
|
||||
registerDictionaryArrayHashed(factory);
|
||||
registerDictionaryCache(factory);
|
||||
registerDictionaryPolygon(factory);
|
||||
registerDictionaryDirect(factory);
|
||||
|
@ -13,6 +13,7 @@
|
||||
#include <Processors/Formats/Impl/ParallelFormattingOutputFormat.h>
|
||||
#include <Poco/URI.h>
|
||||
|
||||
#include <IO/BufferWithOwnMemory.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
|
||||
namespace DB
|
||||
|
@ -1,9 +1,9 @@
|
||||
#pragma once
|
||||
|
||||
#include <Common/Allocator.h>
|
||||
#include <Columns/IColumn.h>
|
||||
#include <Formats/FormatSettings.h>
|
||||
#include <Interpreters/Context_fwd.h>
|
||||
#include <IO/BufferWithOwnMemory.h>
|
||||
#include <base/types.h>
|
||||
|
||||
#include <boost/noncopyable.hpp>
|
||||
@ -34,6 +34,9 @@ struct RowOutputFormatParams;
|
||||
using InputFormatPtr = std::shared_ptr<IInputFormat>;
|
||||
using OutputFormatPtr = std::shared_ptr<IOutputFormat>;
|
||||
|
||||
template <typename Allocator>
|
||||
struct Memory;
|
||||
|
||||
FormatSettings getFormatSettings(ContextPtr context);
|
||||
|
||||
template <typename T>
|
||||
@ -55,7 +58,7 @@ public:
|
||||
*/
|
||||
using FileSegmentationEngine = std::function<std::pair<bool, size_t>(
|
||||
ReadBuffer & buf,
|
||||
DB::Memory<> & memory,
|
||||
DB::Memory<Allocator<false>> & memory,
|
||||
size_t min_chunk_bytes)>;
|
||||
|
||||
/// This callback allows to perform some additional actions after writing a single row.
|
||||
|
@ -1,4 +1,5 @@
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <Formats/JSONEachRowUtils.h>
|
||||
#include <base/find_symbols.h>
|
||||
|
||||
namespace DB
|
||||
|
@ -1,5 +1,9 @@
|
||||
#pragma once
|
||||
|
||||
#include <IO/BufferWithOwnMemory.h>
|
||||
#include <IO/ReadBuffer.h>
|
||||
#include <utility>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
|
@ -124,8 +124,8 @@ public:
|
||||
*/
|
||||
struct Instruction
|
||||
{
|
||||
const IColumn * condition = nullptr;
|
||||
const IColumn * source = nullptr;
|
||||
IColumn::Ptr condition = nullptr;
|
||||
IColumn::Ptr source = nullptr;
|
||||
|
||||
bool condition_always_true = false;
|
||||
bool condition_is_nullable = false;
|
||||
@ -160,15 +160,15 @@ public:
|
||||
}
|
||||
else
|
||||
{
|
||||
const ColumnWithTypeAndName & cond_col = arguments[i];
|
||||
IColumn::Ptr cond_col = arguments[i].column->convertToFullColumnIfLowCardinality();
|
||||
|
||||
/// We skip branches that are always false.
|
||||
/// If we encounter a branch that is always true, we can finish.
|
||||
|
||||
if (cond_col.column->onlyNull())
|
||||
if (cond_col->onlyNull())
|
||||
continue;
|
||||
|
||||
if (const auto * column_const = checkAndGetColumn<ColumnConst>(*cond_col.column))
|
||||
if (const auto * column_const = checkAndGetColumn<ColumnConst>(*cond_col))
|
||||
{
|
||||
Field value = column_const->getField();
|
||||
|
||||
@ -181,26 +181,24 @@ public:
|
||||
}
|
||||
else
|
||||
{
|
||||
if (isColumnNullable(*cond_col.column))
|
||||
instruction.condition_is_nullable = true;
|
||||
|
||||
instruction.condition = cond_col.column.get();
|
||||
instruction.condition = cond_col;
|
||||
instruction.condition_is_nullable = instruction.condition->isNullable();
|
||||
}
|
||||
|
||||
instruction.condition_is_short = cond_col.column->size() < arguments[0].column->size();
|
||||
instruction.condition_is_short = cond_col->size() < arguments[0].column->size();
|
||||
}
|
||||
|
||||
const ColumnWithTypeAndName & source_col = arguments[source_idx];
|
||||
instruction.source_is_short = source_col.column->size() < arguments[0].column->size();
|
||||
if (source_col.type->equals(*return_type))
|
||||
{
|
||||
instruction.source = source_col.column.get();
|
||||
instruction.source = source_col.column;
|
||||
}
|
||||
else
|
||||
{
|
||||
/// Cast all columns to result type.
|
||||
converted_columns_holder.emplace_back(castColumn(source_col, return_type));
|
||||
instruction.source = converted_columns_holder.back().get();
|
||||
instruction.source = converted_columns_holder.back();
|
||||
}
|
||||
|
||||
if (instruction.source && isColumnConst(*instruction.source))
|
||||
|
@ -7,17 +7,6 @@
|
||||
#include <cstring>
|
||||
#include <cassert>
|
||||
|
||||
#if defined(__OpenBSD__) || defined(__FreeBSD__) || defined (__ANDROID__)
|
||||
# include <sys/endian.h>
|
||||
#elif defined(__sun)
|
||||
# include <endian.h>
|
||||
#elif defined(__APPLE__)
|
||||
# include <libkern/OSByteOrder.h>
|
||||
|
||||
# define htobe64(x) OSSwapHostToBigInt64(x)
|
||||
# define be64toh(x) OSSwapBigToHostInt64(x)
|
||||
#endif
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -152,7 +141,7 @@ private:
|
||||
memcpy(&tmp_buffer, source_current, bytes_to_read);
|
||||
source_current += bytes_to_read;
|
||||
|
||||
tmp_buffer = be64toh(tmp_buffer);
|
||||
tmp_buffer = __builtin_bswap64(tmp_buffer);
|
||||
|
||||
bits_buffer |= BufferType(tmp_buffer) << ((sizeof(BufferType) - sizeof(tmp_buffer)) * 8 - bits_count);
|
||||
bits_count += static_cast<UInt8>(bytes_to_read) * 8;
|
||||
@ -200,7 +189,7 @@ public:
|
||||
capacity = BIT_BUFFER_SIZE - bits_count;
|
||||
}
|
||||
|
||||
// write low bits of value as high bits of bits_buffer
|
||||
// write low bits of value as high bits of bits_buffer
|
||||
const UInt64 mask = maskLowBits<UInt64>(bits_to_write);
|
||||
BufferType v = value & mask;
|
||||
v <<= capacity - bits_to_write;
|
||||
@ -212,7 +201,7 @@ public:
|
||||
// flush contents of bits_buffer to the dest_current, partial bytes are completed with zeroes.
|
||||
inline void flush()
|
||||
{
|
||||
bits_count = (bits_count + 8 - 1) & ~(8 - 1); // align UP to 8-bytes, so doFlush will write ALL data from bits_buffer
|
||||
bits_count = (bits_count + 8 - 1) & ~(8 - 1); // align up to 8-bytes, so doFlush will write all data from bits_buffer
|
||||
while (bits_count != 0)
|
||||
doFlush();
|
||||
}
|
||||
@ -231,13 +220,12 @@ private:
|
||||
|
||||
if (available < to_write)
|
||||
{
|
||||
throw Exception("Can not write past end of buffer. Space available "
|
||||
+ std::to_string(available) + " bytes, required to write: "
|
||||
+ std::to_string(to_write) + ".",
|
||||
ErrorCodes::CANNOT_WRITE_AFTER_END_OF_BUFFER);
|
||||
throw Exception(ErrorCodes::CANNOT_WRITE_AFTER_END_OF_BUFFER,
|
||||
"Can not write past end of buffer. Space available {} bytes, required to write {} bytes.",
|
||||
available, to_write);
|
||||
}
|
||||
|
||||
const auto tmp_buffer = htobe64(static_cast<UInt64>(bits_buffer >> (sizeof(bits_buffer) - sizeof(UInt64)) * 8));
|
||||
const auto tmp_buffer = __builtin_bswap64(static_cast<UInt64>(bits_buffer >> (sizeof(bits_buffer) - sizeof(UInt64)) * 8));
|
||||
memcpy(dest_current, &tmp_buffer, to_write);
|
||||
dest_current += to_write;
|
||||
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include <Formats/FormatSettings.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/WriteBufferFromString.h>
|
||||
#include <IO/BufferWithOwnMemory.h>
|
||||
#include <IO/readFloatText.h>
|
||||
#include <IO/Operators.h>
|
||||
#include <base/find_symbols.h>
|
||||
@ -1120,7 +1121,7 @@ void skipToUnescapedNextLineOrEOF(ReadBuffer & buf)
|
||||
}
|
||||
}
|
||||
|
||||
void saveUpToPosition(ReadBuffer & in, DB::Memory<> & memory, char * current)
|
||||
void saveUpToPosition(ReadBuffer & in, Memory<> & memory, char * current)
|
||||
{
|
||||
assert(current >= in.position());
|
||||
assert(current <= in.buffer().end());
|
||||
@ -1140,7 +1141,7 @@ void saveUpToPosition(ReadBuffer & in, DB::Memory<> & memory, char * current)
|
||||
in.position() = current;
|
||||
}
|
||||
|
||||
bool loadAtPosition(ReadBuffer & in, DB::Memory<> & memory, char * & current)
|
||||
bool loadAtPosition(ReadBuffer & in, Memory<> & memory, char * & current)
|
||||
{
|
||||
assert(current <= in.buffer().end());
|
||||
|
||||
|
@ -19,6 +19,7 @@
|
||||
#include <Core/DecimalFunctions.h>
|
||||
#include <Core/UUID.h>
|
||||
|
||||
#include <Common/Allocator.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/StringUtils/StringUtils.h>
|
||||
#include <Common/Arena.h>
|
||||
@ -29,7 +30,6 @@
|
||||
#include <IO/CompressionMethod.h>
|
||||
#include <IO/ReadBuffer.h>
|
||||
#include <IO/ReadBufferFromMemory.h>
|
||||
#include <IO/BufferWithOwnMemory.h>
|
||||
#include <IO/VarInt.h>
|
||||
|
||||
#include <DataTypes/DataTypeDateTime.h>
|
||||
@ -41,6 +41,9 @@ static constexpr auto DEFAULT_MAX_STRING_SIZE = 1_GiB;
|
||||
namespace DB
|
||||
{
|
||||
|
||||
template <typename Allocator>
|
||||
struct Memory;
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int CANNOT_PARSE_DATE;
|
||||
@ -1290,7 +1293,7 @@ void skipToUnescapedNextLineOrEOF(ReadBuffer & buf);
|
||||
/** This function just copies the data from buffer's internal position (in.position())
|
||||
* to current position (from arguments) into memory.
|
||||
*/
|
||||
void saveUpToPosition(ReadBuffer & in, Memory<> & memory, char * current);
|
||||
void saveUpToPosition(ReadBuffer & in, Memory<Allocator<false>> & memory, char * current);
|
||||
|
||||
/** This function is negative to eof().
|
||||
* In fact it returns whether the data was loaded to internal ReadBuffers's buffer or not.
|
||||
@ -1299,7 +1302,7 @@ void saveUpToPosition(ReadBuffer & in, Memory<> & memory, char * current);
|
||||
* of our buffer and the current cursor in the end of the buffer. When we call eof() it calls next().
|
||||
* And this function can fill the buffer with new data, so we will lose the data from previous buffer state.
|
||||
*/
|
||||
bool loadAtPosition(ReadBuffer & in, Memory<> & memory, char * & current);
|
||||
bool loadAtPosition(ReadBuffer & in, Memory<Allocator<false>> & memory, char * & current);
|
||||
|
||||
|
||||
struct PcgDeserializer
|
||||
|
@ -6,3 +6,6 @@ target_link_libraries(select_parser_fuzzer PRIVATE clickhouse_parsers ${LIB_FUZZING_ENGINE})

add_executable(create_parser_fuzzer create_parser_fuzzer.cpp ${SRCS})
target_link_libraries(create_parser_fuzzer PRIVATE clickhouse_parsers ${LIB_FUZZING_ENGINE})

add_subdirectory(codegen_fuzzer)

48
src/Parsers/fuzzers/codegen_fuzzer/CMakeLists.txt
Normal file
@ -0,0 +1,48 @@
find_package(Protobuf REQUIRED)

set (CURRENT_DIR_IN_SOURCES "${ClickHouse_SOURCE_DIR}/src/Parsers/fuzzers/codegen_fuzzer")
set (CURRENT_DIR_IN_BINARY "${ClickHouse_BINARY_DIR}/src/Parsers/fuzzers/codegen_fuzzer")

# Copy scripts and template file to build directory to generate .proto and .cpp file from them
configure_file(
    "${CURRENT_DIR_IN_SOURCES}/gen.py"
    "${CURRENT_DIR_IN_BINARY}/gen.py"
    COPYONLY)
configure_file(
    "${CURRENT_DIR_IN_SOURCES}/update.sh"
    "${CURRENT_DIR_IN_BINARY}/update.sh"
    COPYONLY)

configure_file(
    "${CURRENT_DIR_IN_SOURCES}/clickhouse-template.g"
    "${CURRENT_DIR_IN_BINARY}/clickhouse-template.g"
    COPYONLY)

# Note that it depends on all.dict file!
add_custom_command(
    OUTPUT
        "${CURRENT_DIR_IN_BINARY}/clickhouse.g"
    COMMAND ./update.sh "${ClickHouse_SOURCE_DIR}/tests/fuzz/all.dict"
)

add_custom_command(
    OUTPUT
        "${CURRENT_DIR_IN_BINARY}/out.cpp"
        "${CURRENT_DIR_IN_BINARY}/out.proto"
    COMMAND python3 gen.py clickhouse.g out.cpp out.proto
    DEPENDS "${CURRENT_DIR_IN_BINARY}/clickhouse.g"
)

PROTOBUF_GENERATE_CPP(PROTO_SRCS PROTO_HDRS "${CURRENT_DIR_IN_BINARY}/out.proto")
set(FUZZER_SRCS codegen_select_fuzzer.cpp "${CURRENT_DIR_IN_BINARY}/out.cpp" ${PROTO_SRCS} ${PROTO_HDRS})

set(CMAKE_INCLUDE_CURRENT_DIR TRUE)

add_executable(codegen_select_fuzzer ${FUZZER_SRCS})

set_source_files_properties("${PROTO_SRCS}" "out.cpp" PROPERTIES COMPILE_FLAGS "-Wno-reserved-identifier")

target_include_directories(codegen_select_fuzzer BEFORE PRIVATE "${Protobuf_INCLUDE_DIR}" "${CMAKE_CURRENT_BINARY_DIR}")
target_include_directories(codegen_select_fuzzer BEFORE PRIVATE "${LibProtobufMutator_SOURCE_DIR}")
target_include_directories(codegen_select_fuzzer BEFORE PRIVATE "${LibProtobufMutator_SOURCE_DIR}/src")
target_link_libraries(codegen_select_fuzzer PRIVATE protobuf-mutator dbms ${LIB_FUZZING_ENGINE})

121
src/Parsers/fuzzers/codegen_fuzzer/clickhouse-template.g
Normal file
@ -0,0 +1,121 @@
|
||||
" ";
|
||||
" ";
|
||||
" ";
|
||||
";";
|
||||
|
||||
|
||||
"(" $1 ")";
|
||||
"(" $1 ", " $2 ")";
|
||||
"(" $1 ", " $2 ", " $3 ")";
|
||||
|
||||
$1 ", " $2 ;
|
||||
$1 ", " $2 ", " $3 ;
|
||||
$1 ", " $2 ", " $3 ", " $4 ;
|
||||
$1 ", " $2 ", " $3 ", " $4 ", " $5 ;
|
||||
|
||||
"[" $1 ", " $2 "]";
|
||||
"[" $1 ", " $2 ", " $3 "]";
|
||||
"[" $1 ", " $2 ", " $3 ", " $4 "]";
|
||||
"[" $1 ", " $2 ", " $3 ", " $4 ", " $5 "]";
|
||||
|
||||
$0 "(" $1 ")";
|
||||
$0 "(" $1 ", " $2 ")";
|
||||
$0 "(" $1 ", " $2 ", " $3 ")";
|
||||
|
||||
$1 " as " $2 ;
|
||||
|
||||
|
||||
// TODO: add more clickhouse specific stuff
|
||||
"SELECT " $1 " FROM " $2 " WHERE " $3 ;
|
||||
"SELECT " $1 " FROM " $2 " GROUP BY " $3 ;
|
||||
"SELECT " $1 " FROM " $2 " SORT BY " $3 ;
|
||||
"SELECT " $1 " FROM " $2 " LIMIT " $3 ;
|
||||
"SELECT " $1 " FROM " $2 " JOIN " $3 ;
|
||||
"SELECT " $1 " FROM " $2 " ARRAY JOIN " $3 ;
|
||||
"SELECT " $1 " FROM " $2 " JOIN " $3 " ON " $4 ;
|
||||
"SELECT " $1 " FROM " $2 " JOIN " $3 " USING " $5 ;
|
||||
"SELECT " $1 " INTO OUTFILE " $2 ;
|
||||
|
||||
"WITH " $1 " AS " $2 ;
|
||||
|
||||
"{" $1 ":" $2 "}";
|
||||
"[" $1 "," $2 "]";
|
||||
"[]";
|
||||
|
||||
|
||||
" x ";
|
||||
"x";
|
||||
" `x` ";
|
||||
"`x`";
|
||||
|
||||
" \"value\" ";
|
||||
"\"value\"";
|
||||
" 0 ";
|
||||
"0";
|
||||
"1";
|
||||
"2";
|
||||
"123123123123123123";
|
||||
"182374019873401982734091873420923123123123123123";
|
||||
"1e-1";
|
||||
"1.1";
|
||||
"\"\"";
|
||||
" '../../../../../../../../../etc/passwd' ";
|
||||
|
||||
"/";
|
||||
"=";
|
||||
"==";
|
||||
"!=";
|
||||
"<>";
|
||||
"<";
|
||||
"<=";
|
||||
">";
|
||||
">=";
|
||||
"<<";
|
||||
"|<<";
|
||||
"&";
|
||||
"|";
|
||||
"||";
|
||||
"<|";
|
||||
"|>";
|
||||
"+";
|
||||
"-";
|
||||
"~";
|
||||
"*";
|
||||
"/";
|
||||
"\\";
|
||||
"%";
|
||||
"";
|
||||
".";
|
||||
",";
|
||||
",";
|
||||
",";
|
||||
",";
|
||||
",";
|
||||
",";
|
||||
"(";
|
||||
")";
|
||||
"(";
|
||||
")";
|
||||
"(";
|
||||
")";
|
||||
"(";
|
||||
")";
|
||||
"(";
|
||||
")";
|
||||
"(";
|
||||
")";
|
||||
"?";
|
||||
":";
|
||||
"@";
|
||||
"@@";
|
||||
"$";
|
||||
"\"";
|
||||
"`";
|
||||
"{";
|
||||
"}";
|
||||
"^";
|
||||
"::";
|
||||
"->";
|
||||
"]";
|
||||
"[";
|
||||
|
40
src/Parsers/fuzzers/codegen_fuzzer/codegen_select_fuzzer.cpp
Normal file
@ -0,0 +1,40 @@

#include <iostream>
#include <string>

#include <IO/WriteBufferFromOStream.h>
#include <Parsers/ParserQueryWithOutput.h>
#include <Parsers/parseQuery.h>
#include <Parsers/formatAST.h>

#include <libfuzzer/libfuzzer_macro.h>

#include "out.pb.h"

void GenerateSentence(const Sentence&, std::string &, int);


DEFINE_BINARY_PROTO_FUZZER(const Sentence& main)
{
    static std::string input;
    input.reserve(4096);

    GenerateSentence(main, input, 0);
    if (input.size())
    {
        std::cout << input << std::endl;

        DB::ParserQueryWithOutput parser(input.data() + input.size());
        try
        {
            DB::ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0);

            DB::WriteBufferFromOStream out(std::cerr, 4096);
            DB::formatAST(*ast, out);
            std::cerr << std::endl;
        }
        catch (...) {}

        input.clear();
    }
}

248
src/Parsers/fuzzers/codegen_fuzzer/gen.py
Normal file
@ -0,0 +1,248 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import sys
|
||||
import string
|
||||
|
||||
|
||||
TOKEN_TEXT = 1
|
||||
TOKEN_VAR = 2
|
||||
|
||||
TOKEN_COLON = ':'
|
||||
TOKEN_SEMI = ';'
|
||||
TOKEN_OR = '|'
|
||||
TOKEN_QUESTIONMARK = '?'
|
||||
TOKEN_ROUND_BRACKET_OPEN = '('
|
||||
TOKEN_ROUND_BRACKET_CLOSE = ')'
|
||||
TOKEN_ASTERISK = '*'
|
||||
TOKEN_SLASH = '/'
|
||||
|
||||
|
||||
|
||||
|
||||
class TextValue:
|
||||
def __init__(self, t):
|
||||
self.t = t
|
||||
self.slug = None
|
||||
|
||||
def get_slug(self):
|
||||
if self.slug is not None:
|
||||
return self.slug
|
||||
slug = ''
|
||||
for c in self.t:
|
||||
slug += c if c in string.ascii_letters else '_'
|
||||
self.slug = slug
|
||||
return slug
|
||||
|
||||
def get_name(self):
|
||||
return f"TextValue_{self.get_slug()}"
|
||||
|
||||
def __repr__(self):
|
||||
return f"TextValue(\"{self.t}\")"
|
||||
|
||||
|
||||
class Var:
|
||||
def __init__(self, id_):
|
||||
self.id_ = id_
|
||||
|
||||
def __repr__(self):
|
||||
return f"Var({self.id_})"
|
||||
|
||||
|
||||
class Parser:
|
||||
def __init__(self):
|
||||
self.chains = []
|
||||
self.text = None
|
||||
self.col = 0
|
||||
self.line = 1
|
||||
self.t = None
|
||||
self.var_id = -1
|
||||
self.cur_tok = None
|
||||
self.includes = []
|
||||
|
||||
self.proto = ''
|
||||
self.cpp = ''
|
||||
|
||||
def parse_file(self, filename):
|
||||
with open(filename) as f:
|
||||
self.text = f.read()
|
||||
|
||||
while self.parse_statement() is not None:
|
||||
pass
|
||||
|
||||
def add_include(self, filename):
|
||||
self.includes.append(filename)
|
||||
|
||||
def get_next_token(self):
|
||||
self.skip_ws()
|
||||
|
||||
if not len(self.text):
|
||||
return None
|
||||
|
||||
if self.text[0] == '"':
|
||||
return self.parse_txt_value()
|
||||
|
||||
if self.text[0] == '$':
|
||||
return self.parse_var_value()
|
||||
|
||||
c, self.text = self.text[0], self.text[1:]
|
||||
self.cur_tok = c
|
||||
return c
|
||||
|
||||
def parse_var_value(self):
|
||||
i = self.text.find(' ')
|
||||
|
||||
id_, self.text = self.text[1:i], self.text[i+1:]
|
||||
self.var_id = int(id_)
|
||||
self.cur_tok = TOKEN_VAR
|
||||
return TOKEN_VAR
|
||||
|
||||
def parse_txt_value(self):
|
||||
if self.text[0] != '"':
|
||||
raise Exception("parse_txt_value: expected quote at the start")
|
||||
|
||||
self.t = ''
|
||||
self.text = self.text[1:]
|
||||
|
||||
while self.text[0] != '"':
|
||||
if self.text[0] == '\\':
|
||||
if self.text[1] == 'x':
|
||||
self.t += self.text[:4]
|
||||
self.text = self.text[4:]
|
||||
elif self.text[1] in 'nt\\"':
|
||||
self.t += self.text[:2]
|
||||
self.text = self.text[2:]
|
||||
else:
|
||||
raise Exception(f"parse_txt_value: unknown symbol {self.text[0]}")
|
||||
else:
|
||||
c, self.text = self.text[0], self.text[1:]
|
||||
self.t += c
|
||||
|
||||
self.text = self.text[1:]
|
||||
self.cur_tok = TOKEN_TEXT
|
||||
return TOKEN_TEXT
|
||||
|
||||
def skip_ws(self):
|
||||
while self.text and self.text[0] in string.whitespace:
|
||||
if self.text[0] == '\n':
|
||||
self.line += 1
|
||||
self.col = 0
|
||||
self.text = self.text[1:]
|
||||
self.col += 1
|
||||
if not self.text:
|
||||
return None
|
||||
return True
|
||||
|
||||
def skip_line(self):
|
||||
self.line += 1
|
||||
index = self.text.find('\n')
|
||||
self.text = self.text[index:]
|
||||
|
||||
|
||||
def parse_statement(self):
|
||||
if self.skip_ws() is None:
|
||||
return None
|
||||
|
||||
self.get_next_token()
|
||||
if self.cur_tok == TOKEN_SLASH:
|
||||
self.skip_line()
|
||||
return TOKEN_SLASH
|
||||
|
||||
chain = []
|
||||
while self.cur_tok != TOKEN_SEMI:
|
||||
if self.cur_tok == TOKEN_TEXT:
|
||||
chain.append(TextValue(self.t))
|
||||
elif self.cur_tok == TOKEN_VAR:
|
||||
chain.append(Var(self.var_id))
|
||||
else:
|
||||
self.fatal_parsing_error(f"unexpected token {self.cur_tok}")
|
||||
self.get_next_token()
|
||||
|
||||
if not chain:
|
||||
self.fatal_parsing_error("empty chains are not allowed")
|
||||
self.chains.append(chain)
|
||||
return True
|
||||
|
||||
def generate(self):
|
||||
self.proto = 'syntax = "proto3";\n\n'
|
||||
self.cpp = '#include <iostream>\n#include <string>\n#include <vector>\n\n#include <libfuzzer/libfuzzer_macro.h>\n\n'
|
||||
|
||||
for incl_file in self.includes:
|
||||
self.cpp += f'#include "{incl_file}"\n'
|
||||
self.cpp += '\n'
|
||||
|
||||
self.proto += 'message Word {\n'
|
||||
self.proto += '\tenum Value {\n'
|
||||
|
||||
self.cpp += 'void GenerateWord(const Word&, std::string&, int);\n\n'
|
||||
|
||||
self.cpp += 'void GenerateSentence(const Sentence& stc, std::string &s, int depth) {\n'
|
||||
self.cpp += '\tfor (int i = 0; i < stc.words_size(); i++ ) {\n'
|
||||
self.cpp += '\t\tGenerateWord(stc.words(i), s, ++depth);\n'
|
||||
self.cpp += '\t}\n'
|
||||
self.cpp += '}\n'
|
||||
|
||||
self.cpp += 'void GenerateWord(const Word& word, std::string &s, int depth) {\n'
|
||||
|
||||
self.cpp += '\tif (depth > 5) return;\n\n'
|
||||
self.cpp += '\tswitch (word.value()) {\n'
|
||||
|
||||
for idx, chain in enumerate(self.chains):
|
||||
self.proto += f'\t\tvalue_{idx} = {idx};\n'
|
||||
|
||||
self.cpp += f'\t\tcase {idx}: {{\n'
|
||||
num_var = 0
|
||||
for item in chain:
|
||||
if isinstance(item, TextValue):
|
||||
self.cpp += f'\t\t\ts += "{item.t}";\n'
|
||||
elif isinstance(item, Var):
|
||||
self.cpp += f'\t\t\tif (word.inner().words_size() > {num_var})\t\t\t\tGenerateWord(word.inner().words({num_var}), s, ++depth);\n'
|
||||
num_var += 1
|
||||
else:
|
||||
raise Exception("unknown token met during generation")
|
||||
self.cpp += '\t\t\tbreak;\n\t\t}\n'
|
||||
self.cpp += '\t\tdefault: break;\n'
|
||||
|
||||
self.cpp += '\t}\n'
|
||||
|
||||
self.proto += '\t}\n'
|
||||
self.proto += '\tValue value = 1;\n'
|
||||
self.proto += '\tSentence inner = 2;\n'
|
||||
self.proto += '}\nmessage Sentence {\n\trepeated Word words = 1;\n}'
|
||||
|
||||
self.cpp += '}\n'
|
||||
return self.cpp, self.proto
|
||||
|
||||
def fatal_parsing_error(self, msg):
|
||||
print(f"Line: {self.line}, Col: {self.col}")
|
||||
raise Exception(f"fatal error during parsing. {msg}")
|
||||
|
||||
|
||||
def main(args):
|
||||
input_file, outfile_cpp, outfile_proto = args
|
||||
|
||||
if not outfile_proto.endswith('.proto'):
|
||||
raise Exception("outfile_proto (argv[3]) should end with `.proto`")
|
||||
|
||||
include_filename = outfile_proto[:-6] + ".pb.h"
|
||||
|
||||
p = Parser()
|
||||
p.add_include(include_filename)
|
||||
p.parse_file(input_file)
|
||||
|
||||
cpp, proto = p.generate()
|
||||
|
||||
proto = proto.replace('\t', ' ' * 4)
|
||||
cpp = cpp.replace('\t', ' ' * 4)
|
||||
|
||||
with open(outfile_cpp, 'w') as f:
|
||||
f.write(cpp)
|
||||
|
||||
with open(outfile_proto, 'w') as f:
|
||||
f.write(proto)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if len(sys.argv) != 4:
|
||||
print(f"Usage {sys.argv[0]} <input_file> <outfile.cpp> <outfile.proto>")
|
||||
sys.exit(1)
|
||||
main(sys.argv[1:])
|
30
src/Parsers/fuzzers/codegen_fuzzer/update.sh
Executable file
@ -0,0 +1,30 @@
#!/bin/bash


_main() {
    local dict_filename="${1}"
    if [[ $# -ne 1 ]];
    then
        echo "Usage: $0 <dict_filename>";
        exit 1;
    fi

    if [[ ! -f $dict_filename ]];
    then
        echo "File $dict_filename doesn't exist";
        exit 1
    fi

    cat clickhouse-template.g > clickhouse.g

    while read line;
    do
        [[ -z "$line" ]] && continue
        echo $line | sed -e '/^#/d' -e 's/"\(.*\)"/" \1 ";/g'
    done < $dict_filename >> clickhouse.g
}

_main "$@"

# Sample run: ./update.sh ${CLICKHOUSE_SOURCE_DIR}/tests/fuzz/all.dict
# then run `python ./gen.py clickhouse.g out.cpp out.proto` to generate new files with tokens. Rebuild fuzzer
@ -1,4 +1,5 @@
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/BufferWithOwnMemory.h>
|
||||
#include <IO/Operators.h>
|
||||
|
||||
#include <Formats/verbosePrintString.h>
|
||||
|
@ -1,5 +1,6 @@
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteBufferFromString.h>
|
||||
#include <IO/BufferWithOwnMemory.h>
|
||||
#include <IO/Operators.h>
|
||||
|
||||
#include <Processors/Formats/Impl/TabSeparatedRowInputFormat.h>
|
||||
|
@ -10,6 +10,7 @@
|
||||
#include <Storages/MergeTree/IMergeTreeDataPart.h>
|
||||
#include <Storages/MergeTree/MergeTreeSequentialSource.h>
|
||||
#include <Storages/MergeTree/FutureMergedMutatedPart.h>
|
||||
#include <Storages/MergeTree/MergeTreeDataMergerMutator.h>
|
||||
#include <Processors/Transforms/ExpressionTransform.h>
|
||||
#include <Processors/Transforms/MaterializingTransform.h>
|
||||
#include <Processors/Merges/MergingSortedTransform.h>
|
||||
@ -116,11 +117,23 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare()
|
||||
}
|
||||
|
||||
ctx->disk = global_ctx->space_reservation->getDisk();
|
||||
auto local_new_part_relative_tmp_path_name = local_tmp_prefix + global_ctx->future_part->name + local_tmp_suffix;
|
||||
auto local_new_part_tmp_path = global_ctx->data->relative_data_path + local_new_part_relative_tmp_path_name + "/";
|
||||
|
||||
String local_part_path = global_ctx->data->relative_data_path;
|
||||
String local_tmp_part_basename = local_tmp_prefix + global_ctx->future_part->name + (global_ctx->parent_part ? ".proj" : "");
|
||||
String local_new_part_tmp_path = local_part_path + local_tmp_part_basename + "/";
|
||||
|
||||
if (ctx->disk->exists(local_new_part_tmp_path))
|
||||
throw Exception("Directory " + fullPath(ctx->disk, local_new_part_tmp_path) + " already exists", ErrorCodes::DIRECTORY_ALREADY_EXISTS);
|
||||
|
||||
{
|
||||
std::lock_guard lock(global_ctx->mutator->tmp_parts_lock);
|
||||
global_ctx->mutator->tmp_parts.emplace(local_tmp_part_basename);
|
||||
}
|
||||
SCOPE_EXIT(
|
||||
std::lock_guard lock(global_ctx->mutator->tmp_parts_lock);
|
||||
global_ctx->mutator->tmp_parts.erase(local_tmp_part_basename);
|
||||
);
|
||||
|
||||
global_ctx->all_column_names = global_ctx->metadata_snapshot->getColumns().getNamesOfPhysical();
|
||||
global_ctx->storage_columns = global_ctx->metadata_snapshot->getColumns().getAllPhysical();
|
||||
|
||||
@ -141,7 +154,7 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare()
|
||||
global_ctx->future_part->type,
|
||||
global_ctx->future_part->part_info,
|
||||
local_single_disk_volume,
|
||||
local_new_part_relative_tmp_path_name,
|
||||
local_tmp_part_basename,
|
||||
global_ctx->parent_part);
|
||||
|
||||
global_ctx->new_data_part->uuid = global_ctx->future_part->uuid;
|
||||
@ -560,6 +573,7 @@ bool MergeTask::MergeProjectionsStage::mergeMinMaxIndexAndPrepareProjections() c
|
||||
global_ctx->new_data_part.get(),
|
||||
".proj",
|
||||
global_ctx->data,
|
||||
global_ctx->mutator,
|
||||
global_ctx->merges_blocker,
|
||||
global_ctx->ttl_merges_blocker));
|
||||
}
|
||||
|
@ -60,6 +60,7 @@ public:
|
||||
const IMergeTreeDataPart * parent_part_,
|
||||
String suffix_,
|
||||
MergeTreeData * data_,
|
||||
MergeTreeDataMergerMutator * mutator_,
|
||||
ActionBlocker * merges_blocker_,
|
||||
ActionBlocker * ttl_merges_blocker_)
|
||||
{
|
||||
@ -78,6 +79,7 @@ public:
|
||||
global_ctx->deduplicate_by_columns = std::move(deduplicate_by_columns_);
|
||||
global_ctx->parent_part = std::move(parent_part_);
|
||||
global_ctx->data = std::move(data_);
|
||||
global_ctx->mutator = std::move(mutator_);
|
||||
global_ctx->merges_blocker = std::move(merges_blocker_);
|
||||
global_ctx->ttl_merges_blocker = std::move(ttl_merges_blocker_);
|
||||
|
||||
@ -121,6 +123,7 @@ private:
|
||||
std::unique_ptr<MergeListElement> projection_merge_list_element;
|
||||
MergeListElement * merge_list_element_ptr{nullptr};
|
||||
MergeTreeData * data{nullptr};
|
||||
MergeTreeDataMergerMutator * mutator{nullptr};
|
||||
ActionBlocker * merges_blocker{nullptr};
|
||||
ActionBlocker * ttl_merges_blocker{nullptr};
|
||||
StorageMetadataPtr metadata_snapshot{nullptr};
|
||||
|
@ -1343,7 +1343,7 @@ static bool isOldPartDirectory(const DiskPtr & disk, const String & directory_pa
|
||||
}
|
||||
|
||||
|
||||
void MergeTreeData::clearOldTemporaryDirectories(size_t custom_directories_lifetime_seconds)
|
||||
void MergeTreeData::clearOldTemporaryDirectories(const MergeTreeDataMergerMutator & merger_mutator, size_t custom_directories_lifetime_seconds)
|
||||
{
|
||||
/// If the method is already called from another thread, then we don't need to do anything.
|
||||
std::unique_lock lock(clear_old_temporary_directories_mutex, std::defer_lock);
|
||||
@ -1359,35 +1359,44 @@ void MergeTreeData::clearOldTemporaryDirectories(size_t custom_directories_lifet
|
||||
{
|
||||
for (auto it = disk->iterateDirectory(path); it->isValid(); it->next())
|
||||
{
|
||||
if (startsWith(it->name(), "tmp_"))
|
||||
const std::string & basename = it->name();
|
||||
if (!startsWith(basename, "tmp_"))
|
||||
{
|
||||
try
|
||||
continue;
|
||||
}
|
||||
const std::string & full_path = fullPath(disk, it->path());
|
||||
if (merger_mutator.hasTemporaryPart(basename))
|
||||
{
|
||||
LOG_WARNING(log, "{} is an active destination for one of merge/mutation (consider increasing temporary_directories_lifetime setting)", full_path);
|
||||
continue;
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
if (disk->isDirectory(it->path()) && isOldPartDirectory(disk, it->path(), deadline))
|
||||
{
|
||||
if (disk->isDirectory(it->path()) && isOldPartDirectory(disk, it->path(), deadline))
|
||||
{
|
||||
LOG_WARNING(log, "Removing temporary directory {}", fullPath(disk, it->path()));
|
||||
disk->removeRecursive(it->path());
|
||||
}
|
||||
LOG_WARNING(log, "Removing temporary directory {}", full_path);
|
||||
disk->removeRecursive(it->path());
|
||||
}
|
||||
/// see getModificationTime()
|
||||
catch (const ErrnoException & e)
|
||||
}
|
||||
/// see getModificationTime()
|
||||
catch (const ErrnoException & e)
|
||||
{
|
||||
if (e.getErrno() == ENOENT)
|
||||
{
|
||||
if (e.getErrno() == ENOENT)
|
||||
{
|
||||
/// If the file is already deleted, do nothing.
|
||||
}
|
||||
else
|
||||
throw;
|
||||
/// If the file is already deleted, do nothing.
|
||||
}
|
||||
catch (const fs::filesystem_error & e)
|
||||
else
|
||||
throw;
|
||||
}
|
||||
catch (const fs::filesystem_error & e)
|
||||
{
|
||||
if (e.code() == std::errc::no_such_file_or_directory)
|
||||
{
|
||||
if (e.code() == std::errc::no_such_file_or_directory)
|
||||
{
|
||||
/// If the file is already deleted, do nothing.
|
||||
}
|
||||
else
|
||||
throw;
|
||||
/// If the file is already deleted, do nothing.
|
||||
}
|
||||
else
|
||||
throw;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -39,6 +39,7 @@ namespace DB
|
||||
|
||||
class AlterCommands;
|
||||
class MergeTreePartsMover;
|
||||
class MergeTreeDataMergerMutator;
|
||||
class MutationCommands;
|
||||
class Context;
|
||||
struct JobAndPool;
|
||||
@ -536,7 +537,7 @@ public:
|
||||
|
||||
/// Delete all directories which names begin with "tmp"
|
||||
/// Must be called with locked lockForShare() because it's using relative_data_path.
|
||||
void clearOldTemporaryDirectories(size_t custom_directories_lifetime_seconds);
|
||||
void clearOldTemporaryDirectories(const MergeTreeDataMergerMutator & merger_mutator, size_t custom_directories_lifetime_seconds);
|
||||
|
||||
void clearEmptyParts();
|
||||
|
||||
|
@ -443,6 +443,7 @@ MergeTaskPtr MergeTreeDataMergerMutator::mergePartsToTemporaryPart(
|
||||
parent_part,
|
||||
suffix,
|
||||
&data,
|
||||
this,
|
||||
&merges_blocker,
|
||||
&ttl_merges_blocker);
|
||||
}
|
||||
@ -773,4 +774,10 @@ ExecuteTTLType MergeTreeDataMergerMutator::shouldExecuteTTL(const StorageMetadat
|
||||
}
|
||||
|
||||
|
||||
bool MergeTreeDataMergerMutator::hasTemporaryPart(const std::string & basename) const
|
||||
{
|
||||
std::lock_guard lock(tmp_parts_lock);
|
||||
return tmp_parts.contains(basename);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,6 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <atomic>
|
||||
#include <mutex>
|
||||
#include <functional>
|
||||
|
||||
#include <Common/ActionBlocker.h>
|
||||
@ -136,6 +137,7 @@ private:
|
||||
MergeTreeData::DataPartsVector selectAllPartsFromPartition(const String & partition_id);
|
||||
|
||||
friend class MutateTask;
|
||||
friend class MergeTask;
|
||||
|
||||
/** Split mutation commands into two parts:
|
||||
* First part should be executed by mutations interpreter.
|
||||
@ -190,6 +192,26 @@ private:
    ITTLMergeSelector::PartitionIdToTTLs next_recompress_ttl_merge_times_by_partition;
    /// Performing TTL merges independently for each partition guarantees that
    /// there is only a limited number of TTL merges and no partition stores data that is too stale.

public:
    /// Returns true if the passed part name is active,
    /// i.e. it is the destination of one of the in-progress mutations/merges.
    ///
    /// NOTE: it accepts a basename (i.e. a directory name), not a full path,
    /// since the latter would require a canonical form.
    bool hasTemporaryPart(const std::string & basename) const;

private:
    /// Set of active temporary paths that are used as destinations.
    /// The list of such paths is needed to avoid trying to remove them during cleanup.
    ///
    /// NOTE: the set is pretty short, so using an STL container is fine.
    std::unordered_set<std::string> tmp_parts;
    /// Lock for "tmp_parts".
    ///
    /// NOTE: mutable is required to mark hasTemporaryPart() const
    mutable std::mutex tmp_parts_lock;
|
||||
|
||||
};
|
||||
|
||||
|
||||
|
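
The guard pattern introduced here is small enough to show in isolation. The following is a minimal standalone sketch with illustrative names (TmpPartsRegistry is not a ClickHouse class): a merge registers the temporary directory it is about to write into, the cleanup thread consults the registry before deleting anything, and the mutex is mutable so the const lookup can still take the lock.

    #include <iostream>
    #include <mutex>
    #include <string>
    #include <unordered_set>

    class TmpPartsRegistry
    {
    public:
        void add(const std::string & basename)
        {
            std::lock_guard<std::mutex> lock(mutex);
            parts.insert(basename);
        }

        void remove(const std::string & basename)
        {
            std::lock_guard<std::mutex> lock(mutex);
            parts.erase(basename);
        }

        bool has(const std::string & basename) const
        {
            std::lock_guard<std::mutex> lock(mutex);
            return parts.count(basename) > 0;
        }

    private:
        std::unordered_set<std::string> parts;
        mutable std::mutex mutex; /// mutable so that has() can stay const
    };

    int main()
    {
        TmpPartsRegistry registry;
        registry.add("tmp_merge_all_1_2_3");

        /// The cleanup thread would do something like this for every "tmp_*" directory it finds:
        const std::string candidate = "tmp_merge_all_1_2_3";
        if (registry.has(candidate))
            std::cout << candidate << " is an active merge destination, skipping\n";
        else
            std::cout << "removing stale directory " << candidate << "\n";
        return 0;
    }
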
@ -142,30 +142,6 @@ MergeTreeReadTaskPtr MergeTreeReadPool::getTask(const size_t min_marks_to_read,
|
||||
prewhere_info && prewhere_info->remove_prewhere_column, per_part_should_reorder[part_idx], std::move(curr_task_size_predictor));
|
||||
}
|
||||
|
||||
MarkRanges MergeTreeReadPool::getRestMarks(const IMergeTreeDataPart & part, const MarkRange & from) const
|
||||
{
|
||||
MarkRanges all_part_ranges;
|
||||
|
||||
/// Inefficient in presence of large number of data parts.
|
||||
for (const auto & part_ranges : parts_ranges)
|
||||
{
|
||||
if (part_ranges.data_part.get() == &part)
|
||||
{
|
||||
all_part_ranges = part_ranges.ranges;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (all_part_ranges.empty())
|
||||
throw Exception("Trying to read marks range [" + std::to_string(from.begin) + ", " + std::to_string(from.end) + "] from part '"
|
||||
+ part.getFullPath() + "' which has no ranges in this query", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
auto begin = std::lower_bound(all_part_ranges.begin(), all_part_ranges.end(), from, [] (const auto & f, const auto & s) { return f.begin < s.begin; });
|
||||
if (begin == all_part_ranges.end())
|
||||
begin = std::prev(all_part_ranges.end());
|
||||
begin->begin = from.begin;
|
||||
return MarkRanges(begin, all_part_ranges.end());
|
||||
}
|
||||
|
||||
Block MergeTreeReadPool::getHeader() const
|
||||
{
|
||||
return metadata_snapshot->getSampleBlockForColumns(column_names, data.getVirtuals(), data.getStorageID());
|
||||
|
@ -85,9 +85,6 @@ public:
|
||||
*/
|
||||
void profileFeedback(const ReadBufferFromFileBase::ProfileInfo info);
|
||||
|
||||
/// This method tells which mark ranges we have to read if we start from @from mark range
|
||||
MarkRanges getRestMarks(const IMergeTreeDataPart & part, const MarkRange & from) const;
|
||||
|
||||
Block getHeader() const;
|
||||
|
||||
private:
|
||||
|
@ -68,18 +68,16 @@ bool MergeTreeThreadSelectProcessor::getNewTask()
|
||||
|
||||
if (!reader)
|
||||
{
|
||||
auto rest_mark_ranges = pool->getRestMarks(*task->data_part, task->mark_ranges[0]);
|
||||
|
||||
if (use_uncompressed_cache)
|
||||
owned_uncompressed_cache = storage.getContext()->getUncompressedCache();
|
||||
owned_mark_cache = storage.getContext()->getMarkCache();
|
||||
|
||||
reader = task->data_part->getReader(task->columns, metadata_snapshot, rest_mark_ranges,
|
||||
reader = task->data_part->getReader(task->columns, metadata_snapshot, task->mark_ranges,
|
||||
owned_uncompressed_cache.get(), owned_mark_cache.get(), reader_settings,
|
||||
IMergeTreeReader::ValueSizeMap{}, profile_callback);
|
||||
|
||||
if (prewhere_info)
|
||||
pre_reader = task->data_part->getReader(task->pre_columns, metadata_snapshot, rest_mark_ranges,
|
||||
pre_reader = task->data_part->getReader(task->pre_columns, metadata_snapshot, task->mark_ranges,
|
||||
owned_uncompressed_cache.get(), owned_mark_cache.get(), reader_settings,
|
||||
IMergeTreeReader::ValueSizeMap{}, profile_callback);
|
||||
}
|
||||
@ -88,14 +86,13 @@ bool MergeTreeThreadSelectProcessor::getNewTask()
|
||||
/// in other case we can reuse readers, anyway they will be "seeked" to required mark
|
||||
if (part_name != last_readed_part_name)
|
||||
{
|
||||
auto rest_mark_ranges = pool->getRestMarks(*task->data_part, task->mark_ranges[0]);
|
||||
/// retain avg_value_size_hints
|
||||
reader = task->data_part->getReader(task->columns, metadata_snapshot, rest_mark_ranges,
|
||||
reader = task->data_part->getReader(task->columns, metadata_snapshot, task->mark_ranges,
|
||||
owned_uncompressed_cache.get(), owned_mark_cache.get(), reader_settings,
|
||||
reader->getAvgValueSizeHints(), profile_callback);
|
||||
|
||||
if (prewhere_info)
|
||||
pre_reader = task->data_part->getReader(task->pre_columns, metadata_snapshot, rest_mark_ranges,
|
||||
pre_reader = task->data_part->getReader(task->pre_columns, metadata_snapshot, task->mark_ranges,
|
||||
owned_uncompressed_cache.get(), owned_mark_cache.get(), reader_settings,
|
||||
reader->getAvgValueSizeHints(), profile_callback);
|
||||
}
|
||||
|
@ -62,7 +62,7 @@ void ReplicatedMergeTreeCleanupThread::iterate()
|
||||
/// Both use relative_data_path which changes during rename, so we
|
||||
/// do it under share lock
|
||||
storage.clearOldWriteAheadLogs();
|
||||
storage.clearOldTemporaryDirectories(storage.getSettings()->temporary_directories_lifetime.totalSeconds());
|
||||
storage.clearOldTemporaryDirectories(storage.merger_mutator, storage.getSettings()->temporary_directories_lifetime.totalSeconds());
|
||||
}
|
||||
|
||||
/// This is loose condition: no problem if we actually had lost leadership at this moment
|
||||
|
@ -105,7 +105,7 @@ void StorageMergeTree::startup()
|
||||
|
||||
/// Temporary directories contain incomplete results of merges (after forced restart)
|
||||
/// and don't allow to reinitialize them, so delete each of them immediately
|
||||
clearOldTemporaryDirectories(0);
|
||||
clearOldTemporaryDirectories(merger_mutator, 0);
|
||||
|
||||
/// NOTE background task will also do the above cleanups periodically.
|
||||
time_after_previous_cleanup_parts.restart();
|
||||
@ -1063,7 +1063,7 @@ bool StorageMergeTree::scheduleDataProcessingJob(BackgroundJobsAssignee & assign
|
||||
assignee.scheduleCommonTask(ExecutableLambdaAdapter::create(
|
||||
[this, share_lock] ()
|
||||
{
|
||||
clearOldTemporaryDirectories(getSettings()->temporary_directories_lifetime.totalSeconds());
|
||||
clearOldTemporaryDirectories(merger_mutator, getSettings()->temporary_directories_lifetime.totalSeconds());
|
||||
return true;
|
||||
}, common_assignee_trigger, getStorageID()));
|
||||
scheduled = true;
|
||||
|
@ -478,7 +478,7 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree(
|
||||
}
|
||||
/// Temporary directories contain uninitialized results of Merges or Fetches (after forced restart),
|
||||
/// don't allow to reinitialize them, delete each of them immediately.
|
||||
clearOldTemporaryDirectories(0);
|
||||
clearOldTemporaryDirectories(merger_mutator, 0);
|
||||
clearOldWriteAheadLogs();
|
||||
}
|
||||
|
||||
|
@ -109,7 +109,10 @@ def clickhouse_execute_json(base_args, query, timeout=30, settings=None):
    data = clickhouse_execute_http(base_args, query, timeout, settings, 'JSONEachRow')
    if not data:
        return None
    return json.loads(data)
    rows = []
    for row in data.strip().splitlines():
        rows.append(json.loads(row))
    return rows


class Terminated(KeyboardInterrupt):

@ -475,19 +478,19 @@ class TestCase:

        if os.path.isfile(self.stdout_file):
            description += ", result:\n\n"
            description += '\n'.join(open(self.stdout_file).read().split('\n')[:100])
            description += '\n'.join(open(self.stdout_file).read().splitlines()[:100])
            description += '\n'

        description += "\nstdout:\n{}\n".format(stdout)
        return TestResult(self.name, TestStatus.FAIL, reason, total_time, description)

        if stderr:
            description += "\n{}\n".format('\n'.join(stderr.split('\n')[:100]))
            description += "\n{}\n".format('\n'.join(stderr.splitlines()[:100]))
            description += "\nstdout:\n{}\n".format(stdout)
            return TestResult(self.name, TestStatus.FAIL, FailureReason.STDERR, total_time, description)

        if 'Exception' in stdout:
            description += "\n{}\n".format('\n'.join(stdout.split('\n')[:100]))
            description += "\n{}\n".format('\n'.join(stdout.splitlines()[:100]))
            return TestResult(self.name, TestStatus.FAIL, FailureReason.EXCEPTION, total_time, description)

        if '@@SKIP@@' in stdout:

@ -1392,7 +1395,6 @@ if __name__ == '__main__':
    http_port = os.getenv("CLICKHOUSE_PORT_HTTP")
    if http_port is not None:
        args.http_port = int(http_port)
        args.client += f" --port={http_port}"
    else:
        args.http_port = 8123

126
tests/performance/hashed_array_dictionary.xml
Normal file
@ -0,0 +1,126 @@
|
||||
<test>
|
||||
<create_query>
|
||||
CREATE TABLE simple_key_hashed_array_dictionary_source_table
|
||||
(
|
||||
id UInt64,
|
||||
value_int UInt64,
|
||||
value_string String,
|
||||
value_decimal Decimal64(8),
|
||||
value_string_nullable Nullable(String)
|
||||
) ENGINE = Memory;
|
||||
</create_query>
|
||||
|
||||
<create_query>
|
||||
CREATE TABLE complex_key_hashed_array_dictionary_source_table
|
||||
(
|
||||
id UInt64,
|
||||
id_key String,
|
||||
value_int UInt64,
|
||||
value_string String,
|
||||
value_decimal Decimal64(8),
|
||||
value_string_nullable Nullable(String)
|
||||
) ENGINE = Memory;
|
||||
</create_query>
|
||||
|
||||
<create_query>
|
||||
CREATE DICTIONARY simple_key_hashed_array_dictionary
|
||||
(
|
||||
id UInt64,
|
||||
value_int UInt64,
|
||||
value_string String,
|
||||
value_decimal Decimal64(8),
|
||||
value_string_nullable Nullable(String)
|
||||
)
|
||||
PRIMARY KEY id
|
||||
SOURCE(CLICKHOUSE(DB 'default' TABLE 'simple_key_hashed_array_dictionary_source_table'))
|
||||
LAYOUT(HASHED_ARRAY())
|
||||
LIFETIME(MIN 0 MAX 1000);
|
||||
</create_query>
|
||||
|
||||
<create_query>
|
||||
CREATE DICTIONARY complex_key_hashed_array_dictionary
|
||||
(
|
||||
id UInt64,
|
||||
id_key String,
|
||||
value_int UInt64,
|
||||
value_string String,
|
||||
value_decimal Decimal64(8),
|
||||
value_string_nullable Nullable(String)
|
||||
)
|
||||
PRIMARY KEY id, id_key
|
||||
SOURCE(CLICKHOUSE(DB 'default' TABLE 'complex_key_hashed_array_dictionary_source_table'))
|
||||
LAYOUT(COMPLEX_KEY_HASHED_ARRAY())
|
||||
LIFETIME(MIN 0 MAX 1000);
|
||||
</create_query>
|
||||
|
||||
<fill_query>
|
||||
INSERT INTO simple_key_hashed_array_dictionary_source_table
|
||||
SELECT number, number, toString(number), toDecimal64(number, 8), toString(number)
|
||||
FROM system.numbers
|
||||
LIMIT 5000000;
|
||||
</fill_query>
|
||||
|
||||
<fill_query>
|
||||
INSERT INTO complex_key_hashed_array_dictionary_source_table
|
||||
SELECT number, toString(number), number, toString(number), toDecimal64(number, 8), toString(number)
|
||||
FROM system.numbers
|
||||
LIMIT 5000000;
|
||||
</fill_query>
|
||||
|
||||
<substitutions>
|
||||
<substitution>
|
||||
<name>column_name</name>
|
||||
<values>
|
||||
<value>'value_int'</value>
|
||||
<value>'value_string'</value>
|
||||
<value>'value_decimal'</value>
|
||||
<value>'value_string_nullable'</value>
|
||||
</values>
|
||||
</substitution>
|
||||
|
||||
<substitution>
|
||||
<name>elements_count</name>
|
||||
<values>
|
||||
<value>5000000</value>
|
||||
<value>7500000</value>
|
||||
</values>
|
||||
</substitution>
|
||||
</substitutions>
|
||||
|
||||
<query>
|
||||
WITH rand64() % toUInt64({elements_count}) as key
|
||||
SELECT dictGet('default.simple_key_hashed_array_dictionary', {column_name}, key)
|
||||
FROM system.numbers
|
||||
LIMIT {elements_count}
|
||||
FORMAT Null;
|
||||
</query>
|
||||
<query>
|
||||
WITH rand64() % toUInt64({elements_count}) as key
|
||||
SELECT dictHas('default.simple_key_hashed_array_dictionary', key)
|
||||
FROM system.numbers
|
||||
LIMIT {elements_count}
|
||||
FORMAT Null;
|
||||
</query>
|
||||
|
||||
<query>
|
||||
WITH (rand64() % toUInt64({elements_count}), toString(rand64() % toUInt64({elements_count}))) as key
|
||||
SELECT dictGet('default.complex_key_hashed_array_dictionary', {column_name}, key)
|
||||
FROM system.numbers
|
||||
LIMIT {elements_count}
|
||||
FORMAT Null;
|
||||
</query>
|
||||
<query>
|
||||
WITH (rand64() % toUInt64({elements_count}), toString(rand64() % toUInt64({elements_count}))) as key
|
||||
SELECT dictHas('default.complex_key_hashed_array_dictionary', key)
|
||||
FROM system.numbers
|
||||
LIMIT {elements_count}
|
||||
FORMAT Null;
|
||||
</query>
|
||||
|
||||
<drop_query>DROP TABLE IF EXISTS simple_key_hashed_array_dictionary_source_table;</drop_query>
|
||||
<drop_query>DROP TABLE IF EXISTS complex_key_hashed_array_dictionary_source_table;</drop_query>
|
||||
|
||||
<drop_query>DROP DICTIONARY IF EXISTS simple_key_hashed_array_dictionary;</drop_query>
|
||||
<drop_query>DROP DICTIONARY IF EXISTS complex_key_hashed_array_dictionary;</drop_query>
|
||||
|
||||
</test>
|
@ -0,0 +1,15 @@
execute: default
"foo"
1
execute: --stage fetch_columns
"dummy"
0
execute: --stage with_mergeable_state
"1"
1
execute: --stage with_mergeable_state_after_aggregation
"1"
1
execute: --stage complete
"foo"
1

22
tests/queries/0_stateless/02048_clickhouse_local_stage.sh
Executable file
@ -0,0 +1,22 @@
#!/usr/bin/env bash
# Tags: no-fasttest

CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh

function execute_query()
{
    if [ $# -eq 0 ]; then
        echo "execute: default"
    else
        echo "execute: $*"
    fi
    ${CLICKHOUSE_LOCAL} "$@" --format CSVWithNames -q "SELECT 1 AS foo"
}

execute_query # default -- complete
execute_query --stage fetch_columns
execute_query --stage with_mergeable_state
execute_query --stage with_mergeable_state_after_aggregation
execute_query --stage complete
@ -0,0 +1,60 @@
|
||||
0 0
|
||||
1 1
|
||||
2 2
|
||||
3 3
|
||||
4 40
|
||||
5 50
|
||||
6 60
|
||||
7 70
|
||||
8 800
|
||||
9 900
|
||||
10 1000
|
||||
11 1100
|
||||
12 12000
|
||||
13 13000
|
||||
14 14000
|
||||
15 15000
|
||||
16 160000
|
||||
17 170000
|
||||
18 180000
|
||||
19 190000
|
||||
0 0
|
||||
1 1
|
||||
2 2
|
||||
3 3
|
||||
4 40
|
||||
5 50
|
||||
6 60
|
||||
7 70
|
||||
8 80000
|
||||
9 90000
|
||||
10 100000
|
||||
11 110000
|
||||
12 120000
|
||||
13 130000
|
||||
14 140000
|
||||
15 150000
|
||||
16 160000
|
||||
17 170000
|
||||
18 180000
|
||||
19 190000
|
||||
0 0
|
||||
1 1
|
||||
2 2
|
||||
3 3
|
||||
4 40
|
||||
5 50
|
||||
6 60
|
||||
7 70
|
||||
8 800
|
||||
9 900
|
||||
10 1000
|
||||
11 1100
|
||||
12 12000
|
||||
13 13000
|
||||
14 14000
|
||||
15 15000
|
||||
16 160000
|
||||
17 170000
|
||||
18 180000
|
||||
19 190000
|
@ -0,0 +1,45 @@
|
||||
-- https://github.com/ClickHouse/ClickHouse/issues/30231
|
||||
SELECT *
|
||||
FROM (
|
||||
SELECT number,
|
||||
multiIf(
|
||||
CAST(number < 4, 'UInt8'), toString(number),
|
||||
CAST(number < 8, 'LowCardinality(UInt8)'), toString(number * 10),
|
||||
CAST(number < 12, 'Nullable(UInt8)'), toString(number * 100),
|
||||
CAST(number < 16, 'LowCardinality(Nullable(UInt8))'), toString(number * 1000),
|
||||
toString(number * 10000)) as m
|
||||
FROM system.numbers
|
||||
LIMIT 20
|
||||
)
|
||||
ORDER BY number
|
||||
SETTINGS short_circuit_function_evaluation='enable';
|
||||
|
||||
SELECT *
|
||||
FROM (
|
||||
SELECT number,
|
||||
multiIf(
|
||||
CAST(number < 4, 'UInt8'), toString(number),
|
||||
CAST(number < 8, 'LowCardinality(UInt8)'), toString(number * 10),
|
||||
CAST(NULL, 'Nullable(UInt8)'), toString(number * 100),
|
||||
CAST(NULL, 'LowCardinality(Nullable(UInt8))'), toString(number * 1000),
|
||||
toString(number * 10000)) as m
|
||||
FROM system.numbers
|
||||
LIMIT 20
|
||||
)
|
||||
ORDER BY number
|
||||
SETTINGS short_circuit_function_evaluation='enable';
|
||||
|
||||
SELECT *
|
||||
FROM (
|
||||
SELECT number,
|
||||
multiIf(
|
||||
CAST(number < 4, 'UInt8'), toString(number),
|
||||
CAST(number < 8, 'LowCardinality(UInt8)'), toString(number * 10)::LowCardinality(String),
|
||||
CAST(number < 12, 'Nullable(UInt8)'), toString(number * 100)::Nullable(String),
|
||||
CAST(number < 16, 'LowCardinality(Nullable(UInt8))'), toString(number * 1000)::LowCardinality(Nullable(String)),
|
||||
toString(number * 10000)) as m
|
||||
FROM system.numbers
|
||||
LIMIT 20
|
||||
)
|
||||
ORDER BY number
|
||||
SETTINGS short_circuit_function_evaluation='enable';
|
@ -0,0 +1,66 @@
|
||||
Dictionary hashed_array_dictionary_simple_key_simple_attributes
|
||||
dictGet existing value
|
||||
value_0 value_second_0
|
||||
value_1 value_second_1
|
||||
value_2 value_second_2
|
||||
dictGet with non existing value
|
||||
value_0 value_second_0
|
||||
value_1 value_second_1
|
||||
value_2 value_second_2
|
||||
value_first_default value_second_default
|
||||
dictGetOrDefault existing value
|
||||
value_0 value_second_0
|
||||
value_1 value_second_1
|
||||
value_2 value_second_2
|
||||
dictGetOrDefault non existing value
|
||||
value_0 value_second_0
|
||||
value_1 value_second_1
|
||||
value_2 value_second_2
|
||||
default default
|
||||
dictHas
|
||||
1
|
||||
1
|
||||
1
|
||||
0
|
||||
select all values as input stream
|
||||
0 value_0 value_second_0
|
||||
1 value_1 value_second_1
|
||||
2 value_2 value_second_2
|
||||
Dictionary hashed_array_dictionary_simple_key_complex_attributes
|
||||
dictGet existing value
|
||||
value_0 value_second_0
|
||||
value_1 \N
|
||||
value_2 value_second_2
|
||||
dictGet with non existing value
|
||||
value_0 value_second_0
|
||||
value_1 \N
|
||||
value_2 value_second_2
|
||||
value_first_default value_second_default
|
||||
dictGetOrDefault existing value
|
||||
value_0 value_second_0
|
||||
value_1 \N
|
||||
value_2 value_second_2
|
||||
dictGetOrDefault non existing value
|
||||
value_0 value_second_0
|
||||
value_1 \N
|
||||
value_2 value_second_2
|
||||
default default
|
||||
dictHas
|
||||
1
|
||||
1
|
||||
1
|
||||
0
|
||||
select all values as input stream
|
||||
0 value_0 value_second_0
|
||||
1 value_1 \N
|
||||
2 value_2 value_second_2
|
||||
Dictionary hashed_array_dictionary_simple_key_hierarchy
|
||||
dictGet
|
||||
0
|
||||
0
|
||||
1
|
||||
1
|
||||
2
|
||||
dictGetHierarchy
|
||||
[1]
|
||||
[4,2,1]
|
@ -0,0 +1,125 @@
|
||||
DROP TABLE IF EXISTS simple_key_simple_attributes_source_table;
|
||||
CREATE TABLE simple_key_simple_attributes_source_table
|
||||
(
|
||||
id UInt64,
|
||||
value_first String,
|
||||
value_second String
|
||||
)
|
||||
ENGINE = TinyLog;
|
||||
|
||||
INSERT INTO simple_key_simple_attributes_source_table VALUES(0, 'value_0', 'value_second_0');
|
||||
INSERT INTO simple_key_simple_attributes_source_table VALUES(1, 'value_1', 'value_second_1');
|
||||
INSERT INTO simple_key_simple_attributes_source_table VALUES(2, 'value_2', 'value_second_2');
|
||||
|
||||
DROP DICTIONARY IF EXISTS hashed_array_dictionary_simple_key_simple_attributes;
|
||||
CREATE DICTIONARY hashed_array_dictionary_simple_key_simple_attributes
|
||||
(
|
||||
id UInt64,
|
||||
value_first String DEFAULT 'value_first_default',
|
||||
value_second String DEFAULT 'value_second_default'
|
||||
)
|
||||
PRIMARY KEY id
|
||||
SOURCE(CLICKHOUSE(TABLE 'simple_key_simple_attributes_source_table'))
|
||||
LAYOUT(HASHED_ARRAY())
|
||||
LIFETIME(MIN 1 MAX 1000);

SELECT 'Dictionary hashed_array_dictionary_simple_key_simple_attributes';
SELECT 'dictGet existing value';
SELECT dictGet('hashed_array_dictionary_simple_key_simple_attributes', 'value_first', number) as value_first,
    dictGet('hashed_array_dictionary_simple_key_simple_attributes', 'value_second', number) as value_second FROM system.numbers LIMIT 3;
SELECT 'dictGet with non existing value';
SELECT dictGet('hashed_array_dictionary_simple_key_simple_attributes', 'value_first', number) as value_first,
    dictGet('hashed_array_dictionary_simple_key_simple_attributes', 'value_second', number) as value_second FROM system.numbers LIMIT 4;
SELECT 'dictGetOrDefault existing value';
SELECT dictGetOrDefault('hashed_array_dictionary_simple_key_simple_attributes', 'value_first', number, toString('default')) as value_first,
    dictGetOrDefault('hashed_array_dictionary_simple_key_simple_attributes', 'value_second', number, toString('default')) as value_second FROM system.numbers LIMIT 3;
SELECT 'dictGetOrDefault non existing value';
SELECT dictGetOrDefault('hashed_array_dictionary_simple_key_simple_attributes', 'value_first', number, toString('default')) as value_first,
    dictGetOrDefault('hashed_array_dictionary_simple_key_simple_attributes', 'value_second', number, toString('default')) as value_second FROM system.numbers LIMIT 4;
SELECT 'dictHas';
SELECT dictHas('hashed_array_dictionary_simple_key_simple_attributes', number) FROM system.numbers LIMIT 4;
SELECT 'select all values as input stream';
SELECT * FROM hashed_array_dictionary_simple_key_simple_attributes ORDER BY id;

DROP DICTIONARY hashed_array_dictionary_simple_key_simple_attributes;

DROP TABLE simple_key_simple_attributes_source_table;

DROP TABLE IF EXISTS simple_key_complex_attributes_source_table;
CREATE TABLE simple_key_complex_attributes_source_table
(
    id UInt64,
    value_first String,
    value_second Nullable(String)
)
ENGINE = TinyLog;

INSERT INTO simple_key_complex_attributes_source_table VALUES(0, 'value_0', 'value_second_0');
INSERT INTO simple_key_complex_attributes_source_table VALUES(1, 'value_1', NULL);
INSERT INTO simple_key_complex_attributes_source_table VALUES(2, 'value_2', 'value_second_2');

DROP DICTIONARY IF EXISTS hashed_array_dictionary_simple_key_complex_attributes;
CREATE DICTIONARY hashed_array_dictionary_simple_key_complex_attributes
(
    id UInt64,
    value_first String DEFAULT 'value_first_default',
    value_second Nullable(String) DEFAULT 'value_second_default'
)
PRIMARY KEY id
SOURCE(CLICKHOUSE(TABLE 'simple_key_complex_attributes_source_table'))
LAYOUT(HASHED_ARRAY())
LIFETIME(MIN 1 MAX 1000);

SELECT 'Dictionary hashed_array_dictionary_simple_key_complex_attributes';
SELECT 'dictGet existing value';
SELECT dictGet('hashed_array_dictionary_simple_key_complex_attributes', 'value_first', number) as value_first,
    dictGet('hashed_array_dictionary_simple_key_complex_attributes', 'value_second', number) as value_second FROM system.numbers LIMIT 3;
SELECT 'dictGet with non existing value';
SELECT dictGet('hashed_array_dictionary_simple_key_complex_attributes', 'value_first', number) as value_first,
    dictGet('hashed_array_dictionary_simple_key_complex_attributes', 'value_second', number) as value_second FROM system.numbers LIMIT 4;
SELECT 'dictGetOrDefault existing value';
SELECT dictGetOrDefault('hashed_array_dictionary_simple_key_complex_attributes', 'value_first', number, toString('default')) as value_first,
    dictGetOrDefault('hashed_array_dictionary_simple_key_complex_attributes', 'value_second', number, toString('default')) as value_second FROM system.numbers LIMIT 3;
SELECT 'dictGetOrDefault non existing value';
SELECT dictGetOrDefault('hashed_array_dictionary_simple_key_complex_attributes', 'value_first', number, toString('default')) as value_first,
    dictGetOrDefault('hashed_array_dictionary_simple_key_complex_attributes', 'value_second', number, toString('default')) as value_second FROM system.numbers LIMIT 4;
SELECT 'dictHas';
SELECT dictHas('hashed_array_dictionary_simple_key_complex_attributes', number) FROM system.numbers LIMIT 4;
SELECT 'select all values as input stream';
SELECT * FROM hashed_array_dictionary_simple_key_complex_attributes ORDER BY id;

DROP DICTIONARY hashed_array_dictionary_simple_key_complex_attributes;
DROP TABLE simple_key_complex_attributes_source_table;

DROP TABLE IF EXISTS simple_key_hierarchy_table;
CREATE TABLE simple_key_hierarchy_table
(
    id UInt64,
    parent_id UInt64
) ENGINE = TinyLog();

INSERT INTO simple_key_hierarchy_table VALUES (1, 0);
INSERT INTO simple_key_hierarchy_table VALUES (2, 1);
INSERT INTO simple_key_hierarchy_table VALUES (3, 1);
INSERT INTO simple_key_hierarchy_table VALUES (4, 2);

DROP DICTIONARY IF EXISTS hashed_array_dictionary_simple_key_hierarchy;
CREATE DICTIONARY hashed_array_dictionary_simple_key_hierarchy
(
    id UInt64,
    parent_id UInt64 HIERARCHICAL
)
PRIMARY KEY id
SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'simple_key_hierarchy_table'))
LAYOUT(HASHED_ARRAY())
LIFETIME(MIN 1 MAX 1000);

SELECT 'Dictionary hashed_array_dictionary_simple_key_hierarchy';
SELECT 'dictGet';
SELECT dictGet('hashed_array_dictionary_simple_key_hierarchy', 'parent_id', number) FROM system.numbers LIMIT 5;
SELECT 'dictGetHierarchy';
SELECT dictGetHierarchy('hashed_array_dictionary_simple_key_hierarchy', toUInt64(1));
SELECT dictGetHierarchy('hashed_array_dictionary_simple_key_hierarchy', toUInt64(4));
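-- Aside (hypothetical manual walk, not part of the test, using the data inserted above): the
-- [4,2,1] array returned by dictGetHierarchy can be reproduced one step at a time, since
-- dictGet of 'parent_id' yields the immediate parent of a key: 4 -> 2 -> 1, and 1 points to 0.
SELECT dictGet('hashed_array_dictionary_simple_key_hierarchy', 'parent_id', toUInt64(4)) AS parent_of_4,
    dictGet('hashed_array_dictionary_simple_key_hierarchy', 'parent_id', toUInt64(2)) AS parent_of_2;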

DROP DICTIONARY hashed_array_dictionary_simple_key_hierarchy;
DROP TABLE simple_key_hierarchy_table;
@ -0,0 +1,56 @@
Dictionary hashed_array_dictionary_complex_key_simple_attributes
dictGet existing value
value_0 value_second_0
value_1 value_second_1
value_2 value_second_2
dictGet with non existing value
value_0 value_second_0
value_1 value_second_1
value_2 value_second_2
value_first_default value_second_default
dictGetOrDefault existing value
value_0 value_second_0
value_1 value_second_1
value_2 value_second_2
dictGetOrDefault non existing value
value_0 value_second_0
value_1 value_second_1
value_2 value_second_2
default default
dictHas
1
1
1
0
select all values as input stream
0 id_key_0 value_0 value_second_0
1 id_key_1 value_1 value_second_1
2 id_key_2 value_2 value_second_2
Dictionary hashed_array_dictionary_complex_key_complex_attributes
dictGet existing value
value_0 value_second_0
value_1 \N
value_2 value_second_2
dictGet with non existing value
value_0 value_second_0
value_1 \N
value_2 value_second_2
value_first_default value_second_default
dictGetOrDefault existing value
value_0 value_second_0
value_1 \N
value_2 value_second_2
dictGetOrDefault non existing value
value_0 value_second_0
value_1 \N
value_2 value_second_2
default default
dictHas
1
1
1
0
select all values as input stream
0 id_key_0 value_0 value_second_0
1 id_key_1 value_1 \N
2 id_key_2 value_2 value_second_2
@ -0,0 +1,97 @@
DROP TABLE IF EXISTS complex_key_simple_attributes_source_table;
CREATE TABLE complex_key_simple_attributes_source_table
(
    id UInt64,
    id_key String,
    value_first String,
    value_second String
)
ENGINE = TinyLog;

INSERT INTO complex_key_simple_attributes_source_table VALUES(0, 'id_key_0', 'value_0', 'value_second_0');
INSERT INTO complex_key_simple_attributes_source_table VALUES(1, 'id_key_1', 'value_1', 'value_second_1');
INSERT INTO complex_key_simple_attributes_source_table VALUES(2, 'id_key_2', 'value_2', 'value_second_2');

DROP DICTIONARY IF EXISTS hashed_array_dictionary_complex_key_simple_attributes;
CREATE DICTIONARY hashed_array_dictionary_complex_key_simple_attributes
(
    id UInt64,
    id_key String,
    value_first String DEFAULT 'value_first_default',
    value_second String DEFAULT 'value_second_default'
)
PRIMARY KEY id, id_key
SOURCE(CLICKHOUSE(TABLE 'complex_key_simple_attributes_source_table'))
LIFETIME(MIN 1 MAX 1000)
LAYOUT(COMPLEX_KEY_HASHED_ARRAY());
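-- Aside (hypothetical hand-run probe, not part of the test flow): with the complex-key layout,
-- a key is passed to dictGet/dictHas as a tuple in PRIMARY KEY order, here (UInt64, String),
-- so a single literal lookup against the data inserted above looks like this and returns 'value_0'.
SELECT dictGet('hashed_array_dictionary_complex_key_simple_attributes', 'value_first', (toUInt64(0), 'id_key_0'));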

SELECT 'Dictionary hashed_array_dictionary_complex_key_simple_attributes';
SELECT 'dictGet existing value';
SELECT dictGet('hashed_array_dictionary_complex_key_simple_attributes', 'value_first', (number, concat('id_key_', toString(number)))) as value_first,
    dictGet('hashed_array_dictionary_complex_key_simple_attributes', 'value_second', (number, concat('id_key_', toString(number)))) as value_second FROM system.numbers LIMIT 3;
SELECT 'dictGet with non existing value';
SELECT dictGet('hashed_array_dictionary_complex_key_simple_attributes', 'value_first', (number, concat('id_key_', toString(number)))) as value_first,
    dictGet('hashed_array_dictionary_complex_key_simple_attributes', 'value_second', (number, concat('id_key_', toString(number)))) as value_second FROM system.numbers LIMIT 4;
SELECT 'dictGetOrDefault existing value';
SELECT dictGetOrDefault('hashed_array_dictionary_complex_key_simple_attributes', 'value_first', (number, concat('id_key_', toString(number))), toString('default')) as value_first,
    dictGetOrDefault('hashed_array_dictionary_complex_key_simple_attributes', 'value_second', (number, concat('id_key_', toString(number))), toString('default')) as value_second FROM system.numbers LIMIT 3;
SELECT 'dictGetOrDefault non existing value';
SELECT dictGetOrDefault('hashed_array_dictionary_complex_key_simple_attributes', 'value_first', (number, concat('id_key_', toString(number))), toString('default')) as value_first,
    dictGetOrDefault('hashed_array_dictionary_complex_key_simple_attributes', 'value_second', (number, concat('id_key_', toString(number))), toString('default')) as value_second FROM system.numbers LIMIT 4;
SELECT 'dictHas';
SELECT dictHas('hashed_array_dictionary_complex_key_simple_attributes', (number, concat('id_key_', toString(number)))) FROM system.numbers LIMIT 4;
SELECT 'select all values as input stream';
SELECT * FROM hashed_array_dictionary_complex_key_simple_attributes ORDER BY (id, id_key);

DROP DICTIONARY hashed_array_dictionary_complex_key_simple_attributes;

DROP TABLE complex_key_simple_attributes_source_table;

DROP TABLE IF EXISTS complex_key_complex_attributes_source_table;
CREATE TABLE complex_key_complex_attributes_source_table
(
    id UInt64,
    id_key String,
    value_first String,
    value_second Nullable(String)
)
ENGINE = TinyLog;

INSERT INTO complex_key_complex_attributes_source_table VALUES(0, 'id_key_0', 'value_0', 'value_second_0');
INSERT INTO complex_key_complex_attributes_source_table VALUES(1, 'id_key_1', 'value_1', NULL);
INSERT INTO complex_key_complex_attributes_source_table VALUES(2, 'id_key_2', 'value_2', 'value_second_2');

DROP DICTIONARY IF EXISTS hashed_array_dictionary_complex_key_complex_attributes;
CREATE DICTIONARY hashed_array_dictionary_complex_key_complex_attributes
(
    id UInt64,
    id_key String,

    value_first String DEFAULT 'value_first_default',
    value_second Nullable(String) DEFAULT 'value_second_default'
)
PRIMARY KEY id, id_key
SOURCE(CLICKHOUSE(TABLE 'complex_key_complex_attributes_source_table'))
LIFETIME(MIN 1 MAX 1000)
LAYOUT(COMPLEX_KEY_HASHED_ARRAY());

SELECT 'Dictionary hashed_array_dictionary_complex_key_complex_attributes';
SELECT 'dictGet existing value';
SELECT dictGet('hashed_array_dictionary_complex_key_complex_attributes', 'value_first', (number, concat('id_key_', toString(number)))) as value_first,
    dictGet('hashed_array_dictionary_complex_key_complex_attributes', 'value_second', (number, concat('id_key_', toString(number)))) as value_second FROM system.numbers LIMIT 3;
SELECT 'dictGet with non existing value';
SELECT dictGet('hashed_array_dictionary_complex_key_complex_attributes', 'value_first', (number, concat('id_key_', toString(number)))) as value_first,
    dictGet('hashed_array_dictionary_complex_key_complex_attributes', 'value_second', (number, concat('id_key_', toString(number)))) as value_second FROM system.numbers LIMIT 4;
SELECT 'dictGetOrDefault existing value';
SELECT dictGetOrDefault('hashed_array_dictionary_complex_key_complex_attributes', 'value_first', (number, concat('id_key_', toString(number))), toString('default')) as value_first,
    dictGetOrDefault('hashed_array_dictionary_complex_key_complex_attributes', 'value_second', (number, concat('id_key_', toString(number))), toString('default')) as value_second FROM system.numbers LIMIT 3;
SELECT 'dictGetOrDefault non existing value';
SELECT dictGetOrDefault('hashed_array_dictionary_complex_key_complex_attributes', 'value_first', (number, concat('id_key_', toString(number))), toString('default')) as value_first,
    dictGetOrDefault('hashed_array_dictionary_complex_key_complex_attributes', 'value_second', (number, concat('id_key_', toString(number))), toString('default')) as value_second FROM system.numbers LIMIT 4;
SELECT 'dictHas';
SELECT dictHas('hashed_array_dictionary_complex_key_complex_attributes', (number, concat('id_key_', toString(number)))) FROM system.numbers LIMIT 4;
SELECT 'select all values as input stream';
SELECT * FROM hashed_array_dictionary_complex_key_complex_attributes ORDER BY (id, id_key);

DROP DICTIONARY hashed_array_dictionary_complex_key_complex_attributes;
DROP TABLE complex_key_complex_attributes_source_table;
@ -0,0 +1,2 @@
468426149779992039
1
7
tests/queries/1_stateful/00167_read_bytes_from_fs.sql
Normal file
@ -0,0 +1,7 @@
SELECT sum(cityHash64(*)) FROM test.hits SETTINGS max_threads=40;

-- We had a bug which led to additional compressed data being read. test.hits compressed size is about 1.2Gb, but we read more than 3Gb.
-- Small additional reads are still possible, so we compare with about 1.5Gb.
SYSTEM FLUSH LOGS;

SELECT ProfileEvents['ReadBufferFromFileDescriptorReadBytes'] < 1500000000 from system.query_log where query = 'SELECT sum(cityHash64(*)) FROM test.hits SETTINGS max_threads=40;' and current_database = currentDatabase() and type = 'QueryFinish';
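-- Aside (hypothetical local debugging query, not part of the test): ProfileEvents in
-- system.query_log is a per-query map of counters, so the same accessor used above can be
-- pointed at recent queries to eyeball read volume instead of asserting a fixed threshold.
SELECT query, ProfileEvents['ReadBufferFromFileDescriptorReadBytes'] AS read_bytes
FROM system.query_log
WHERE current_database = currentDatabase() AND type = 'QueryFinish'
ORDER BY event_time DESC
LIMIT 10;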