Merge remote-tracking branch 'upstream/master' into async-insert

commit 1bb41593d1
Ivan Lezhankin 2021-06-28 22:55:56 +03:00
486 changed files with 9456 additions and 3077 deletions

.gitmodules vendored

@ -103,7 +103,7 @@
url = https://github.com/ClickHouse-Extras/fastops
[submodule "contrib/orc"]
path = contrib/orc
url = https://github.com/apache/orc
url = https://github.com/ClickHouse-Extras/orc
[submodule "contrib/sparsehash-c11"]
path = contrib/sparsehash-c11
url = https://github.com/sparsehash/sparsehash-c11.git


@ -2,8 +2,6 @@
#### Upgrade Notes
* One bug has been found after release: [#25187](https://github.com/ClickHouse/ClickHouse/issues/25187).
* Do not upgrade if you have a partition key with `UUID`.
* `zstd` compression library is updated to v1.5.0. You may see "checksum does not match" messages in replication. These messages are expected due to the update of the compression algorithm; they are informational and do not indicate any undesired behaviour.
* The setting `compile_expressions` is enabled by default. Although it has been heavily tested on a variety of scenarios, if you find undesired behaviour on your servers, you can try turning this setting off.
* Values of `UUID` type cannot be compared with integers. For example, instead of writing `uuid != 0`, write `uuid != '00000000-0000-0000-0000-000000000000'`.
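For example, a minimal sketch of the new comparison rule (the table and column names are illustrative):

```sql
-- Comparing a UUID column with an integer now throws an exception:
--   SELECT count() FROM events WHERE session_id != 0;
-- Compare against the zero-UUID literal instead:
SELECT count() FROM events WHERE session_id != '00000000-0000-0000-0000-000000000000';
```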


@ -184,10 +184,27 @@ endif ()
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -rdynamic")
find_program (OBJCOPY_PATH NAMES "llvm-objcopy" "llvm-objcopy-12" "llvm-objcopy-11" "llvm-objcopy-10" "llvm-objcopy-9" "llvm-objcopy-8" "objcopy")
if (NOT OBJCOPY_PATH AND OS_DARWIN)
find_program (BREW_PATH NAMES "brew")
if (BREW_PATH)
execute_process (COMMAND ${BREW_PATH} --prefix llvm ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE LLVM_PREFIX)
if (LLVM_PREFIX)
find_program (OBJCOPY_PATH NAMES "llvm-objcopy" PATHS "${LLVM_PREFIX}/bin" NO_DEFAULT_PATH)
endif ()
if (NOT OBJCOPY_PATH)
execute_process (COMMAND ${BREW_PATH} --prefix binutils ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE BINUTILS_PREFIX)
if (BINUTILS_PREFIX)
find_program (OBJCOPY_PATH NAMES "objcopy" PATHS "${BINUTILS_PREFIX}/bin" NO_DEFAULT_PATH)
endif ()
endif ()
endif ()
endif ()
if (OBJCOPY_PATH)
message(STATUS "Using objcopy: ${OBJCOPY_PATH}.")
message (STATUS "Using objcopy: ${OBJCOPY_PATH}")
else ()
message(FATAL_ERROR "Cannot find objcopy.")
message (FATAL_ERROR "Cannot find objcopy.")
endif ()
if (OS_DARWIN)


@ -15,4 +15,4 @@ ClickHouse® is an open-source column-oriented database management system that a
* You can also [fill this form](https://clickhouse.tech/#meet) to meet Yandex ClickHouse team in person.
## Upcoming Events
* [SF Bay Area ClickHouse Community Meetup (online)](https://www.meetup.com/San-Francisco-Bay-Area-ClickHouse-Meetup/events/278144089/) on 16 June 2021.
* [China ClickHouse Community Meetup (online)](http://hdxu.cn/rhbfZ) on 26 June 2021.


@ -17,7 +17,7 @@ class DateLUT : private boost::noncopyable
{
public:
/// Return singleton DateLUTImpl instance for the default time zone.
static ALWAYS_INLINE const DateLUTImpl & instance()
static ALWAYS_INLINE const DateLUTImpl & instance() // -V1071
{
const auto & date_lut = getInstance();
return *date_lut.default_impl.load(std::memory_order_acquire);


@ -8,13 +8,6 @@
extern "C" {
#endif
#include <pthread.h>
size_t __pthread_get_minstack(const pthread_attr_t * attr)
{
return 1048576; /// This is a guess. Not sure it is correct.
}
#include <signal.h>
#include <unistd.h>
#include <string.h>
@ -141,6 +134,8 @@ int __open_2(const char *path, int oflag)
}
#include <pthread.h>
/// No-ops.
int pthread_setname_np(pthread_t thread, const char *name) { return 0; }
int pthread_getname_np(pthread_t thread, char *name, size_t len) { name[0] = '\0'; return 0; };


@ -2,7 +2,7 @@
#include <errmsg.h>
#include <mysql.h>
#else
#include <mysql/errmsg.h>
#include <mysql/errmsg.h> //Y_IGNORE
#include <mysql/mysql.h>
#endif

base/mysqlxx/ya.make

@ -0,0 +1,39 @@
# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it.
LIBRARY()
OWNER(g:clickhouse)
CFLAGS(-g0)
PEERDIR(
contrib/restricted/boost/libs
contrib/libs/libmysql_r
contrib/libs/poco/Foundation
contrib/libs/poco/Util
)
ADDINCL(
GLOBAL clickhouse/base
clickhouse/base
contrib/libs/libmysql_r
)
NO_COMPILER_WARNINGS()
NO_UTIL()
SRCS(
Connection.cpp
Exception.cpp
Pool.cpp
PoolFactory.cpp
PoolWithFailover.cpp
Query.cpp
ResultBase.cpp
Row.cpp
UseQueryResult.cpp
Value.cpp
)
END()

base/mysqlxx/ya.make.in

@ -0,0 +1,28 @@
LIBRARY()
OWNER(g:clickhouse)
CFLAGS(-g0)
PEERDIR(
contrib/restricted/boost/libs
contrib/libs/libmysql_r
contrib/libs/poco/Foundation
contrib/libs/poco/Util
)
ADDINCL(
GLOBAL clickhouse/base
clickhouse/base
contrib/libs/libmysql_r
)
NO_COMPILER_WARNINGS()
NO_UTIL()
SRCS(
<? find . -name '*.cpp' | grep -v -F tests/ | grep -v -F examples | sed 's/^\.\// /' | sort ?>
)
END()


@ -4,6 +4,7 @@ RECURSE(
common
daemon
loggers
mysqlxx
pcg-random
widechar_width
readpassphrase


@ -1,9 +1,9 @@
# This strings autochanged from release_lib.sh:
SET(VERSION_REVISION 54452)
SET(VERSION_REVISION 54453)
SET(VERSION_MAJOR 21)
SET(VERSION_MINOR 7)
SET(VERSION_MINOR 8)
SET(VERSION_PATCH 1)
SET(VERSION_GITHASH 976ccc2e908ac3bc28f763bfea8134ea0a121b40)
SET(VERSION_DESCRIBE v21.7.1.1-prestable)
SET(VERSION_STRING 21.7.1.1)
SET(VERSION_GITHASH fb895056568e26200629c7d19626e92d2dedc70d)
SET(VERSION_DESCRIBE v21.8.1.1-prestable)
SET(VERSION_STRING 21.8.1.1)
# end of autochange


@ -33,44 +33,25 @@ macro(clickhouse_embed_binaries)
message(FATAL_ERROR "The list of binary resources to embed may not be empty")
endif()
# If cross-compiling, ensure we use the toolchain file and target the
# actual target architecture
if (CMAKE_CROSSCOMPILING)
set(CROSS_COMPILE_FLAGS "--target=${CMAKE_C_COMPILER_TARGET} --gcc-toolchain=${TOOLCHAIN_FILE}")
else()
set(CROSS_COMPILE_FLAGS "")
endif()
add_library("${EMBED_TARGET}" STATIC)
set_target_properties("${EMBED_TARGET}" PROPERTIES LINKER_LANGUAGE C)
set(EMBED_TEMPLATE_FILE "${PROJECT_SOURCE_DIR}/programs/embed_binary.S.in")
set(RESOURCE_OBJS)
foreach(RESOURCE_FILE ${EMBED_RESOURCES})
set(RESOURCE_OBJ "${RESOURCE_FILE}.o")
list(APPEND RESOURCE_OBJS "${RESOURCE_OBJ}")
# Normalize the name of the resource
foreach(RESOURCE_FILE ${EMBED_RESOURCES})
set(ASSEMBLY_FILE_NAME "${RESOURCE_FILE}.S")
set(BINARY_FILE_NAME "${RESOURCE_FILE}")
# Normalize the name of the resource.
string(REGEX REPLACE "[\./-]" "_" SYMBOL_NAME "${RESOURCE_FILE}") # - must be last in regex
string(REPLACE "+" "_PLUS_" SYMBOL_NAME "${SYMBOL_NAME}")
set(ASSEMBLY_FILE_NAME "${RESOURCE_FILE}.S")
# Put the configured assembly file in the output directory.
# This is so we can clean it up as usual, and we CD to the
# source directory before compiling, so that the assembly
# `.incbin` directive can find the file.
# Generate the configured assembly file in the output directory.
configure_file("${EMBED_TEMPLATE_FILE}" "${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}" @ONLY)
# Generate the output object file by compiling the assembly, in the directory of
# the sources so that the resource file may also be found
add_custom_command(
OUTPUT ${RESOURCE_OBJ}
COMMAND cd "${EMBED_RESOURCE_DIR}" &&
${CMAKE_C_COMPILER} "${CROSS_COMPILE_FLAGS}" -c -o
"${CMAKE_CURRENT_BINARY_DIR}/${RESOURCE_OBJ}"
"${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}"
)
set_source_files_properties("${RESOURCE_OBJ}" PROPERTIES EXTERNAL_OBJECT true GENERATED true)
endforeach()
# Set the include directory for relative paths specified for `.incbin` directive.
set_property(SOURCE "${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}" APPEND PROPERTY INCLUDE_DIRECTORIES "${EMBED_RESOURCE_DIR}")
add_library("${EMBED_TARGET}" STATIC ${RESOURCE_OBJS})
set_target_properties("${EMBED_TARGET}" PROPERTIES LINKER_LANGUAGE C)
target_sources("${EMBED_TARGET}" PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}")
endforeach()
endmacro()


@ -4,7 +4,6 @@ set (CMAKE_C_COMPILER_TARGET "aarch64-linux-gnu")
set (CMAKE_CXX_COMPILER_TARGET "aarch64-linux-gnu")
set (CMAKE_ASM_COMPILER_TARGET "aarch64-linux-gnu")
set (CMAKE_SYSROOT "${CMAKE_CURRENT_LIST_DIR}/../toolchain/linux-aarch64/aarch64-linux-gnu/libc")
get_filename_component (TOOLCHAIN_FILE "${CMAKE_TOOLCHAIN_FILE}" REALPATH)
# We don't use compiler from toolchain because it's gcc-8, and we provide support only for gcc-9.
set (CMAKE_AR "${CMAKE_CURRENT_LIST_DIR}/../toolchain/linux-aarch64/bin/aarch64-linux-gnu-ar" CACHE FILEPATH "" FORCE)

contrib/NuRaft vendored

@ -1 +1 @@
Subproject commit 2a1bf7d87b4a03561fc66fbb49cee8a288983c5d
Subproject commit 976874b7aa7f422bf4ea595bb7d1166c617b1c26

contrib/arrow vendored

@ -1 +1 @@
Subproject commit 616b3dc76a0c8450b4027ded8a78e9619d7c845f
Subproject commit debf751a129bdda9ff4d1e895e08957ff77000a1


@ -188,6 +188,7 @@ set(ARROW_SRCS
"${LIBRARY_DIR}/array/util.cc"
"${LIBRARY_DIR}/array/validate.cc"
"${LIBRARY_DIR}/compute/api_aggregate.cc"
"${LIBRARY_DIR}/compute/api_scalar.cc"
"${LIBRARY_DIR}/compute/api_vector.cc"
"${LIBRARY_DIR}/compute/cast.cc"
@ -198,8 +199,11 @@ set(ARROW_SRCS
"${LIBRARY_DIR}/compute/kernels/aggregate_basic.cc"
"${LIBRARY_DIR}/compute/kernels/aggregate_mode.cc"
"${LIBRARY_DIR}/compute/kernels/aggregate_quantile.cc"
"${LIBRARY_DIR}/compute/kernels/aggregate_tdigest.cc"
"${LIBRARY_DIR}/compute/kernels/aggregate_var_std.cc"
"${LIBRARY_DIR}/compute/kernels/codegen_internal.cc"
"${LIBRARY_DIR}/compute/kernels/hash_aggregate.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_arithmetic.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_boolean.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_cast_boolean.cc"
@ -243,6 +247,7 @@ set(ARROW_SRCS
"${LIBRARY_DIR}/io/interfaces.cc"
"${LIBRARY_DIR}/io/memory.cc"
"${LIBRARY_DIR}/io/slow.cc"
"${LIBRARY_DIR}/io/transform.cc"
"${LIBRARY_DIR}/tensor/coo_converter.cc"
"${LIBRARY_DIR}/tensor/csf_converter.cc"
@ -256,11 +261,8 @@ set(ARROW_SRCS
"${LIBRARY_DIR}/util/bitmap_builders.cc"
"${LIBRARY_DIR}/util/bitmap_ops.cc"
"${LIBRARY_DIR}/util/bpacking.cc"
"${LIBRARY_DIR}/util/cancel.cc"
"${LIBRARY_DIR}/util/compression.cc"
"${LIBRARY_DIR}/util/compression_lz4.cc"
"${LIBRARY_DIR}/util/compression_snappy.cc"
"${LIBRARY_DIR}/util/compression_zlib.cc"
"${LIBRARY_DIR}/util/compression_zstd.cc"
"${LIBRARY_DIR}/util/cpu_info.cc"
"${LIBRARY_DIR}/util/decimal.cc"
"${LIBRARY_DIR}/util/delimiting.cc"
@ -268,13 +270,14 @@ set(ARROW_SRCS
"${LIBRARY_DIR}/util/future.cc"
"${LIBRARY_DIR}/util/int_util.cc"
"${LIBRARY_DIR}/util/io_util.cc"
"${LIBRARY_DIR}/util/iterator.cc"
"${LIBRARY_DIR}/util/key_value_metadata.cc"
"${LIBRARY_DIR}/util/logging.cc"
"${LIBRARY_DIR}/util/memory.cc"
"${LIBRARY_DIR}/util/mutex.cc"
"${LIBRARY_DIR}/util/string_builder.cc"
"${LIBRARY_DIR}/util/string.cc"
"${LIBRARY_DIR}/util/task_group.cc"
"${LIBRARY_DIR}/util/tdigest.cc"
"${LIBRARY_DIR}/util/thread_pool.cc"
"${LIBRARY_DIR}/util/time.cc"
"${LIBRARY_DIR}/util/trie.cc"
@ -368,14 +371,14 @@ set(PARQUET_SRCS
"${LIBRARY_DIR}/column_reader.cc"
"${LIBRARY_DIR}/column_scanner.cc"
"${LIBRARY_DIR}/column_writer.cc"
"${LIBRARY_DIR}/deprecated_io.cc"
"${LIBRARY_DIR}/encoding.cc"
"${LIBRARY_DIR}/encryption.cc"
"${LIBRARY_DIR}/encryption_internal.cc"
"${LIBRARY_DIR}/encryption/encryption.cc"
"${LIBRARY_DIR}/encryption/encryption_internal.cc"
"${LIBRARY_DIR}/encryption/internal_file_decryptor.cc"
"${LIBRARY_DIR}/encryption/internal_file_encryptor.cc"
"${LIBRARY_DIR}/exception.cc"
"${LIBRARY_DIR}/file_reader.cc"
"${LIBRARY_DIR}/file_writer.cc"
"${LIBRARY_DIR}/internal_file_decryptor.cc"
"${LIBRARY_DIR}/internal_file_encryptor.cc"
"${LIBRARY_DIR}/level_conversion.cc"
"${LIBRARY_DIR}/level_comparison.cc"
"${LIBRARY_DIR}/metadata.cc"
@ -385,6 +388,8 @@ set(PARQUET_SRCS
"${LIBRARY_DIR}/properties.cc"
"${LIBRARY_DIR}/schema.cc"
"${LIBRARY_DIR}/statistics.cc"
"${LIBRARY_DIR}/stream_reader.cc"
"${LIBRARY_DIR}/stream_writer.cc"
"${LIBRARY_DIR}/types.cc"
"${GEN_LIBRARY_DIR}/parquet_constants.cpp"


@ -26,7 +26,7 @@ if (NOT USE_INTERNAL_CCTZ_LIBRARY)
set_property (TARGET cctz PROPERTY IMPORTED_LOCATION ${LIBRARY_CCTZ})
set_property (TARGET cctz PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${INCLUDE_CCTZ})
endif()
set(SYSTEM_STORAGE_TZ_FILE "${CMAKE_BINARY_DIR}/src/Storages/System/StorageSystemTimeZones.generated.cpp")
file(REMOVE ${SYSTEM_STORAGE_TZ_FILE})
file(APPEND ${SYSTEM_STORAGE_TZ_FILE} "// autogenerated by ClickHouse/contrib/cctz-cmake/CMakeLists.txt\n")

contrib/flatbuffers vendored

@ -1 +1 @@
Subproject commit 22e3ffc66d2d7d72d1414390aa0f04ffd114a5a1
Subproject commit eb3f827948241ce0e701516f16cd67324802bce9


@ -1,7 +1,7 @@
add_library(murmurhash
src/murmurhash2.cpp
src/murmurhash3.cpp
include/murmurhash2.h
include/murmurhash3.h)
src/MurmurHash2.cpp
src/MurmurHash3.cpp
include/MurmurHash2.h
include/MurmurHash3.h)
target_include_directories (murmurhash PUBLIC include)


@ -0,0 +1,49 @@
//-----------------------------------------------------------------------------
// MurmurHash2 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.
#ifndef MURMURHASH2_H
#define MURMURHASH2_H
#include <stddef.h>
//-----------------------------------------------------------------------------
// Platform-specific functions and macros
// Microsoft Visual Studio
#if defined(_MSC_VER) && (_MSC_VER < 1600)
typedef unsigned char uint8_t;
typedef unsigned int uint32_t;
typedef unsigned __int64 uint64_t;
// Other compilers
#else // defined(_MSC_VER)
#include <stdint.h>
#endif // !defined(_MSC_VER)
//-----------------------------------------------------------------------------
#ifdef __cplusplus
extern "C" {
#endif
uint32_t MurmurHash2 ( const void * key, size_t len, uint32_t seed );
uint64_t MurmurHash64A ( const void * key, size_t len, uint64_t seed );
uint64_t MurmurHash64B ( const void * key, size_t len, uint64_t seed );
uint32_t MurmurHash2A ( const void * key, size_t len, uint32_t seed );
uint32_t MurmurHashNeutral2 ( const void * key, size_t len, uint32_t seed );
uint32_t MurmurHashAligned2 ( const void * key, size_t len, uint32_t seed );
#ifdef __cplusplus
}
#endif
//-----------------------------------------------------------------------------
#endif // MURMURHASH2_H


@ -2,7 +2,10 @@
// MurmurHash3 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.
#pragma once
#ifndef MURMURHASH3_H
#define MURMURHASH3_H
#include <stddef.h>
//-----------------------------------------------------------------------------
// Platform-specific functions and macros
@ -23,20 +26,22 @@ typedef unsigned __int64 uint64_t;
#endif // !defined(_MSC_VER)
//-----------------------------------------------------------------------------
#ifdef __cplusplus
extern "C" {
#endif
//-----------------------------------------------------------------------------
void MurmurHash3_x86_32 ( const void * key, size_t len, uint32_t seed, void * out );
void MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed, void * out );
void MurmurHash3_x86_128 ( const void * key, size_t len, uint32_t seed, void * out );
void MurmurHash3_x86_128 ( const void * key, int len, uint32_t seed, void * out );
void MurmurHash3_x64_128 ( const void * key, int len, uint32_t seed, void * out );
//-----------------------------------------------------------------------------
void MurmurHash3_x64_128 ( const void * key, size_t len, uint32_t seed, void * out );
#ifdef __cplusplus
}
#endif
//-----------------------------------------------------------------------------
#endif // MURMURHASH3_H


@ -1,31 +0,0 @@
//-----------------------------------------------------------------------------
// MurmurHash2 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.
#pragma once
//-----------------------------------------------------------------------------
// Platform-specific functions and macros
// Microsoft Visual Studio
#if defined(_MSC_VER) && (_MSC_VER < 1600)
typedef unsigned char uint8_t;
typedef unsigned int uint32_t;
typedef unsigned __int64 uint64_t;
// Other compilers
#else // defined(_MSC_VER)
#include <stdint.h>
#endif // !defined(_MSC_VER)
uint32_t MurmurHash2 (const void * key, int len, uint32_t seed);
uint64_t MurmurHash64A (const void * key, int len, uint64_t seed);
uint64_t MurmurHash64B (const void * key, int len, uint64_t seed);
uint32_t MurmurHash2A (const void * key, int len, uint32_t seed);
uint32_t MurmurHashNeutral2 (const void * key, int len, uint32_t seed);
uint32_t MurmurHashAligned2 (const void * key, int len, uint32_t seed);


@ -0,0 +1,523 @@
//-----------------------------------------------------------------------------
// MurmurHash2 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.
// Note - This code makes a few assumptions about how your machine behaves -
// 1. We can read a 4-byte value from any address without crashing
// 2. sizeof(int) == 4
// And it has a few limitations -
// 1. It will not work incrementally.
// 2. It will not produce the same results on little-endian and big-endian
// machines.
#include "MurmurHash2.h"
//-----------------------------------------------------------------------------
// Platform-specific functions and macros
// Microsoft Visual Studio
#if defined(_MSC_VER)
#define BIG_CONSTANT(x) (x)
// Other compilers
#else // defined(_MSC_VER)
#define BIG_CONSTANT(x) (x##LLU)
#endif // !defined(_MSC_VER)
//-----------------------------------------------------------------------------
uint32_t MurmurHash2 ( const void * key, size_t len, uint32_t seed )
{
// 'm' and 'r' are mixing constants generated offline.
// They're not really 'magic', they just happen to work well.
const uint32_t m = 0x5bd1e995;
const int r = 24;
// Initialize the hash to a 'random' value
uint32_t h = seed ^ len;
// Mix 4 bytes at a time into the hash
const unsigned char * data = (const unsigned char *)key;
while(len >= 4)
{
uint32_t k = *(uint32_t*)data;
k *= m;
k ^= k >> r;
k *= m;
h *= m;
h ^= k;
data += 4;
len -= 4;
}
// Handle the last few bytes of the input array
switch(len)
{
case 3: h ^= data[2] << 16;
case 2: h ^= data[1] << 8;
case 1: h ^= data[0];
h *= m;
};
// Do a few final mixes of the hash to ensure the last few
// bytes are well-incorporated.
h ^= h >> 13;
h *= m;
h ^= h >> 15;
return h;
}
//-----------------------------------------------------------------------------
// MurmurHash2, 64-bit versions, by Austin Appleby
// The same caveats as 32-bit MurmurHash2 apply here - beware of alignment
// and endian-ness issues if used across multiple platforms.
// 64-bit hash for 64-bit platforms
uint64_t MurmurHash64A ( const void * key, size_t len, uint64_t seed )
{
const uint64_t m = BIG_CONSTANT(0xc6a4a7935bd1e995);
const int r = 47;
uint64_t h = seed ^ (len * m);
const uint64_t * data = (const uint64_t *)key;
const uint64_t * end = data + (len/8);
while(data != end)
{
uint64_t k = *data++;
k *= m;
k ^= k >> r;
k *= m;
h ^= k;
h *= m;
}
const unsigned char * data2 = (const unsigned char*)data;
switch(len & 7)
{
case 7: h ^= uint64_t(data2[6]) << 48;
case 6: h ^= uint64_t(data2[5]) << 40;
case 5: h ^= uint64_t(data2[4]) << 32;
case 4: h ^= uint64_t(data2[3]) << 24;
case 3: h ^= uint64_t(data2[2]) << 16;
case 2: h ^= uint64_t(data2[1]) << 8;
case 1: h ^= uint64_t(data2[0]);
h *= m;
};
h ^= h >> r;
h *= m;
h ^= h >> r;
return h;
}
// 64-bit hash for 32-bit platforms
uint64_t MurmurHash64B ( const void * key, size_t len, uint64_t seed )
{
const uint32_t m = 0x5bd1e995;
const int r = 24;
uint32_t h1 = uint32_t(seed) ^ len;
uint32_t h2 = uint32_t(seed >> 32);
const uint32_t * data = (const uint32_t *)key;
while(len >= 8)
{
uint32_t k1 = *data++;
k1 *= m; k1 ^= k1 >> r; k1 *= m;
h1 *= m; h1 ^= k1;
len -= 4;
uint32_t k2 = *data++;
k2 *= m; k2 ^= k2 >> r; k2 *= m;
h2 *= m; h2 ^= k2;
len -= 4;
}
if(len >= 4)
{
uint32_t k1 = *data++;
k1 *= m; k1 ^= k1 >> r; k1 *= m;
h1 *= m; h1 ^= k1;
len -= 4;
}
switch(len)
{
case 3: h2 ^= ((unsigned char*)data)[2] << 16;
case 2: h2 ^= ((unsigned char*)data)[1] << 8;
case 1: h2 ^= ((unsigned char*)data)[0];
h2 *= m;
};
h1 ^= h2 >> 18; h1 *= m;
h2 ^= h1 >> 22; h2 *= m;
h1 ^= h2 >> 17; h1 *= m;
h2 ^= h1 >> 19; h2 *= m;
uint64_t h = h1;
h = (h << 32) | h2;
return h;
}
//-----------------------------------------------------------------------------
// MurmurHash2A, by Austin Appleby
// This is a variant of MurmurHash2 modified to use the Merkle-Damgard
// construction. Bulk speed should be identical to Murmur2, small-key speed
// will be 10%-20% slower due to the added overhead at the end of the hash.
// This variant fixes a minor issue where null keys were more likely to
// collide with each other than expected, and also makes the function
// more amenable to incremental implementations.
#define mmix(h,k) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
uint32_t MurmurHash2A ( const void * key, size_t len, uint32_t seed )
{
const uint32_t m = 0x5bd1e995;
const int r = 24;
uint32_t l = len;
const unsigned char * data = (const unsigned char *)key;
uint32_t h = seed;
while(len >= 4)
{
uint32_t k = *(uint32_t*)data;
mmix(h,k);
data += 4;
len -= 4;
}
uint32_t t = 0;
switch(len)
{
case 3: t ^= data[2] << 16;
case 2: t ^= data[1] << 8;
case 1: t ^= data[0];
};
mmix(h,t);
mmix(h,l);
h ^= h >> 13;
h *= m;
h ^= h >> 15;
return h;
}
//-----------------------------------------------------------------------------
// CMurmurHash2A, by Austin Appleby
// This is a sample implementation of MurmurHash2A designed to work
// incrementally.
// Usage -
// CMurmurHash2A hasher
// hasher.Begin(seed);
// hasher.Add(data1,size1);
// hasher.Add(data2,size2);
// ...
// hasher.Add(dataN,sizeN);
// uint32_t hash = hasher.End()
class CMurmurHash2A
{
public:
void Begin ( uint32_t seed = 0 )
{
m_hash = seed;
m_tail = 0;
m_count = 0;
m_size = 0;
}
void Add ( const unsigned char * data, size_t len )
{
m_size += len;
MixTail(data,len);
while(len >= 4)
{
uint32_t k = *(uint32_t*)data;
mmix(m_hash,k);
data += 4;
len -= 4;
}
MixTail(data,len);
}
uint32_t End ( void )
{
mmix(m_hash,m_tail);
mmix(m_hash,m_size);
m_hash ^= m_hash >> 13;
m_hash *= m;
m_hash ^= m_hash >> 15;
return m_hash;
}
private:
static const uint32_t m = 0x5bd1e995;
static const int r = 24;
void MixTail ( const unsigned char * & data, size_t & len )
{
while( len && ((len<4) || m_count) )
{
m_tail |= (*data++) << (m_count * 8);
m_count++;
len--;
if(m_count == 4)
{
mmix(m_hash,m_tail);
m_tail = 0;
m_count = 0;
}
}
}
uint32_t m_hash;
uint32_t m_tail;
uint32_t m_count;
uint32_t m_size;
};
//-----------------------------------------------------------------------------
// MurmurHashNeutral2, by Austin Appleby
// Same as MurmurHash2, but endian- and alignment-neutral.
// Half the speed though, alas.
uint32_t MurmurHashNeutral2 ( const void * key, size_t len, uint32_t seed )
{
const uint32_t m = 0x5bd1e995;
const int r = 24;
uint32_t h = seed ^ len;
const unsigned char * data = (const unsigned char *)key;
while(len >= 4)
{
uint32_t k;
k = data[0];
k |= data[1] << 8;
k |= data[2] << 16;
k |= data[3] << 24;
k *= m;
k ^= k >> r;
k *= m;
h *= m;
h ^= k;
data += 4;
len -= 4;
}
switch(len)
{
case 3: h ^= data[2] << 16;
case 2: h ^= data[1] << 8;
case 1: h ^= data[0];
h *= m;
};
h ^= h >> 13;
h *= m;
h ^= h >> 15;
return h;
}
//-----------------------------------------------------------------------------
// MurmurHashAligned2, by Austin Appleby
// Same algorithm as MurmurHash2, but only does aligned reads - should be safer
// on certain platforms.
// Performance will be lower than MurmurHash2
#define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
uint32_t MurmurHashAligned2 ( const void * key, size_t len, uint32_t seed )
{
const uint32_t m = 0x5bd1e995;
const int r = 24;
const unsigned char * data = (const unsigned char *)key;
uint32_t h = seed ^ len;
size_t align = (uint64_t)data & 3;
if(align && (len >= 4))
{
// Pre-load the temp registers
uint32_t t = 0, d = 0;
switch(align)
{
case 1: t |= data[2] << 16;
case 2: t |= data[1] << 8;
case 3: t |= data[0];
}
t <<= (8 * align);
data += 4-align;
len -= 4-align;
int sl = 8 * (4-align);
int sr = 8 * align;
// Mix
while(len >= 4)
{
d = *(uint32_t *)data;
t = (t >> sr) | (d << sl);
uint32_t k = t;
MIX(h,k,m);
t = d;
data += 4;
len -= 4;
}
// Handle leftover data in temp registers
d = 0;
if(len >= align)
{
switch(align)
{
case 3: d |= data[2] << 16;
case 2: d |= data[1] << 8;
case 1: d |= data[0];
}
uint32_t k = (t >> sr) | (d << sl);
MIX(h,k,m);
data += align;
len -= align;
//----------
// Handle tail bytes
switch(len)
{
case 3: h ^= data[2] << 16;
case 2: h ^= data[1] << 8;
case 1: h ^= data[0];
h *= m;
};
}
else
{
switch(len)
{
case 3: d |= data[2] << 16;
case 2: d |= data[1] << 8;
case 1: d |= data[0];
case 0: h ^= (t >> sr) | (d << sl);
h *= m;
}
}
h ^= h >> 13;
h *= m;
h ^= h >> 15;
return h;
}
else
{
while(len >= 4)
{
uint32_t k = *(uint32_t *)data;
MIX(h,k,m);
data += 4;
len -= 4;
}
//----------
// Handle tail bytes
switch(len)
{
case 3: h ^= data[2] << 16;
case 2: h ^= data[1] << 8;
case 1: h ^= data[0];
h *= m;
};
h ^= h >> 13;
h *= m;
h ^= h >> 15;
return h;
}
}
//-----------------------------------------------------------------------------


@ -1,3 +1,4 @@
//-----------------------------------------------------------------------------
// MurmurHash3 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.
@ -6,8 +7,8 @@
// compile and run any of them on any platform, but your performance with the
// non-native version will be less than optimal.
#include "murmurhash3.h"
#include <cstring>
#include "MurmurHash3.h"
#include <string.h>
//-----------------------------------------------------------------------------
// Platform-specific functions and macros
@ -93,7 +94,7 @@ FORCE_INLINE uint64_t fmix64 ( uint64_t k )
//-----------------------------------------------------------------------------
void MurmurHash3_x86_32 ( const void * key, int len,
void MurmurHash3_x86_32 ( const void * key, size_t len,
uint32_t seed, void * out )
{
const uint8_t * data = (const uint8_t*)key;
@ -149,7 +150,7 @@ void MurmurHash3_x86_32 ( const void * key, int len,
//-----------------------------------------------------------------------------
void MurmurHash3_x86_128 ( const void * key, const int len,
void MurmurHash3_x86_128 ( const void * key, const size_t len,
uint32_t seed, void * out )
{
const uint8_t * data = (const uint8_t*)key;
@ -254,7 +255,7 @@ void MurmurHash3_x86_128 ( const void * key, const int len,
//-----------------------------------------------------------------------------
void MurmurHash3_x64_128 ( const void * key, const int len,
void MurmurHash3_x64_128 ( const void * key, const size_t len,
const uint32_t seed, void * out )
{
const uint8_t * data = (const uint8_t*)key;
@ -332,3 +333,6 @@ void MurmurHash3_x64_128 ( const void * key, const int len,
((uint64_t*)out)[0] = h1;
((uint64_t*)out)[1] = h2;
}
//-----------------------------------------------------------------------------


@ -1,423 +0,0 @@
// MurmurHash2 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.
// Note - This code makes a few assumptions about how your machine behaves -
// 1. We can read a 4-byte value from any address without crashing
// 2. sizeof(int) == 4
// And it has a few limitations -
// 1. It will not work incrementally.
// 2. It will not produce the same results on little-endian and big-endian
// machines.
#include "murmurhash2.h"
#include <cstring>
// Platform-specific functions and macros
// Microsoft Visual Studio
#if defined(_MSC_VER)
#define BIG_CONSTANT(x) (x)
// Other compilers
#else // defined(_MSC_VER)
#define BIG_CONSTANT(x) (x##LLU)
#endif // !defined(_MSC_VER)
uint32_t MurmurHash2(const void * key, int len, uint32_t seed)
{
// 'm' and 'r' are mixing constants generated offline.
// They're not really 'magic', they just happen to work well.
const uint32_t m = 0x5bd1e995;
const int r = 24;
// Initialize the hash to a 'random' value
uint32_t h = seed ^ len;
// Mix 4 bytes at a time into the hash
const unsigned char * data = reinterpret_cast<const unsigned char *>(key);
while (len >= 4)
{
uint32_t k;
memcpy(&k, data, sizeof(k));
k *= m;
k ^= k >> r;
k *= m;
h *= m;
h ^= k;
data += 4;
len -= 4;
}
// Handle the last few bytes of the input array
switch (len)
{
case 3: h ^= data[2] << 16;
case 2: h ^= data[1] << 8;
case 1: h ^= data[0];
h *= m;
};
// Do a few final mixes of the hash to ensure the last few
// bytes are well-incorporated.
h ^= h >> 13;
h *= m;
h ^= h >> 15;
return h;
}
// MurmurHash2, 64-bit versions, by Austin Appleby
// The same caveats as 32-bit MurmurHash2 apply here - beware of alignment
// and endian-ness issues if used across multiple platforms.
// 64-bit hash for 64-bit platforms
uint64_t MurmurHash64A(const void * key, int len, uint64_t seed)
{
const uint64_t m = BIG_CONSTANT(0xc6a4a7935bd1e995);
const int r = 47;
uint64_t h = seed ^ (len * m);
const uint64_t * data = reinterpret_cast<const uint64_t *>(key);
const uint64_t * end = data + (len/8);
while (data != end)
{
uint64_t k = *data++;
k *= m;
k ^= k >> r;
k *= m;
h ^= k;
h *= m;
}
const unsigned char * data2 = reinterpret_cast<const unsigned char *>(data);
switch (len & 7)
{
case 7: h ^= static_cast<uint64_t>(data2[6]) << 48;
case 6: h ^= static_cast<uint64_t>(data2[5]) << 40;
case 5: h ^= static_cast<uint64_t>(data2[4]) << 32;
case 4: h ^= static_cast<uint64_t>(data2[3]) << 24;
case 3: h ^= static_cast<uint64_t>(data2[2]) << 16;
case 2: h ^= static_cast<uint64_t>(data2[1]) << 8;
case 1: h ^= static_cast<uint64_t>(data2[0]);
h *= m;
};
h ^= h >> r;
h *= m;
h ^= h >> r;
return h;
}
// 64-bit hash for 32-bit platforms
uint64_t MurmurHash64B(const void * key, int len, uint64_t seed)
{
const uint32_t m = 0x5bd1e995;
const int r = 24;
uint32_t h1 = static_cast<uint32_t>(seed) ^ len;
uint32_t h2 = static_cast<uint32_t>(seed >> 32);
const uint32_t * data = reinterpret_cast<const uint32_t *>(key);
while (len >= 8)
{
uint32_t k1 = *data++;
k1 *= m; k1 ^= k1 >> r; k1 *= m;
h1 *= m; h1 ^= k1;
len -= 4;
uint32_t k2 = *data++;
k2 *= m; k2 ^= k2 >> r; k2 *= m;
h2 *= m; h2 ^= k2;
len -= 4;
}
if (len >= 4)
{
uint32_t k1 = *data++;
k1 *= m; k1 ^= k1 >> r; k1 *= m;
h1 *= m; h1 ^= k1;
len -= 4;
}
switch (len)
{
case 3: h2 ^= reinterpret_cast<const unsigned char *>(data)[2] << 16;
case 2: h2 ^= reinterpret_cast<const unsigned char *>(data)[1] << 8;
case 1: h2 ^= reinterpret_cast<const unsigned char *>(data)[0];
h2 *= m;
};
h1 ^= h2 >> 18; h1 *= m;
h2 ^= h1 >> 22; h2 *= m;
h1 ^= h2 >> 17; h1 *= m;
h2 ^= h1 >> 19; h2 *= m;
uint64_t h = h1;
h = (h << 32) | h2;
return h;
}
// MurmurHash2A, by Austin Appleby
// This is a variant of MurmurHash2 modified to use the Merkle-Damgard
// construction. Bulk speed should be identical to Murmur2, small-key speed
// will be 10%-20% slower due to the added overhead at the end of the hash.
// This variant fixes a minor issue where null keys were more likely to
// collide with each other than expected, and also makes the function
// more amenable to incremental implementations.
#define mmix(h,k) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
uint32_t MurmurHash2A(const void * key, int len, uint32_t seed)
{
const uint32_t m = 0x5bd1e995;
const int r = 24;
uint32_t l = len;
const unsigned char * data = reinterpret_cast<const unsigned char *>(key);
uint32_t h = seed;
while (len >= 4)
{
uint32_t k = *reinterpret_cast<const uint32_t *>(data);
mmix(h,k);
data += 4;
len -= 4;
}
uint32_t t = 0;
switch (len)
{
case 3: t ^= data[2] << 16;
case 2: t ^= data[1] << 8;
case 1: t ^= data[0];
};
mmix(h,t);
mmix(h,l);
h ^= h >> 13;
h *= m;
h ^= h >> 15;
return h;
}
// MurmurHashNeutral2, by Austin Appleby
// Same as MurmurHash2, but endian- and alignment-neutral.
// Half the speed though, alas.
uint32_t MurmurHashNeutral2(const void * key, int len, uint32_t seed)
{
const uint32_t m = 0x5bd1e995;
const int r = 24;
uint32_t h = seed ^ len;
const unsigned char * data = reinterpret_cast<const unsigned char *>(key);
while (len >= 4)
{
uint32_t k;
k = data[0];
k |= data[1] << 8;
k |= data[2] << 16;
k |= data[3] << 24;
k *= m;
k ^= k >> r;
k *= m;
h *= m;
h ^= k;
data += 4;
len -= 4;
}
switch (len)
{
case 3: h ^= data[2] << 16;
case 2: h ^= data[1] << 8;
case 1: h ^= data[0];
h *= m;
};
h ^= h >> 13;
h *= m;
h ^= h >> 15;
return h;
}
//-----------------------------------------------------------------------------
// MurmurHashAligned2, by Austin Appleby
// Same algorithm as MurmurHash2, but only does aligned reads - should be safer
// on certain platforms.
// Performance will be lower than MurmurHash2
#define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
uint32_t MurmurHashAligned2(const void * key, int len, uint32_t seed)
{
const uint32_t m = 0x5bd1e995;
const int r = 24;
const unsigned char * data = reinterpret_cast<const unsigned char *>(key);
uint32_t h = seed ^ len;
int align = reinterpret_cast<uint64_t>(data) & 3;
if (align && (len >= 4))
{
// Pre-load the temp registers
uint32_t t = 0, d = 0;
switch (align)
{
case 1: t |= data[2] << 16;
case 2: t |= data[1] << 8;
case 3: t |= data[0];
}
t <<= (8 * align);
data += 4-align;
len -= 4-align;
int sl = 8 * (4-align);
int sr = 8 * align;
// Mix
while (len >= 4)
{
d = *(reinterpret_cast<const uint32_t *>(data));
t = (t >> sr) | (d << sl);
uint32_t k = t;
MIX(h,k,m);
t = d;
data += 4;
len -= 4;
}
// Handle leftover data in temp registers
d = 0;
if (len >= align)
{
switch (align)
{
case 3: d |= data[2] << 16;
case 2: d |= data[1] << 8;
case 1: d |= data[0];
}
uint32_t k = (t >> sr) | (d << sl);
MIX(h,k,m);
data += align;
len -= align;
//----------
// Handle tail bytes
switch (len)
{
case 3: h ^= data[2] << 16;
case 2: h ^= data[1] << 8;
case 1: h ^= data[0];
h *= m;
};
}
else
{
switch (len)
{
case 3: d |= data[2] << 16;
case 2: d |= data[1] << 8;
case 1: d |= data[0];
case 0: h ^= (t >> sr) | (d << sl);
h *= m;
}
}
h ^= h >> 13;
h *= m;
h ^= h >> 15;
return h;
}
else
{
while (len >= 4)
{
uint32_t k = *reinterpret_cast<const uint32_t *>(data);
MIX(h,k,m);
data += 4;
len -= 4;
}
// Handle tail bytes
switch (len)
{
case 3: h ^= data[2] << 16;
case 2: h ^= data[1] << 8;
case 1: h ^= data[0];
h *= m;
};
h ^= h >> 13;
h *= m;
h ^= h >> 15;
return h;
}
}

contrib/orc vendored

@ -1 +1 @@
Subproject commit 5981208e39447df84827f6a961d1da76bacb6078
Subproject commit 0a936f6bbdb9303308973073f8623b5a8d82eae1

contrib/replxx vendored

@ -1 +1 @@
Subproject commit 2b24f14594d7606792b92544bb112a6322ba34d7
Subproject commit c81be6c68b146f15f2096b7ef80e3f21fe27004c

debian/changelog vendored

@ -1,5 +1,5 @@
clickhouse (21.7.1.1) unstable; urgency=low
clickhouse (21.8.1.1) unstable; urgency=low
* Modified source code
-- clickhouse-release <clickhouse-release@yandex-team.ru> Thu, 20 May 2021 22:23:29 +0300
-- clickhouse-release <clickhouse-release@yandex-team.ru> Mon, 28 Jun 2021 00:50:15 +0300


@ -1,7 +1,7 @@
FROM ubuntu:18.04
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=21.7.1.*
ARG version=21.8.1.*
RUN apt-get update \
&& apt-get install --yes --no-install-recommends \


@ -72,7 +72,7 @@ RUN git clone https://github.com/tpoechtrager/apple-libtapi.git \
&& cd .. \
&& rm -rf apple-libtapi
# Build and install tools for cross-linking to Darwin
# Build and install tools for cross-linking to Darwin (x86-64)
RUN git clone https://github.com/tpoechtrager/cctools-port.git \
&& cd cctools-port/cctools \
&& ./configure --prefix=/cctools --with-libtapi=/cctools \
@ -81,8 +81,17 @@ RUN git clone https://github.com/tpoechtrager/cctools-port.git \
&& cd ../.. \
&& rm -rf cctools-port
# Download toolchain for Darwin
RUN wget -nv https://github.com/phracker/MacOSX-SDKs/releases/download/10.15/MacOSX10.15.sdk.tar.xz
# Build and install tools for cross-linking to Darwin (aarch64)
RUN git clone https://github.com/tpoechtrager/cctools-port.git \
&& cd cctools-port/cctools \
&& ./configure --prefix=/cctools --with-libtapi=/cctools \
--target=aarch64-apple-darwin \
&& make install \
&& cd ../.. \
&& rm -rf cctools-port
# Download toolchain and SDK for Darwin
RUN wget -nv https://github.com/phracker/MacOSX-SDKs/releases/download/11.3/MacOSX11.0.sdk.tar.xz
# Download toolchain for ARM
# It contains all required headers and libraries. Note that it's named as "gcc" but actually we are using clang for cross compiling.


@ -3,7 +3,9 @@
set -x -e
mkdir -p build/cmake/toolchain/darwin-x86_64
tar xJf MacOSX10.15.sdk.tar.xz -C build/cmake/toolchain/darwin-x86_64 --strip-components=1
tar xJf MacOSX11.0.sdk.tar.xz -C build/cmake/toolchain/darwin-x86_64 --strip-components=1
ln -sf darwin-x86_64 build/cmake/toolchain/darwin-aarch64
mkdir -p build/cmake/toolchain/linux-aarch64
tar xJf gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu.tar.xz -C build/cmake/toolchain/linux-aarch64 --strip-components=1


@ -58,6 +58,7 @@ def run_docker_image_with_env(image_name, output, env_variables, ch_root, ccache
def parse_env_variables(build_type, compiler, sanitizer, package_type, image_type, cache, distcc_hosts, unbundled, split_binary, clang_tidy, version, author, official, alien_pkgs, with_coverage, with_binaries):
CLANG_PREFIX = "clang"
DARWIN_SUFFIX = "-darwin"
DARWIN_ARM_SUFFIX = "-darwin-aarch64"
ARM_SUFFIX = "-aarch64"
FREEBSD_SUFFIX = "-freebsd"
@ -66,9 +67,10 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ
is_clang = compiler.startswith(CLANG_PREFIX)
is_cross_darwin = compiler.endswith(DARWIN_SUFFIX)
is_cross_darwin_arm = compiler.endswith(DARWIN_ARM_SUFFIX)
is_cross_arm = compiler.endswith(ARM_SUFFIX)
is_cross_freebsd = compiler.endswith(FREEBSD_SUFFIX)
is_cross_compile = is_cross_darwin or is_cross_arm or is_cross_freebsd
is_cross_compile = is_cross_darwin or is_cross_darwin_arm or is_cross_arm or is_cross_freebsd
# Explicitly use LLD with Clang by default.
# Don't force linker for cross-compilation.
@ -82,6 +84,13 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ
cmake_flags.append("-DCMAKE_RANLIB:FILEPATH=/cctools/bin/x86_64-apple-darwin-ranlib")
cmake_flags.append("-DLINKER_NAME=/cctools/bin/x86_64-apple-darwin-ld")
cmake_flags.append("-DCMAKE_TOOLCHAIN_FILE=/build/cmake/darwin/toolchain-x86_64.cmake")
elif is_cross_darwin_arm:
cc = compiler[:-len(DARWIN_ARM_SUFFIX)]
cmake_flags.append("-DCMAKE_AR:FILEPATH=/cctools/bin/aarch64-apple-darwin-ar")
cmake_flags.append("-DCMAKE_INSTALL_NAME_TOOL=/cctools/bin/aarch64-apple-darwin-install_name_tool")
cmake_flags.append("-DCMAKE_RANLIB:FILEPATH=/cctools/bin/aarch64-apple-darwin-ranlib")
cmake_flags.append("-DLINKER_NAME=/cctools/bin/aarch64-apple-darwin-ld")
cmake_flags.append("-DCMAKE_TOOLCHAIN_FILE=/build/cmake/darwin/toolchain-aarch64.cmake")
elif is_cross_arm:
cc = compiler[:-len(ARM_SUFFIX)]
cmake_flags.append("-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-aarch64.cmake")
@ -185,8 +194,8 @@ if __name__ == "__main__":
parser.add_argument("--clickhouse-repo-path", default=os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, os.pardir))
parser.add_argument("--output-dir", required=True)
parser.add_argument("--build-type", choices=("debug", ""), default="")
parser.add_argument("--compiler", choices=("clang-11", "clang-11-darwin", "clang-11-aarch64", "clang-11-freebsd",
"gcc-10"), default="clang-11")
parser.add_argument("--compiler", choices=("clang-11", "clang-11-darwin", "clang-11-darwin-aarch64", "clang-11-aarch64",
"clang-11-freebsd", "gcc-10"), default="clang-11")
parser.add_argument("--sanitizer", choices=("address", "thread", "memory", "undefined", ""), default="")
parser.add_argument("--unbundled", action="store_true")
parser.add_argument("--split-binary", action="store_true")


@ -1,7 +1,7 @@
FROM ubuntu:20.04
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=21.7.1.*
ARG version=21.8.1.*
ARG gosu_ver=1.10
# set non-empty deb_location_url url to create a docker image


@ -1,7 +1,7 @@
FROM ubuntu:18.04
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=21.7.1.*
ARG version=21.8.1.*
RUN apt-get update && \
apt-get install -y apt-transport-https dirmngr && \


@ -113,6 +113,7 @@ function start_server
echo "ClickHouse server pid '$server_pid' started and responded"
echo "
set follow-fork-mode child
handle all noprint
handle SIGSEGV stop print
handle SIGBUS stop print


@ -103,6 +103,7 @@ function fuzz
kill -0 $server_pid
echo "
set follow-fork-mode child
handle all noprint
handle SIGSEGV stop print
handle SIGBUS stop print


@ -561,7 +561,7 @@ if args.report == 'main':
# Don't show mildly unstable queries, only the very unstable ones we
# treat as errors.
if very_unstable_queries:
if very_unstable_queries > 3:
if very_unstable_queries > 5:
error_tests += very_unstable_queries
status = 'failure'
message_array.append(str(very_unstable_queries) + ' unstable')


@ -55,6 +55,7 @@ function start()
done
echo "
set follow-fork-mode child
handle all noprint
handle SIGSEGV stop print
handle SIGBUS stop print


@ -2,18 +2,16 @@
## TL; DR How to make ClickHouse compile and link faster?
Developer only! This command will likely fulfill most of your needs. Run before calling `ninja`.
Minimal ClickHouse build example:
```cmake
```bash
cmake .. \
-DCMAKE_C_COMPILER=/bin/clang-10 \
-DCMAKE_CXX_COMPILER=/bin/clang++-10 \
-DCMAKE_C_COMPILER=$(which clang-11) \
-DCMAKE_CXX_COMPILER=$(which clang++-11) \
-DCMAKE_BUILD_TYPE=Debug \
-DENABLE_CLICKHOUSE_ALL=OFF \
-DENABLE_CLICKHOUSE_SERVER=ON \
-DENABLE_CLICKHOUSE_CLIENT=ON \
-DUSE_STATIC_LIBRARIES=OFF \
-DSPLIT_SHARED_LIBRARIES=ON \
-DENABLE_LIBRARIES=OFF \
-DUSE_UNWIND=ON \
-DENABLE_UTILS=OFF \


@ -33,7 +33,7 @@ Reboot.
``` bash
brew update
brew install cmake ninja libtool gettext llvm gcc
brew install cmake ninja libtool gettext llvm gcc binutils
```
## Checkout ClickHouse Sources {#checkout-clickhouse-sources}


@ -126,7 +126,7 @@ Builds ClickHouse in various configurations for use in further steps. You have t
- **Compiler**: `gcc-9` or `clang-10` (or `clang-10-xx` for other architectures e.g. `clang-10-freebsd`).
- **Build type**: `Debug` or `RelWithDebInfo` (cmake).
- **Sanitizer**: `none` (without sanitizers), `address` (ASan), `memory` (MSan), `undefined` (UBSan), or `thread` (TSan).
- **Bundled**: `bundled` build uses system libraries, and `unbundled` build uses libraries from `contrib` folder.
- **Bundled**: `bundled` build uses libraries from `contrib` folder, and `unbundled` build uses system libraries.
- **Splitted** `splitted` is a [split build](build.md#split-build)
- **Status**: `success` or `fail`
- **Build log**: link to the building and files copying log, useful when build failed.


@ -49,6 +49,7 @@ When working with the `MaterializeMySQL` database engine, [ReplacingMergeTree](.
| DATE, NEWDATE | [Date](../../sql-reference/data-types/date.md) |
| DATETIME, TIMESTAMP | [DateTime](../../sql-reference/data-types/datetime.md) |
| DATETIME2, TIMESTAMP2 | [DateTime64](../../sql-reference/data-types/datetime64.md) |
| ENUM | [Enum](../../sql-reference/data-types/enum.md) |
| STRING | [String](../../sql-reference/data-types/string.md) |
| VARCHAR, VAR_STRING | [String](../../sql-reference/data-types/string.md) |
| BLOB | [String](../../sql-reference/data-types/string.md) |


@ -0,0 +1,53 @@
---
toc_priority: 12
toc_title: ExternalDistributed
---
# ExternalDistributed {#externaldistributed}
The `ExternalDistributed` engine allows performing `SELECT` queries on data stored on remote MySQL or PostgreSQL servers. It accepts the [MySQL](../../../engines/table-engines/integrations/mysql.md) or [PostgreSQL](../../../engines/table-engines/integrations/postgresql.md) engine as an argument, so sharding is possible.
## Creating a Table {#creating-a-table}
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1] [TTL expr1],
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2] [TTL expr2],
...
) ENGINE = ExternalDistributed('engine', 'host:port', 'database', 'table', 'user', 'password');
```
See a detailed description of the [CREATE TABLE](../../../sql-reference/statements/create/table.md#create-table-query) query.
The table structure can differ from the original table structure:
- Column names should be the same as in the original table, but you can use just some of these columns and in any order.
- Column types may differ from those in the original table. ClickHouse tries to [cast](../../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) values to the ClickHouse data types.
**Engine Parameters**
- `engine` — The table engine `MySQL` or `PostgreSQL`.
- `host:port` — MySQL or PostgreSQL server address.
- `database` — Remote database name.
- `table` — Remote table name.
- `user` — User name.
- `password` — User password.
## Implementation Details {#implementation-details}
Supports multiple replicas, which must be listed by `|`; shards must be listed by `,`. For example:
```sql
CREATE TABLE test_shards (id UInt32, name String, age UInt32, money UInt32) ENGINE = ExternalDistributed('MySQL', `mysql{1|2}:3306,mysql{3|4}:3306`, 'clickhouse', 'test_replicas', 'root', 'clickhouse');
```
When specifying replicas, one of the available replicas is selected for each of the shards when reading. If the connection fails, the next replica is selected, and so on for all the replicas. If the connection attempt fails for all the replicas, the attempt is repeated the same way several times.
You can specify any number of shards and any number of replicas for each shard.
**See Also**
- [MySQL table engine](../../../engines/table-engines/integrations/mysql.md)
- [PostgreSQL table engine](../../../engines/table-engines/integrations/postgresql.md)
- [Distributed table engine](../../../engines/table-engines/special/distributed.md)


@ -28,8 +28,8 @@ See a detailed description of the [CREATE TABLE](../../../sql-reference/statemen
The table structure can differ from the original MySQL table structure:
- Column names should be the same as in the original MySQL table, but you can use just some of these columns and in any order.
- Column types may differ from those in the original MySQL table. ClickHouse tries to [cast](../../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) values to the ClickHouse data types.
- Setting `external_table_functions_use_nulls` defines how to handle Nullable columns. Default is true, if false - table function will not make nullable columns and will insert default values instead of nulls. This is also applicable for null values inside array data types.
- Column types may differ from those in the original MySQL table. ClickHouse tries to [cast](../../../engines/database-engines/mysql.md#data_types-support) values to the ClickHouse data types.
- The [external_table_functions_use_nulls](../../../operations/settings/settings.md#external-table-functions-use-nulls) setting defines how to handle Nullable columns. Default value: 1. If 0, the table function does not make Nullable columns and inserts default values instead of nulls. This is also applicable for NULL values inside arrays.
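A short sketch of the setting's effect (the connection parameters are placeholders):

```sql
SET external_table_functions_use_nulls = 0;
-- Columns that are nullable on the MySQL side are now read as non-Nullable;
-- NULL values are replaced with the default value for the column type.
SELECT * FROM mysql('host:3306', 'database', 'table', 'user', 'password');
```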
**Engine Parameters**
@ -55,6 +55,12 @@ Simple `WHERE` clauses such as `=, !=, >, >=, <, <=` are executed on the MySQL s
The rest of the conditions and the `LIMIT` sampling constraint are executed in ClickHouse only after the query to MySQL finishes.
Supports multiple replicas that must be listed by `|`. For example:
```sql
CREATE TABLE test_replicas (id UInt32, name String, age UInt32, money UInt32) ENGINE = MySQL(`mysql{2|3|4}:3306`, 'clickhouse', 'test_replicas', 'root', 'clickhouse');
```
## Usage Example {#usage-example}
Table in MySQL:


@ -29,7 +29,7 @@ The table structure can differ from the source table structure:
- Column names should be the same as in the source table, but you can use just some of these columns and in any order.
- Column types may differ from those in the source table. ClickHouse tries to [cast](../../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) values to the ClickHouse data types.
- Setting `external_table_functions_use_nulls` defines how to handle Nullable columns. Default is true, if false - table function will not make nullable columns and will insert default values instead of nulls. This is also applicable for null values inside array data types.
- The [external_table_functions_use_nulls](../../../operations/settings/settings.md#external-table-functions-use-nulls) setting defines how to handle Nullable columns. Default value: 1. If 0, the table function does not make Nullable columns and inserts default values instead of nulls. This is also applicable for NULL values inside arrays.
**Engine Parameters**


@ -23,8 +23,8 @@ See a detailed description of the [CREATE TABLE](../../../sql-reference/statemen
The table structure can differ from the original PostgreSQL table structure:
- Column names should be the same as in the original PostgreSQL table, but you can use just some of these columns and in any order.
- Column types may differ from those in the original PostgreSQL table. ClickHouse tries to [cast](../../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) values to the ClickHouse data types.
- Setting `external_table_functions_use_nulls` defines how to handle Nullable columns. Default is 1, if 0 - table function will not make nullable columns and will insert default values instead of nulls. This is also applicable for null values inside array data types.
- Column types may differ from those in the original PostgreSQL table. ClickHouse tries to [cast](../../../engines/database-engines/postgresql.md#data_types-support) values to the ClickHouse data types.
- The [external_table_functions_use_nulls](../../../operations/settings/settings.md#external-table-functions-use-nulls) setting defines how to handle Nullable columns. Default value: 1. If 0, the table function does not make Nullable columns and inserts default values instead of nulls. This is also applicable for NULL values inside arrays.
**Engine Parameters**
@ -49,6 +49,12 @@ PostgreSQL `Array` types are converted into ClickHouse arrays.
!!! info "Note"
Be careful: in PostgreSQL, array data created as `type_name[]` may contain multi-dimensional arrays of different dimensions in different table rows of the same column. In ClickHouse, it is only allowed to have multidimensional arrays with the same number of dimensions in all table rows of the same column.
Supports multiple replicas that must be listed by `|`. For example:
```sql
CREATE TABLE test_replicas (id UInt32, name String) ENGINE = PostgreSQL(`postgres{2|3|4}:5432`, 'clickhouse', 'test_replicas', 'postgres', 'mysecretpassword');
```
Priority of replicas for the PostgreSQL dictionary source is supported. The bigger the number in the map, the lower the priority. The highest priority is `0`.


@ -695,7 +695,8 @@ PARTITION BY toYYYYMM(EventDate)
SETTINGS storage_policy = 'moving_from_ssd_to_hdd'
```
The `default` storage policy implies using only one volume, which consists of only one disk given in `<path>`. Once a table is created, its storage policy cannot be changed.
The `default` storage policy implies using only one volume, which consists of only one disk given in `<path>`.
You can change the storage policy after table creation with the [ALTER TABLE ... MODIFY SETTING] query; the new policy should include all the old disks and volumes with the same names.
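For example, a minimal sketch of such a change (the table name is illustrative; the policy is the one configured above):

```sql
ALTER TABLE hits MODIFY SETTING storage_policy = 'moving_from_ssd_to_hdd';
```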
The number of threads performing background moves of data parts can be changed by [background_move_pool_size](../../../operations/settings/settings.md#background_move_pool_size) setting.


@ -65,7 +65,7 @@ By checking the row count:
Query:
``` sq;
``` sql
SELECT count() FROM recipes;
```


@ -1302,6 +1302,7 @@ The table below shows supported data types and how they match ClickHouse [data t
| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `UTF8` |
| `STRING`, `BINARY` | [FixedString](../sql-reference/data-types/fixedstring.md) | `UTF8` |
| `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | `DECIMAL` |
| `DECIMAL256` | [Decimal256](../sql-reference/data-types/decimal.md) | `DECIMAL256` |
| `LIST` | [Array](../sql-reference/data-types/array.md) | `LIST` |
Arrays can be nested and can have a value of the `Nullable` type as an argument.


@ -148,5 +148,11 @@ toc_title: Adopters
| <a href="https://www.kakaocorp.com/" class="favicon">kakaocorp</a> | Internet company | — | — | — | [if(kakao)2020 conference](https://if.kakao.com/session/117) |
| <a href="https://shop.okraina.ru/" class="favicon">ООО «МПЗ Богородский»</a> | Agriculture | — | — | — | [Article in Russian, November 2020](https://cloud.yandex.ru/cases/okraina) |
| <a href="https://www.tesla.com/" class="favicon">Tesla</a> | Electric vehicle and clean energy company | — | — | — | [Vacancy description, March 2021](https://news.ycombinator.com/item?id=26306170) |
| <a href="https://www.kgk-global.com/en/" class="favicon">KGK Global</a> | Vehicle monitoring | — | — | — | [Press release, June 2021](https://zoom.cnews.ru/news/item/530921) |
| <a href="https://www.bilibili.com/" class="favicon">BiliBili</a> | Video sharing | — | — | — | [Blog post, June 2021](https://chowdera.com/2021/06/20210622012241476b.html) |
| <a href="https://gigapipe.com/" class="favicon">Gigapipe</a> | Managed ClickHouse | Main product | — | — | [Official website](https://gigapipe.com/) |
| <a href="https://www.hydrolix.io/" class="favicon">Hydrolix</a> | Cloud data platform | Main product | — | — | [Documentation](https://docs.hydrolix.io/guide/query) |
| <a href="https://www.argedor.com/en/clickhouse/" class="favicon">Argedor</a> | ClickHouse support | — | — | — | [Official website](https://www.argedor.com/en/clickhouse/) |
| <a href="https://signoz.io/" class="favicon">SigNoz</a> | Observability Platform | Main Product | — | — | [Source code](https://github.com/SigNoz/signoz) |
[Original article](https://clickhouse.tech/docs/en/introduction/adopters/) <!--hide-->


@ -379,7 +379,7 @@ Default value: `1`.
## insert_null_as_default {#insert_null_as_default}
Enables or disables the insertion of [default values](../../sql-reference/statements/create/table.md#create-default-values) instead of [NULL](../../sql-reference/syntax.md#null-literal) into columns with a non-[nullable](../../sql-reference/data-types/nullable.md#data_type-nullable) data type.
If column type is not nullable and this setting is disabled, then inserting `NULL` causes an exception. If column type is nullable, then `NULL` values are inserted as is, regardless of this setting.
This setting is applicable to [INSERT ... SELECT](../../sql-reference/statements/insert-into.md#insert_query_insert-select) queries. Note that `SELECT` subqueries may be concatenated with `UNION ALL` clause.
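A minimal sketch (hypothetical tables `dst` and `src`, where `dst.x` is not Nullable):

```sql
SET insert_null_as_default = 1;
-- NULL values produced by the SELECT are inserted into the non-nullable
-- column as its default value instead of raising an exception.
INSERT INTO dst (x) SELECT x FROM src;
```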
@ -1182,7 +1182,7 @@ Possible values:
Default value: `1`.
**Additional Info**
This setting is useful for replicated tables with a sampling key. A query may be processed faster if it is executed on several servers in parallel. But the query performance may degrade in the following cases:
@ -1194,21 +1194,22 @@ This setting is useful for replicated tables with a sampling key. A query may be
!!! warning "Warning"
This setting will produce incorrect results when joins or subqueries are involved, and all tables don't meet certain requirements. See [Distributed Subqueries and max_parallel_replicas](../../sql-reference/operators/in.md#max_parallel_replica-subqueries) for more details.
## compile {#compile}
## compile_expressions {#compile-expressions}
Enable compilation of queries. By default, 0 (disabled).
Enables or disables compilation of frequently used simple functions and operators to native code with LLVM at runtime.
The compilation is only used for part of the query-processing pipeline: for the first stage of aggregation (GROUP BY).
If this portion of the pipeline was compiled, the query may run faster due to unrolling of short loops and inlining of aggregate function calls. The maximum performance improvement (up to four times faster in rare cases) is seen for queries with multiple simple aggregate functions. Typically, the performance gain is insignificant. In very rare cases, it may slow down query execution.
Possible values:
## min_count_to_compile {#min-count-to-compile}
- 0 — Disabled.
- 1 — Enabled.
How many times to potentially use a compiled chunk of code before running compilation. By default, 3.
For testing, the value can be set to 0: compilation runs synchronously and the query waits for the end of the compilation process before continuing execution. For all other cases, use values starting with 1. Compilation normally takes about 5-10 seconds.
If the value is 1 or more, compilation occurs asynchronously in a separate thread. The result will be used as soon as it is ready, including queries that are currently running.
Default value: `1`.
Compiled code is required for each different combination of aggregate functions used in the query and the type of keys in the GROUP BY clause.
The results of the compilation are saved in the build directory in the form of .so files. There is no restriction on the number of compilation results since they do not use very much space. Old results will be used after server restarts, except in the case of a server upgrade; in this case, the old results are deleted.
## min_count_to_compile_expression {#min-count-to-compile-expression}
Minimum number of times the same expression must be executed before it is compiled.
Default value: `3`.
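A hedged sketch of how `compile_expressions` and `min_count_to_compile_expression` interact; the query itself is illustrative:
``` sql
SET compile_expressions = 1;
SET min_count_to_compile_expression = 3;
-- After the same expression has been executed 3 times, later runs
-- may use LLVM-compiled native code for the arithmetic below.
SELECT count() FROM numbers(1000000) WHERE number * 2 + 1 > 100;
```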
## output_format_json_quote_64bit_integers {#session_settings-output_format_json_quote_64bit_integers}
@ -1558,7 +1559,7 @@ Possible values:
- 0 — Disabled (final query processing is done on the initiator node).
- 1 - Do not merge aggregation states from different servers for distributed query processing (the query is completely processed on the shard, the initiator only proxies the data); can be used when it is certain that there are different keys on different shards.
- 2 - Same as `1` but applies `ORDER BY` and `LIMIT` (it is not possilbe when the query processed completelly on the remote node, like for `distributed_group_by_no_merge=1`) on the initiator (can be used for queries with `ORDER BY` and/or `LIMIT`).
- 2 - Same as `1` but applies `ORDER BY` and `LIMIT` (which is not possible when the query is processed completely on the remote node, as with `distributed_group_by_no_merge=1`) on the initiator (can be used for queries with `ORDER BY` and/or `LIMIT`).
**Example**
@ -1591,6 +1592,18 @@ FORMAT PrettyCompactMonoBlock
Default value: 0
## distributed_push_down_limit {#distributed-push-down-limit}
LIMIT will be applied on each shard separately. Usually you do not need to use it, since this is done automatically when possible, i.e. for a simple `SELECT ... FROM ... LIMIT` query.
Possible values:
- 0 - Disabled
- 1 - Enabled
!!! note "Note"
    With this setting enabled, the result of the query may be inaccurate.
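A short sketch under the assumption of a hypothetical Distributed table named `distributed_table` (the name is illustrative):
``` sql
SET distributed_push_down_limit = 1;
-- Each shard applies LIMIT 10 locally, so less data is sent to the initiator;
-- the trade-off is that the combined result may be inaccurate.
SELECT * FROM distributed_table LIMIT 10;
```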
## optimize_skip_unused_shards_limit {#optimize-skip-unused-shards-limit}
Limits the number of sharding key values; `optimize_skip_unused_shards` is turned off if the limit is reached.
@ -1610,7 +1623,7 @@ Possible values:
Default value: 0
## optimize_skip_unused_shards_rewrite_in {#optimize-skip-unused-shardslrewrite-in}
## optimize_skip_unused_shards_rewrite_in {#optimize-skip-unused-shards-rewrite-in}
Rewrites `IN` in queries sent to remote shards to exclude values that do not belong to the shard (requires `optimize_skip_unused_shards`).
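A sketch of the rewrite, assuming a hypothetical Distributed table `distributed_table` sharded by `id` (names are illustrative):
``` sql
SET optimize_skip_unused_shards = 1;
SET optimize_skip_unused_shards_rewrite_in = 1;
-- Each shard receives only the IN values that can belong to it; with a
-- sharding key of id % 2, the shard holding even ids would get IN (2, 4).
SELECT * FROM distributed_table WHERE id IN (1, 2, 3, 4);
```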
@ -1790,6 +1803,27 @@ Possible values:
Default value: 0.
## distributed_directory_monitor_split_batch_on_failure {#distributed_directory_monitor_split_batch_on_failure}
Enables/disables splitting batches on failures.
Sometimes sending a particular batch to the remote shard may fail because of a complex pipeline downstream (e.g. a `MATERIALIZED VIEW` with `GROUP BY`) that hits `Memory limit exceeded` or similar errors. In this case, retrying will not help (and it will get distributed sends for the table stuck), but sending the files from that batch one by one may allow the INSERT to succeed.
Setting this to `1` disables batching for such batches (i.e. temporarily disables `distributed_directory_monitor_batch_inserts` for failed batches).
Possible values:
- 1 — Enabled.
- 0 — Disabled.
Default value: 0.
!!! note "Note"
    This setting also affects broken batches (that may appear because of abnormal server (machine) termination and no `fsync_after_insert`/`fsync_directories` for the [Distributed](../../engines/table-engines/special/distributed.md) table engine).
!!! warning "Warning"
You should not rely on automatic batch splitting, since this may hurt performance.
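A minimal sketch of enabling the setting and checking its current value:
``` sql
SET distributed_directory_monitor_split_batch_on_failure = 1;
-- Verify the current value:
SELECT value FROM system.settings
WHERE name = 'distributed_directory_monitor_split_batch_on_failure';
```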
## os_thread_priority {#setting-os-thread-priority}
Sets the priority ([nice](https://en.wikipedia.org/wiki/Nice_(Unix))) for threads that execute queries. The OS scheduler considers this priority when choosing the next thread to run on each available CPU core.
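A minimal sketch: running a low-importance query at a reduced scheduling priority (a higher nice value means lower priority; the query is illustrative):
``` sql
SET os_thread_priority = 10;
SELECT count() FROM numbers(100000000);
```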
@ -2073,7 +2107,7 @@ Default value: 128.
## background_fetches_pool_size {#background_fetches_pool_size}
Sets the number of threads performing background fetches for [replicated](../../engines/table-engines/mergetree-family/replication.md) tables. This setting is applied at the ClickHouse server start and cant be changed in a user session. For production usage with frequent small insertions or slow ZooKeeper cluster is recomended to use default value.
Sets the number of threads performing background fetches for [replicated](../../engines/table-engines/mergetree-family/replication.md) tables. This setting is applied at ClickHouse server start and cannot be changed in a user session. For production usage with frequent small insertions or a slow ZooKeeper cluster, it is recommended to use the default value.
Possible values:
@ -2660,7 +2694,7 @@ Default value: `0`.
## aggregate_functions_null_for_empty {#aggregate_functions_null_for_empty}
Enables or disables rewriting all aggregate functions in a query, adding the [-OrNull](../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-ornull) suffix to them. Enable it for SQL standard compatibility.
It is implemented via query rewrite (similar to the [count_distinct_implementation](#settings-count_distinct_implementation) setting) to get consistent results for distributed queries.
Possible values:
@ -2844,7 +2878,7 @@ Default value: `0`.
## database_atomic_wait_for_drop_and_detach_synchronously {#database_atomic_wait_for_drop_and_detach_synchronously}
Adds the `SYNC` modifier to all `DROP` and `DETACH` queries.
Possible values:
@ -2950,7 +2984,7 @@ Enables or disables using the original column names instead of aliases in query
Possible values:
- 0 — The column name is substituted with the alias.
- 1 — The column name is not substituted with the alias.
Default value: `0`.
@ -3063,8 +3097,86 @@ SELECT
sum(a),
sumCount(b).1,
sumCount(b).2,
(sumCount(b).1) / (sumCount(b).2)
FROM fuse_tbl
```
[Original article](https://clickhouse.tech/docs/en/operations/settings/settings/) <!-- hide -->
## flatten_nested {#flatten-nested}
Sets the data format of [nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns.
Possible values:
- 1 — Nested column is flattened to separate arrays.
- 0 — Nested column stays a single array of tuples.
Default value: `1`.
**Usage**
If the setting is set to `0`, it is possible to use an arbitrary level of nesting.
**Examples**
Query:
``` sql
SET flatten_nested = 1;
CREATE TABLE t_nest (`n` Nested(a UInt32, b UInt32)) ENGINE = MergeTree ORDER BY tuple();
SHOW CREATE TABLE t_nest;
```
Result:
``` text
┌─statement───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ CREATE TABLE default.t_nest
(
`n.a` Array(UInt32),
`n.b` Array(UInt32)
)
ENGINE = MergeTree
ORDER BY tuple()
SETTINGS index_granularity = 8192 │
└─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```
Query:
``` sql
SET flatten_nested = 0;
CREATE TABLE t_nest (`n` Nested(a UInt32, b UInt32)) ENGINE = MergeTree ORDER BY tuple();
SHOW CREATE TABLE t_nest;
```
Result:
``` text
┌─statement──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ CREATE TABLE default.t_nest
(
`n` Nested(a UInt32, b UInt32)
)
ENGINE = MergeTree
ORDER BY tuple()
SETTINGS index_granularity = 8192 │
└────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```
## external_table_functions_use_nulls {#external-table-functions-use-nulls}
Defines how the [mysql](../../sql-reference/table-functions/mysql.md), [postgresql](../../sql-reference/table-functions/postgresql.md) and [odbc](../../sql-reference/table-functions/odbc.md) table functions use Nullable columns.
Possible values:
- 0 — The table function explicitly uses Nullable columns.
- 1 — The table function implicitly uses Nullable columns.
Default value: `1`.
**Usage**
If the setting is set to `0`, the table function does not produce Nullable columns and inserts default values instead of NULL. This also applies to NULL values inside arrays.
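A hedged sketch with hypothetical MySQL connection parameters (host, database, and credentials are illustrative):
``` sql
SET external_table_functions_use_nulls = 0;
-- Columns that are nullable on the MySQL side come back as plain types;
-- NULLs are replaced with default values (0, '', ...).
SELECT * FROM mysql('localhost:3306', 'db', 'table', 'user', 'password');
```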

View File

@ -0,0 +1,39 @@
# system.data_skipping_indices {#system-data-skipping-indices}
Contains information about existing data skipping indices in all the tables.
Columns:
- `database` ([String](../../sql-reference/data-types/string.md)) — Database name.
- `table` ([String](../../sql-reference/data-types/string.md)) — Table name.
- `name` ([String](../../sql-reference/data-types/string.md)) — Index name.
- `type` ([String](../../sql-reference/data-types/string.md)) — Index type.
- `expr` ([String](../../sql-reference/data-types/string.md)) — Expression used to calculate the index.
- `granularity` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of granules in the block.
**Example**
```sql
SELECT * FROM system.data_skipping_indices LIMIT 2 FORMAT Vertical;
```
```text
Row 1:
──────
database: default
table: user_actions
name: clicks_idx
type: minmax
expr: clicks
granularity: 1
Row 2:
──────
database: default
table: users
name: contacts_null_idx
type: minmax
expr: assumeNotNull(contacts_null)
granularity: 1
```

View File

@ -30,14 +30,6 @@ Do not disable overcommit. The value `cat /proc/sys/vm/overcommit_memory` should
$ echo 0 | sudo tee /proc/sys/vm/overcommit_memory
```
## Huge Pages {#huge-pages}
Always disable transparent huge pages. It interferes with memory allocators, which leads to significant performance degradation.
``` bash
$ echo 'madvise' | sudo tee /sys/kernel/mm/transparent_hugepage/enabled
```
Use `perf top` to watch the time spent in the kernel for memory management.
Permanent huge pages also do not need to be allocated.
@ -91,6 +83,15 @@ The Linux kernel prior to 3.2 had a multitude of problems with IPv6 implementati
Use at least a 10 GB network, if possible. 1 Gb will also work, but it will be much worse for patching replicas with tens of terabytes of data, or for processing distributed queries with a large amount of intermediate data.
## Huge Pages {#huge-pages}
If you are using an old Linux kernel, disable transparent huge pages. It interferes with memory allocators, which leads to significant performance degradation.
On newer Linux kernels, transparent huge pages are alright.
``` bash
$ echo 'madvise' | sudo tee /sys/kernel/mm/transparent_hugepage/enabled
```
## Hypervisor configuration
If you are using OpenStack, set

View File

@ -1,37 +0,0 @@
---
toc_priority: 150
---
## initializeAggregation {#initializeaggregation}
Initializes aggregation for your input rows. It is intended for functions with the suffix `State`.
Use it for tests or to process columns of types `AggregateFunction` and `AggregatingMergeTree`.
**Syntax**
``` sql
initializeAggregation (aggregate_function, column_1, column_2)
```
**Arguments**
- `aggregate_function` — Name of the aggregate function whose state is to be created. [String](../../../sql-reference/data-types/string.md#string).
- `column_n` — The column to pass to the function as an argument. [String](../../../sql-reference/data-types/string.md#string).
**Returned value(s)**
Returns the result of the aggregation for your input rows. The return type is the same as the return type of the function that `initializeAggregation` takes as its first argument.
For example, for functions with the suffix `State`, the return type is `AggregateFunction`.
**Example**
Query:
```sql
SELECT uniqMerge(state) FROM (SELECT initializeAggregation('uniqState', number % 3) AS state FROM system.numbers LIMIT 10000);
```
Result:
``` text
┌─uniqMerge(state)─┐
│                3 │
└──────────────────┘
```

View File

@ -74,4 +74,26 @@ Received exception from server (version 1.1.54388):
Code: 386. DB::Exception: Received from localhost:9000, 127.0.0.1. DB::Exception: There is no supertype for types UInt8, String because some of them are String/FixedString and some of them are not.
```
[Original article](https://clickhouse.tech/docs/en/data_types/array/) <!--hide-->
## Array Size {#array-size}
It is possible to find the size of an array by using the `size0` subcolumn without reading the whole column. For multi-dimensional arrays you can use `sizeN-1`, where `N` is the desired dimension.
**Example**
Query:
```sql
CREATE TABLE t_arr (`arr` Array(Array(Array(UInt32)))) ENGINE = MergeTree ORDER BY tuple();
INSERT INTO t_arr VALUES ([[[12, 13, 0, 1],[12]]]);
SELECT arr.size0, arr.size1, arr.size2 FROM t_arr;
```
Result:
``` text
┌─arr.size0─┬─arr.size1─┬─arr.size2─┐
│ 1 │ [2] │ [[4,1]] │
└───────────┴───────────┴───────────┘
```

View File

@ -34,7 +34,7 @@ CREATE TABLE test.visits
This example declares the `Goals` nested data structure, which contains data about conversions (goals reached). Each row in the visits table can correspond to zero or any number of conversions.
Only a single nesting level is supported. Columns of nested structures containing arrays are equivalent to multidimensional arrays, so they have limited support (there is no support for storing these columns in tables with the MergeTree engine).
When [flatten_nested](../../../operations/settings/settings.md#flatten-nested) is set to `0` (which is not the default), arbitrary levels of nesting are supported.
In most cases, when working with a nested data structure, its columns are specified with column names separated by a dot. These columns make up an array of matching types. All the column arrays of a single nested data structure have the same length.

View File

@ -20,6 +20,33 @@ To store `Nullable` type values in a table column, ClickHouse uses a separate fi
!!! info "Note"
    Using `Nullable` almost always negatively affects performance; keep this in mind when designing your databases.
## Finding NULL {#finding-null}
It is possible to find `NULL` values in a column by using the `null` subcolumn without reading the whole column. It returns `1` if the corresponding value is `NULL` and `0` otherwise.
**Example**
Query:
``` sql
CREATE TABLE nullable (`n` Nullable(UInt32)) ENGINE = MergeTree ORDER BY tuple();
INSERT INTO nullable VALUES (1) (NULL) (2) (NULL);
SELECT n.null FROM nullable;
```
Result:
``` text
┌─n.null─┐
│ 0 │
│ 1 │
│ 0 │
│ 1 │
└────────┘
```
## Usage Example {#usage-example}
``` sql

View File

@ -47,4 +47,32 @@ SELECT tuple(1, NULL) AS x, toTypeName(x)
└──────────┴─────────────────────────────────┘
```
## Addressing Tuple Elements {#addressing-tuple-elements}
It is possible to read elements of named tuples using indexes and names:
``` sql
CREATE TABLE named_tuples (`a` Tuple(s String, i Int64)) ENGINE = Memory;
INSERT INTO named_tuples VALUES (('y', 10)), (('x',-10));
SELECT a.s FROM named_tuples;
SELECT a.2 FROM named_tuples;
```
Result:
``` text
┌─a.s─┐
│ y │
│ x │
└─────┘
┌─tupleElement(a, 2)─┐
│ 10 │
│ -10 │
└────────────────────┘
```
[Original article](https://clickhouse.tech/docs/en/data_types/tuple/) <!--hide-->

View File

@ -60,7 +60,8 @@ SETTINGS(format_csv_allow_single_quotes = 0)
Types of sources (`source_type`):
- [Local file](#dicts-external_dicts_dict_sources-local_file)
- [Executable file](#dicts-external_dicts_dict_sources-executable)
- [Executable File](#dicts-external_dicts_dict_sources-executable)
- [Executable Pool](#dicts-external_dicts_dict_sources-executable_pool)
- [HTTP(s)](#dicts-external_dicts_dict_sources-http)
- DBMS
- [ODBC](#dicts-external_dicts_dict_sources-odbc)
@ -94,7 +95,7 @@ SOURCE(FILE(path './user_files/os.tsv' format 'TabSeparated'))
Setting fields:
- `path` The absolute path to the file.
- `format` – The file format. All the formats described in [Formats](../../../interfaces/formats.md#formats) are supported.
When a dictionary with the source `FILE` is created via a DDL command (`CREATE DICTIONARY ...`), the source file must be located in the `user_files` directory to prevent DB users from accessing arbitrary files on the ClickHouse node.
@ -113,21 +114,24 @@ Example of settings:
<executable>
<command>cat /opt/dictionaries/os.tsv</command>
<format>TabSeparated</format>
<implicit_key>false</implicit_key>
</executable>
</source>
```
Setting fields:
- `command` The absolute path to the executable file, or the file name (if the program directory is written to `PATH`).
- `format` The file format. All the formats described in “[Formats](../../../interfaces/formats.md#formats)” are supported.
- `implicit_key` - The executable source file can return only values, and the correspondence to the requested keys is determined implicitly - by the order of rows in the result. Default value is false.
- `command` The absolute path to the executable file, or the file name (if the program directory is written to `PATH`).
- `format` — The file format. All the formats described in [Formats](../../../interfaces/formats.md#formats) are supported.
- `implicit_key` The executable source file can return only values, and the correspondence to the requested keys is determined implicitly by the order of rows in the result. Default value is false.
This dictionary source can be configured only via XML configuration. Creating dictionaries with an executable source via DDL is disabled; otherwise, the DB user would be able to execute an arbitrary binary on the ClickHouse node.
## Executable Pool {#dicts-external_dicts_dict_sources-executable_pool}
Executable pool allows loading data from pool of processes. This source does not work with dictionary layouts that need to load all data from source. Executable pool works if the dictionary is stored using `cache`, `complex_key_cache`, `ssd_cache`, `complex_key_ssd_cache`, `direct`, `complex_key_direct` layouts. Executable pool will spawn pool of processes with specified command and keep them running until they exit. The program should read data from STDIN while it is available and output result to STDOUT, and it can wait for next block of data on stdin. ClickHouse will not close STDIN after processing a block of data but will pipe another chunk of data when needed. The executable script should be ready for this way of data processing - it should poll STDIN and flush data to STDOUT early.
Executable pool allows loading data from a pool of processes. This source does not work with dictionary layouts that need to load all data from the source. Executable pool works if the dictionary [is stored](external-dicts-dict-layout.md#ways-to-store-dictionaries-in-memory) using the `cache`, `complex_key_cache`, `ssd_cache`, `complex_key_ssd_cache`, `direct`, or `complex_key_direct` layout.
Executable pool will spawn a pool of processes with the specified command and keep them running until they exit. The program should read data from STDIN while it is available and output the result to STDOUT; it can wait for the next block of data on STDIN. ClickHouse will not close STDIN after processing a block of data but will pipe another chunk of data when needed. The executable script should be ready for this way of data processing: it should poll STDIN and flush data to STDOUT early.
Example of settings:
@ -145,12 +149,12 @@ Example of settings:
Setting fields:
- `command` The absolute path to the executable file, or the file name (if the program directory is written to `PATH`).
- `format` The file format. All the formats described in “[Formats](../../../interfaces/formats.md#formats)” are supported.
- `pool_size` - Size of pool. If 0 is specified as `pool_size` then there is no pool size restrictions.
- `command_termination_timeout` - Executable pool script, should contain main read-write loop. After dictionary is destroyed, pipe is closed, and executable file will have command_termination_timeout seconds to shutdown, before ClickHouse will send SIGTERM signal to child process. Specified in seconds. Default value is 10. Optional parameter.
- `max_command_execution_time` - Maximum executable script command execution time for processing block of data. Specified in seconds. Default value is 10. Optional parameter.
- `implicit_key` - The executable source file can return only values, and the correspondence to the requested keys is determined implicitly - by the order of rows in the result. Default value is false. Optional parameter.
- `command` — The absolute path to the executable file, or the file name (if the program directory is written to `PATH`).
- `format` — The file format. All the formats described in “[Formats](../../../interfaces/formats.md#formats)” are supported.
- `pool_size` — Size of the pool. If 0 is specified as `pool_size`, then there are no pool size restrictions.
- `command_termination_timeout` — The executable pool script should contain a main read-write loop. After the dictionary is destroyed, the pipe is closed, and the executable file has `command_termination_timeout` seconds to shut down before ClickHouse sends a SIGTERM signal to the child process. Specified in seconds. Default value is 10. Optional parameter.
- `max_command_execution_time` — Maximum executable script command execution time for processing a block of data. Specified in seconds. Default value is 10. Optional parameter.
- `implicit_key` — The executable source file can return only values; the correspondence to the requested keys is determined implicitly by the order of rows in the result. Default value is false. Optional parameter.
This dictionary source can be configured only via XML configuration. Creating dictionaries with an executable source via DDL is disabled; otherwise, the DB user would be able to execute an arbitrary binary on the ClickHouse node.
@ -482,6 +486,7 @@ Example of settings:
<table>table_name</table>
<where>id=10</where>
<invalidate_query>SQL_QUERY</invalidate_query>
<fail_on_connection_loss>true</fail_on_connection_loss>
</mysql>
</source>
```
@ -499,6 +504,7 @@ SOURCE(MYSQL(
table 'table_name'
where 'id=10'
invalidate_query 'SQL_QUERY'
fail_on_connection_loss 'true'
))
```
@ -523,6 +529,8 @@ Setting fields:
- `invalidate_query` Query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md).
- `fail_on_connection_loss` – The configuration parameter that controls the behavior of the server on connection loss. If `true`, an exception is thrown immediately if the connection between client and server was lost. If `false`, the ClickHouse server retries the query three times before throwing an exception. Note that retrying leads to increased response times. Default value: `false`.
You can connect to MySQL on a local host via sockets. To do this, set `host` and `socket`.
Example of settings:
@ -538,6 +546,7 @@ Example of settings:
<table>table_name</table>
<where>id=10</where>
<invalidate_query>SQL_QUERY</invalidate_query>
<fail_on_connection_loss>true</fail_on_connection_loss>
</mysql>
</source>
```
@ -554,6 +563,7 @@ SOURCE(MYSQL(
table 'table_name'
where 'id=10'
invalidate_query 'SQL_QUERY'
fail_on_connection_loss 'true'
))
```

View File

@ -39,13 +39,44 @@ Accepts zero arguments and returns an empty array of the appropriate type.
Accepts an empty array and returns a one-element array that is equal to the default value.
## range(end), range(start, end \[, step\]) {#rangeend-rangestart-end-step}
Returns an array of numbers from start to end-1 by step.
If the argument `start` is not specified, defaults to 0.
If the argument `step` is not specified, defaults to 1.
It behaves almost like the Python `range`, with the difference that all the argument types must be `UInt` numbers.
Just in case, an exception is thrown if arrays with a total length of more than 100,000,000 elements are created in a data block.
## range(end), range(\[start, \] end \[, step\]) {#range}
Returns an array of `UInt` numbers from `start` to `end - 1` by `step`.
**Syntax**
``` sql
range([start, ] end [, step])
```
**Arguments**
- `start` — The first element of the array. Optional, required if `step` is used. Default value: 0. [UInt](../data-types/int-uint.md)
- `end` — The number before which the array is constructed. Required. [UInt](../data-types/int-uint.md)
- `step` — Determines the incremental step between each element in the array. Optional. Default value: 1. [UInt](../data-types/int-uint.md)
**Returned value**
- Array of `UInt` numbers from `start` to `end - 1` by `step`.
**Implementation details**
- All arguments must be positive values: `start`, `end`, `step` are `UInt` data types, as well as elements of the returned array.
- An exception is thrown if query results in arrays with a total length of more than 100,000,000 elements.
**Examples**
Query:
``` sql
SELECT range(5), range(1, 5), range(1, 5, 2);
```
Result:
```txt
┌─range(5)────┬─range(1, 5)─┬─range(1, 5, 2)─┐
│ [0,1,2,3,4] │ [1,2,3,4] │ [1,3] │
└─────────────┴─────────────┴────────────────┘
```
## array(x1, …), operator \[x1, …\] {#arrayx1-operator-x1}

View File

@ -6,7 +6,7 @@ toc_title: Encoding
# Encoding Functions {#encoding-functions}
## char {#char}
Returns a string whose length equals the number of passed arguments, where each byte has the value of the corresponding argument. Accepts multiple arguments of numeric types. If the value of an argument is out of range of the UInt8 data type, it is converted to UInt8 with possible rounding and overflow.
**Syntax**
@ -156,7 +156,7 @@ Performs the opposite operation of [hex](#hex). It interprets each pair of hexad
If you want to convert the result to a number, you can use the [reverse](../../sql-reference/functions/string-functions.md#reverse) and [reinterpretAs<Type>](../../sql-reference/functions/type-conversion-functions.md#type-conversion-functions) functions.
!!! note "Note"
If `unhex` is invoked from within the `clickhouse-client`, binary strings display using UTF-8.
Alias: `UNHEX`.
@ -221,3 +221,51 @@ Accepts an integer. Returns a string containing the list of powers of two that t
## bitmaskToArray(num) {#bitmasktoarraynum}
Accepts an integer. Returns an array of UInt64 numbers containing the list of powers of two that total the source number when summed. Numbers in the array are in ascending order.
## bitPositionsToArray(num) {#bitpositionstoarraynum}
Accepts an integer; the argument is converted to an unsigned integer type. Returns an array of UInt64 numbers containing the positions of bits that equal 1. Numbers in the array are in ascending order.
**Syntax**
```sql
bitPositionsToArray(arg)
```
**Arguments**
- `arg` — Integer value. Types: [Int/UInt](../../sql-reference/data-types/int-uint.md)
**Returned value**
An array of UInt64 numbers containing the positions of bits that equal 1. Numbers in the array are in ascending order.
**Example**
Query:
``` sql
SELECT bitPositionsToArray(toInt8(1)) AS bit_positions;
```
Result:
``` text
┌─bit_positions─┐
│ [0] │
└───────────────┘
```
Query:
``` sql
SELECT bitPositionsToArray(toInt8(-1)) AS bit_positions;
```
Result:
``` text
┌─bit_positions─────┐
│ [0,1,2,3,4,5,6,7] │
└───────────────────┘
```

View File

@ -831,7 +831,7 @@ Returns 0 for the first row and the difference from the previous row for each su
!!! warning "Warning"
It can reach the previous row only inside the currently processed data block.
The result of the function depends on the affected data blocks and the order of data in the block.
The row order used during the calculation of `runningDifference` can differ from the order of rows returned to the user.
@ -908,7 +908,7 @@ Same as for [runningDifference](./other-functions.md#other_functions-runningdiff
## runningConcurrency {#runningconcurrency}
Calculates the number of concurrent events.
Each event has a start time and an end time. The start time is included in the event, while the end time is excluded. Columns with a start time and an end time must be of the same data type.
The function calculates the total number of active (concurrent) events for each event start time.
@ -1424,11 +1424,83 @@ Result:
└───────────┴────────┘
```
## initializeAggregation {#initializeaggregation}
Calculates the result of an aggregate function based on a single value. This function is intended for initializing aggregate functions with the [-State](../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-state) combinator. You can create states of aggregate functions and insert them into columns of type [AggregateFunction](../../sql-reference/data-types/aggregatefunction.md#data-type-aggregatefunction) or use initialized aggregates as default values.
**Syntax**
``` sql
initializeAggregation (aggregate_function, arg1, arg2, ..., argN)
```
**Arguments**
- `aggregate_function` — Name of the aggregation function to initialize. [String](../../sql-reference/data-types/string.md).
- `arg` — Arguments of the aggregate function.
**Returned value(s)**
- Result of the aggregation for every row passed to the function.
The return type is the same as the return type of the function that `initializeAggregation` takes as its first argument.
**Example**
Query:
```sql
SELECT uniqMerge(state) FROM (SELECT initializeAggregation('uniqState', number % 3) AS state FROM numbers(10000));
```
Result:
```text
┌─uniqMerge(state)─┐
│ 3 │
└──────────────────┘
```
Query:
```sql
SELECT finalizeAggregation(state), toTypeName(state) FROM (SELECT initializeAggregation('sumState', number % 3) AS state FROM numbers(5));
```
Result:
```text
┌─finalizeAggregation(state)─┬─toTypeName(state)─────────────┐
│ 0 │ AggregateFunction(sum, UInt8) │
│ 1 │ AggregateFunction(sum, UInt8) │
│ 2 │ AggregateFunction(sum, UInt8) │
│ 0 │ AggregateFunction(sum, UInt8) │
│ 1 │ AggregateFunction(sum, UInt8) │
└────────────────────────────┴───────────────────────────────┘
```
Example with `AggregatingMergeTree` table engine and `AggregateFunction` column:
```sql
CREATE TABLE metrics
(
key UInt64,
value AggregateFunction(sum, UInt64) DEFAULT initializeAggregation('sumState', toUInt64(0))
)
ENGINE = AggregatingMergeTree
ORDER BY key
```
```sql
INSERT INTO metrics VALUES (0, initializeAggregation('sumState', toUInt64(42)))
```
**See Also**
- [arrayReduce](../../sql-reference/functions/array-functions.md#arrayreduce)
## finalizeAggregation {#function-finalizeaggregation}
Takes a state of an aggregate function. Returns the result of aggregation (or the finalized state when using the [-State](../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-state) combinator).
**Syntax**
``` sql
finalizeAggregation(state)
@ -1442,7 +1514,7 @@ finalizeAggregation(state)
- Value/values that were aggregated.
Type: Value of any type that was aggregated.
**Examples**
@ -1474,7 +1546,7 @@ Result:
└──────────────────────────────────┘
```
Note that `NULL` values are ignored.
Query:
@ -1520,10 +1592,9 @@ Result:
└────────┴─────────────┴────────────────┘
```
**See Also**
- [arrayReduce](../../sql-reference/functions/array-functions.md#arrayreduce)
- [initializeAggregation](../../sql-reference/aggregate-functions/reference/initializeAggregation.md)
- [initializeAggregation](#initializeaggregation)
## runningAccumulate {#runningaccumulate}

View File

@ -56,4 +56,4 @@ Result:
ATTACH TABLE name UUID '<uuid>' (col1 Type1, ...)
```
It creates a new table with the provided structure and attaches data from the table with the specified UUID.

View File

@ -38,6 +38,7 @@ The list of available `SYSTEM` statements:
- [START REPLICATION QUEUES](#query_language-system-start-replication-queues)
- [SYNC REPLICA](#query_language-system-sync-replica)
- [RESTART REPLICA](#query_language-system-restart-replica)
- [RESTORE REPLICA](#query_language-system-restore-replica)
- [RESTART REPLICAS](#query_language-system-restart-replicas)
## RELOAD EMBEDDED DICTIONARIES {#query_language-system-reload-emdedded-dictionaries}
@ -118,7 +119,7 @@ For manage uncompressed data cache parameters use following server level setting
## DROP COMPILED EXPRESSION CACHE {#query_language-system-drop-compiled-expression-cache}
Reset the compiled expression cache. Used in development of ClickHouse and performance tests.
Complied expression cache used when query/user/profile enable option [compile](../../operations/settings/settings.md#compile)
The compiled expression cache is used when the query/user/profile enables the option [compile-expressions](../../operations/settings/settings.md#compile-expressions).
## FLUSH LOGS {#query_language-system-flush_logs}
@ -290,13 +291,60 @@ After running this statement the `[db.]replicated_merge_tree_family_table_name`
### RESTART REPLICA {#query_language-system-restart-replica}
Provides possibility to reinitialize Zookeeper sessions state for `ReplicatedMergeTree` table, will compare current state with Zookeeper as source of true and add tasks to Zookeeper queue if needed
Initialization replication quene based on ZooKeeper date happens in the same way as `ATTACH TABLE` statement. For a short time the table will be unavailable for any operations.
Reinitializes the ZooKeeper session state for a `ReplicatedMergeTree` table: it compares the current state with ZooKeeper as the source of truth and adds tasks to the ZooKeeper queue if needed.
The replication queue is initialized based on ZooKeeper data in the same way as for the `ATTACH TABLE` statement. For a short time the table will be unavailable for any operations.
``` sql
SYSTEM RESTART REPLICA [db.]replicated_merge_tree_family_table_name
```
### RESTORE REPLICA {#query_language-system-restore-replica}
Restores a replica if data is [possibly] present but Zookeeper metadata is lost.
Works only on readonly `ReplicatedMergeTree` tables.
One may execute the query after:
- ZooKeeper root `/` loss.
- Replicas path `/replicas` loss.
- Individual replica path `/replicas/replica_name/` loss.
The replica attaches locally found parts and sends info about them to ZooKeeper.
Parts present on the replica before the metadata loss are not re-fetched from other replicas if they are not outdated
(so replica restoration does not mean re-downloading all data over the network).
Caveat: parts in all states are moved to the `detached/` folder. Parts active before data loss (Committed) are attached.
#### Syntax
```sql
SYSTEM RESTORE REPLICA [db.]replicated_merge_tree_family_table_name [ON CLUSTER cluster_name]
```
Alternative syntax:
```sql
SYSTEM RESTORE REPLICA [ON CLUSTER cluster_name] [db.]replicated_merge_tree_family_table_name
```
#### Example
```sql
-- Creating table on multiple servers
CREATE TABLE test(n UInt32)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/', '{replica}')
ORDER BY n PARTITION BY n % 10;
INSERT INTO test SELECT * FROM numbers(1000);
-- zookeeper_delete_path("/clickhouse/tables/test", recursive=True) <- root loss.
SYSTEM RESTART REPLICA test; -- Table will attach as readonly as metadata is missing.
SYSTEM RESTORE REPLICA test; -- Need to execute on every replica, another way: RESTORE REPLICA test ON CLUSTER cluster
```
### RESTART REPLICAS {#query_language-system-restart-replicas}
Reinitializes the ZooKeeper session state for all `ReplicatedMergeTree` tables: it compares the current state with ZooKeeper as the source of truth and adds tasks to the ZooKeeper queue if needed.

View File

@ -39,6 +39,18 @@ Simple `WHERE` clauses such as `=, !=, >, >=, <, <=` are currently executed on t
The rest of the conditions and the `LIMIT` sampling constraint are executed in ClickHouse only after the query to MySQL finishes.
Supports multiple replicas, which must be listed separated by `|`. For example:
```sql
SELECT name FROM mysql(`mysql{1|2|3}:3306`, 'mysql_database', 'mysql_table', 'user', 'password');
```
or
```sql
SELECT name FROM mysql(`mysql1:3306|mysql2:3306|mysql3:3306`, 'mysql_database', 'mysql_table', 'user', 'password');
```
**Returned Value**
A table object with the same columns as the original MySQL table.

View File

@ -43,8 +43,20 @@ PostgreSQL Array types converts into ClickHouse arrays.
!!! info "Note"
    Be careful: in PostgreSQL, an array data type column like Integer[] may contain arrays of different dimensions in different rows, but in ClickHouse it is only allowed to have multidimensional arrays of the same dimension in all rows.
Supports multiple replicas, which must be listed separated by `|`. For example:
Supports replicas priority for PostgreSQL dictionary source. The bigger the number in map, the less the priority. The highest priority is `0`.
```sql
SELECT name FROM postgresql(`postgres{1|2|3}:5432`, 'postgres_database', 'postgres_table', 'user', 'password');
```
or
```sql
SELECT name FROM postgresql(`postgres1:5431|postgres2:5432`, 'postgres_database', 'postgres_table', 'user', 'password');
```
Supports replica priority for the PostgreSQL dictionary source. The larger the number in the map, the lower the priority. The highest priority is `0`.
**Examples**

View File

@ -817,22 +817,6 @@ load_balancing = first_or_random
… for consistency (to get different parts of the same data split), set this option only when the sampling key is set.
Replica lag is not controlled.
## compile {#compile}
Enables query compilation. By default, 0 (disabled).
Compilation is used only for part of the query-processing pipeline: for the first stage of aggregation (GROUP BY).
If this part of the pipeline was compiled, the query may run faster thanks to unrolling of short loops and inlining of aggregate function calls. The maximum performance improvement (up to four times faster in rare cases) is seen for queries with multiple simple aggregate functions. Typically, the performance gain is insignificant. In very rare cases, query execution may slow down.
## min_count_to_compile {#min-count-to-compile}
How many times to potentially use a compiled chunk of code before running compilation. By default, 3.
For testing, the value can be set to 0: compilation runs synchronously and the query waits for the end of the compilation process before continuing execution. For all other cases, use values starting with 1. Compilation normally takes about 5-10 seconds.
If the value is 1 or more, compilation occurs asynchronously in a separate thread. The result will be used as soon as it is ready, including by queries that are currently running.
Compiled code is required for each different combination of aggregate functions used in the query and the type of keys in the GROUP BY clause.
The results of the compilation are saved in the build directory in the form of .so files. There is no restriction on the number of compilation results since they do not use very much space. Old results will be used after server restarts, except in the case of a server upgrade; in this case, the old results are deleted.
## output_format_json_quote_64bit_integers {#session_settings-output_format_json_quote_64bit_integers}
If the value is true, integers are displayed in quotes when using the JSON\*Int64 and UInt64 formats (for compatibility with most JavaScript implementations).

View File

@ -0,0 +1,53 @@
---
toc_priority: 12
toc_title: ExternalDistributed
---
# ExternalDistributed {#externaldistributed}
The `ExternalDistributed` engine allows executing `SELECT` queries against tables on a remote MySQL or PostgreSQL server. It takes the [MySQL](../../../engines/table-engines/integrations/mysql.md) or [PostgreSQL](../../../engines/table-engines/integrations/postgresql.md) table engine as an argument, so sharding is possible.
## Creating a Table {#creating-a-table}
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1] [TTL expr1],
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2] [TTL expr2],
...
) ENGINE = ExternalDistributed('engine', 'host:port', 'database', 'table', 'user', 'password');
```
See a detailed description of the [CREATE TABLE](../../../sql-reference/statements/create/table.md#create-table-query) query.
The table structure can differ from the source table structure:
- Column names must be the same as in the source table, but you can use just some of these columns, in any order.
- Column types may differ from those in the source table. ClickHouse tries to [cast](../../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) values to the ClickHouse data types.
**Engine Parameters**
- `engine` — The `MySQL` or `PostgreSQL` table engine.
- `host:port` — The MySQL or PostgreSQL server address.
- `database` — The database name on the server.
- `table` — The table name.
- `user` — The user name.
- `password` — The user password.
## Implementation Details {#implementation-details}
Supports multiple replicas, which must be listed separated by `|`, with shards separated by `,`. For example:
```sql
CREATE TABLE test_shards (id UInt32, name String, age UInt32, money UInt32) ENGINE = ExternalDistributed('MySQL', `mysql{1|2}:3306,mysql{3|4}:3306`, 'clickhouse', 'test_replicas', 'root', 'clickhouse');
```
When replicas are specified, one of the available replicas is selected for each shard when reading. If the connection fails, the next replica is selected, and so on for all replicas. If the connection attempt fails for all replicas, the ClickHouse server retries connecting to one of the replicas, cycling through them, several times.
You can specify any number of shards and any number of replicas for each shard.
**See Also**
- [MySQL table engine](../../../engines/table-engines/integrations/mysql.md)
- [PostgreSQL table engine](../../../engines/table-engines/integrations/postgresql.md)
- [Distributed table engine](../../../engines/table-engines/special/distributed.md)

View File

@ -20,11 +20,11 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
See a detailed description of the [CREATE TABLE](../../../sql-reference/statements/create/table.md#create-table-query) query.
The table structure can differ from the source MySQL table structure:
- Column names must be the same as in the source MySQL table, but you can use just some of these columns, in any order.
- Column types may differ from those in the source MySQL table. ClickHouse tries to [cast](../../../engines/database-engines/mysql.md#data_types-support) values to the ClickHouse data types.
- The [external_table_functions_use_nulls](../../../operations/settings/settings.md#external-table-functions-use-nulls) setting defines how to handle Nullable columns. Default value: 1. If 0, the table function does not produce Nullable columns and inserts default values for the scalar type instead of NULL. This also applies to NULL values inside arrays.
**Engine Parameters**
@ -50,6 +50,12 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
The rest of the conditions and the `LIMIT` sampling constraint are executed in ClickHouse only after the query to MySQL finishes.
Supports multiple replicas, which must be listed separated by `|`. For example:
```sql
CREATE TABLE test_replicas (id UInt32, name String, age UInt32, money UInt32) ENGINE = MySQL(`mysql{2|3|4}:3306`, 'clickhouse', 'test_replicas', 'root', 'clickhouse');
```
## Usage Example {#primer-ispolzovaniia}
Table in MySQL:

View File

@ -29,7 +29,7 @@ ENGINE = ODBC(connection_settings, external_database, external_table)
- Column names must be the same as in the source table, but you can use just some of these columns, in any order.
- Column types may differ from those of the corresponding columns in the source table. ClickHouse tries to [cast](../../../engines/table-engines/integrations/odbc.md#type_conversion_function-cast) values to the ClickHouse data types.
- The `external_table_functions_use_nulls` setting defines how to handle Nullable columns. Default value: 1. If 0, the table function does not produce Nullable columns and inserts default values for the scalar type instead of NULL. This also applies to NULL values inside arrays.
- The [external_table_functions_use_nulls](../../../operations/settings/settings.md#external-table-functions-use-nulls) setting defines how to handle Nullable columns. Default value: 1. If 0, the table function does not produce Nullable columns and inserts default values for the scalar type instead of NULL. This also applies to NULL values inside arrays.
**Параметры движка**

View File

@ -20,19 +20,19 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
See a detailed description of the [CREATE TABLE](../../../sql-reference/statements/create/table.md#create-table-query) query.
The table structure can differ from the source PostgreSQL table structure:
- Column names must be the same as in the source PostgreSQL table, but you can use just some of these columns, in any order.
- Column types may differ from those in the source PostgreSQL table. ClickHouse tries to [cast](../../../engines/database-engines/postgresql.md#data_types-support) values to the ClickHouse data types.
- The [external_table_functions_use_nulls](../../../operations/settings/settings.md#external-table-functions-use-nulls) setting defines how to handle Nullable columns. Default value: 1. If 0, the table function does not produce Nullable columns and inserts default values for the scalar type instead of NULL. This also applies to NULL values inside arrays.
**Engine Parameters**
- `host:port` — The PostgreSQL server address.
- `database` — The database name on the PostgreSQL server.
- `table` — The table name.
- `user` — The PostgreSQL user name.
- `password` — The PostgreSQL user password.
- `schema` — The schema name, if the default schema is not used. Optional argument.
## Implementation Details {#implementation-details}
@ -49,6 +49,12 @@ PostgreSQL массивы конвертируются в массивы ClickHo
!!! info "Note"
    Be careful: in PostgreSQL, arrays created as `type_name[]` are multidimensional and can contain a different number of dimensions in different rows of the same table. In ClickHouse, only multidimensional arrays with the same number of dimensions in all rows are allowed.
Supports multiple replicas, which must be listed separated by `|`. For example:
```sql
CREATE TABLE test_replicas (id UInt32, name String) ENGINE = PostgreSQL(`postgres{2|3|4}:5432`, 'clickhouse', 'test_replicas', 'postgres', 'mysecretpassword');
```
Supports replica priority for the PostgreSQL dictionary source. The larger the number of a replica, the lower its priority. The highest priority belongs to the replica with number `0`.

View File

@ -685,7 +685,9 @@ PARTITION BY toYYYYMM(EventDate)
SETTINGS storage_policy = 'moving_from_ssd_to_hdd'
```
By default, the `default` storage policy is used, which contains one volume and one disk specified in `<path>`. Currently, the storage policy cannot be changed after the table is created.
By default, the `default` storage policy is used, which contains one volume and one disk specified in `<path>`.
The storage policy can be changed after the table is created with the [ALTER TABLE ... MODIFY SETTING] query. Note that the new policy must contain all the volumes and disks of the previous policy with the same names.
The number of threads for background moves of parts between disks can be changed with the [background_move_pool_size](../../../operations/settings/settings.md#background_move_pool_size) setting.
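A minimal sketch of changing the policy on an existing table; the table name is illustrative, and the policy reuses the `moving_from_ssd_to_hdd` example from above:
``` sql
-- The new policy must contain all volumes and disks of the old one
-- under the same names.
ALTER TABLE example_table MODIFY SETTING storage_policy = 'moving_from_ssd_to_hdd';
```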

View File

@ -38,6 +38,8 @@ toc_title: "Клиентские библиотеки от сторонних р
- Ruby
- [ClickHouse (Ruby)](https://github.com/shlima/click_house)
- [clickhouse-activerecord](https://github.com/PNixx/clickhouse-activerecord)
- Rust
- [Klickhouse](https://github.com/Protryon/klickhouse)
- R
- [clickhouse-r](https://github.com/hannesmuehleisen/clickhouse-r)
- [RClickhouse](https://github.com/IMSMWU/RClickhouse)

View File

@ -348,7 +348,7 @@ INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 102 2;
## input_format_null_as_default {#settings-input-format-null-as-default}
Enables or disables initializing cells containing [NULL](../../sql-reference/syntax.md#null-literal) with [default values](../../sql-reference/statements/create/table.md#create-default-values) if the column data type does not allow [storing NULL](../../sql-reference/data-types/nullable.md#data_type-nullable).
If the column does not allow storing `NULL` and this setting is disabled, inserting `NULL` causes an exception. If the column allows storing `NULL`, then `NULL` values are inserted regardless of this setting.
This setting is used for [INSERT ... VALUES](../../sql-reference/statements/insert-into.md) queries with text input formats.
@ -361,7 +361,7 @@ INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 102 2;
## insert_null_as_default {#insert_null_as_default}
Enables or disables inserting [default values](../../sql-reference/statements/create/table.md#create-default-values) instead of [NULL](../../sql-reference/syntax.md#null-literal) into columns that do not allow [storing NULL](../../sql-reference/data-types/nullable.md#data_type-nullable).
If the column does not allow storing `NULL` and this setting is disabled, inserting `NULL` causes an exception. If the column allows storing `NULL`, then `NULL` values are inserted regardless of this setting.
This setting is used for [INSERT ... SELECT](../../sql-reference/statements/insert-into.md#insert_query_insert-select) queries. `SELECT` subqueries may be combined with the `UNION ALL` clause.
@ -1181,22 +1181,22 @@ load_balancing = round_robin
!!! warning "Warning"
    Parallel query execution may lead to an incorrect result if the query contains a join or subqueries and the tables do not meet certain requirements. See [Distributed Subqueries and max_parallel_replicas](../../sql-reference/operators/in.md#max_parallel_replica-subqueries) for details.
## compile_expressions {#compile-expressions}
## compile {#compile}
Enables or disables compilation of frequently used functions and operators. Compilation is performed to native platform code with LLVM at runtime.
Enables query compilation. By default, 0 (disabled).
Possible values:
Compilation is provided only for part of the query-processing pipeline: for the first stage of aggregation (GROUP BY).
If this part of the pipeline was compiled, the query may run faster thanks to unrolling of short loops and inlining of aggregate function calls. The maximum performance improvement (up to four times in rare cases) is achieved on queries with several simple aggregate functions. Typically, the performance gain is insignificant. In very rare cases, query execution may slow down.
- 0 — Compilation is disabled.
- 1 — Compilation is enabled.
## min_count_to_compile {#min-count-to-compile}
Default value: `1`.
After how many potential uses of a compiled chunk of code to run its compilation. By default, 3.
For testing, the value can be set to 0: compilation runs synchronously and the query waits for the end of the compilation process before continuing execution. For all other cases, use values starting with 1. Compilation normally takes about 5-10 seconds.
If the value is 1 or more, compilation occurs asynchronously in a separate thread. The result will be used as soon as it is ready, including by queries that are currently running.
## min_count_to_compile_expression {#min-count-to-compile-expression}
Compiled code is required for each different combination of aggregate functions used in the query and the type of keys in the GROUP BY clause.
The results of compilation are saved in the build directory as .so files. The number of compilation results is not limited, since they do not take up much space. Old results are used after server restarts, except in the case of a server upgrade; in this case, the old results are deleted.
The minimum number of executions of the same expression before it is compiled.
Default value: `3`.
## input_format_skip_unknown_fields {#input-format-skip-unknown-fields}
@ -2721,7 +2721,7 @@ SELECT * FROM test2;
- 0 — an `INSERT` query appends data to the end of the file after the existing data.
- 1 — `INSERT` deletes the existing data in the file and replaces it with the new data.
Default value: `0`.
## allow_experimental_geo_types {#allow-experimental-geo-types}
@ -2735,7 +2735,7 @@ SELECT * FROM test2;
## database_atomic_wait_for_drop_and_detach_synchronously {#database_atomic_wait_for_drop_and_detach_synchronously}
Adds the `SYNC` modifier to all `DROP` and `DETACH` queries.
Possible values:
@ -2813,7 +2813,7 @@ SELECT * FROM test2;
**Example**
What changes enabling and disabling the setting introduces:
Query:
@ -2957,4 +2957,83 @@ SELECT
FROM fuse_tbl
```
[Original article](https://clickhouse.tech/docs/ru/operations/settings/settings/) <!--hide-->
## flatten_nested {#flatten-nested}
Sets the data format of [nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns.
Possible values:
- 1 — A nested column is flattened to separate arrays.
- 0 — A nested column stays a single array of tuples.
Default value: `1`.
**Usage**
If the setting is set to `0`, it is possible to use an arbitrary level of nesting.
**Examples**
Query:
``` sql
SET flatten_nested = 1;
CREATE TABLE t_nest (`n` Nested(a UInt32, b UInt32)) ENGINE = MergeTree ORDER BY tuple();
SHOW CREATE TABLE t_nest;
```
Result:
``` text
┌─statement───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ CREATE TABLE default.t_nest
(
`n.a` Array(UInt32),
`n.b` Array(UInt32)
)
ENGINE = MergeTree
ORDER BY tuple()
SETTINGS index_granularity = 8192 │
└─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```
Query:
``` sql
SET flatten_nested = 0;
CREATE TABLE t_nest (`n` Nested(a UInt32, b UInt32)) ENGINE = MergeTree ORDER BY tuple();
SHOW CREATE TABLE t_nest;
```
Result:
``` text
┌─statement──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ CREATE TABLE default.t_nest
(
`n` Nested(a UInt32, b UInt32)
)
ENGINE = MergeTree
ORDER BY tuple()
SETTINGS index_granularity = 8192 │
└────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```
## external_table_functions_use_nulls {#external-table-functions-use-nulls}
Defines how the [mysql](../../sql-reference/table-functions/mysql.md), [postgresql](../../sql-reference/table-functions/postgresql.md), and [odbc](../../sql-reference/table-functions/odbc.md) table functions use Nullable columns.

Possible values:

- 0 — the table function explicitly uses Nullable columns.
- 1 — the table function implicitly uses Nullable columns.

Default value: `1`.

**Usage**

If the setting is set to `0`, the table function does not make Nullable columns and inserts default values of the scalar type instead of NULL. This also applies to NULL values inside arrays.
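A minimal sketch of the effect; the connection parameters and table names below are hypothetical:

```sql
-- With the setting disabled, NULLs coming from MySQL are replaced
-- by the default value of the scalar type (e.g. 0 for numbers).
SET external_table_functions_use_nulls = 0;
SELECT * FROM mysql('mysql1:3306', 'test_db', 'test_table', 'user', 'password');
```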

View File

@ -1,40 +0,0 @@
---
toc_priority: 150
---
## initializeAggregation {#initializeaggregation}
Initializes aggregation for the input rows. Intended for functions with the `State` suffix.
Useful for running tests or working with columns of the `AggregateFunction` type and tables of the `AggregatingMergeTree` engine family.

**Syntax**
``` sql
initializeAggregation (aggregate_function, column_1, column_2)
```
**Arguments**

- `aggregate_function` — the name of the aggregate function whose state should be created. [String](../../../sql-reference/data-types/string.md#string).
- `column_n` — the column passed to the aggregate function as an argument. [String](../../../sql-reference/data-types/string.md#string).

**Returned value**

Returns the result of aggregating the input data. The return type is the same as that of the function passed as the first argument to `initializeAggregation`.

The return type of functions with the `State` suffix is `AggregateFunction`.

**Example**

Query:
```sql
SELECT uniqMerge(state) FROM (SELECT initializeAggregation('uniqState', number % 3) AS state FROM system.numbers LIMIT 10000);
```
Result:
┌─uniqMerge(state)─┐
│ 3 │
└──────────────────┘

View File

@ -5,11 +5,9 @@ toc_title: Array(T)
# Array(T) {#data-type-array}
An array of elements of type `T`. `T` can be any type, including an array. Multidimensional arrays are thus supported.
## Creating an Array {#creating-an-array}

An array can be created with a function:
@ -45,7 +43,7 @@ SELECT [1, 2] AS x, toTypeName(x)
└───────┴────────────────────┘
```
## Working with Data Types {#working-with-data-types}
The maximum array size is limited to one million elements.
@ -76,3 +74,26 @@ Received exception from server (version 1.1.54388):
Code: 386. DB::Exception: Received from localhost:9000, 127.0.0.1. DB::Exception: There is no supertype for types UInt8, String because some of them are String/FixedString and some of them are not.
```
## Array Size {#array-size}

The size of an array can be found with the `size0` subcolumn without reading the whole column. For multidimensional arrays, use `sizeN-1`, where `N` is the desired dimension.

**Example**

Query:
```sql
CREATE TABLE t_arr (`arr` Array(Array(Array(UInt32)))) ENGINE = MergeTree ORDER BY tuple();
INSERT INTO t_arr VALUES ([[[12, 13, 0, 1],[12]]]);
SELECT arr.size0, arr.size1, arr.size2 FROM t_arr;
```
Result:
``` text
┌─arr.size0─┬─arr.size1─┬─arr.size2─┐
│ 1 │ [2] │ [[4,1]] │
└───────────┴───────────┴───────────┘
```

View File

@ -29,7 +29,7 @@ CREATE TABLE test.visits
This example declares the `Goals` nested data structure, which contains data about goal conversions. Each row of the 'visits' table can correspond to zero or any number of goal conversions.

Only a single nesting level is supported. Columns of nested structures that contain arrays are equivalent to multidimensional arrays, so their support is limited (storing such columns in tables of the MergeTree engine family is not supported).

If the [flatten_nested](../../../operations/settings/settings.md#flatten-nested) setting is set to `0` (which is not the default), arbitrary levels of nesting are supported.

In most cases, when working with a nested data structure, you refer to its individual columns, with column names separated by a dot; see the sketch below. These columns form arrays of matching types. All column arrays of a single nested data structure have the same length.
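For illustration, a minimal sketch of addressing the dot-separated columns of the `Goals` structure declared above; each such column is an array:

```sql
-- Goals.ID and Goals.EventTime are arrays of equal length in every row.
SELECT
    Goals.ID,
    Goals.EventTime
FROM test.visits
LIMIT 3;
```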

View File

@ -13,7 +13,7 @@ toc_title: Nullable
`NULL` is the default value for the `Nullable` type, unless specified otherwise in the ClickHouse server configuration.

## Storage Features {#storage-features}

To store `Nullable` type values, ClickHouse uses:
@ -27,7 +27,34 @@ toc_title: Nullable
!!! info "Info"
    Using `Nullable` almost always reduces performance; keep this in mind when designing your databases.

## Finding NULL {#finding-null}

You can find `NULL` values in a column with the `null` subcolumn without reading the whole column. The subcolumn contains `1` if the corresponding value is `NULL` and `0` otherwise.

**Example**

Query:
``` sql
CREATE TABLE nullable (`n` Nullable(UInt32)) ENGINE = MergeTree ORDER BY tuple();
INSERT INTO nullable VALUES (1) (NULL) (2) (NULL);
SELECT n.null FROM nullable;
```
Result:
``` text
┌─n.null─┐
│ 0 │
│ 1 │
│ 0 │
│ 1 │
└────────┘
```
## Usage Example {#usage-example}
``` sql
CREATE TABLE t_null(x Int8, y Nullable(Int8)) ENGINE TinyLog
@ -47,4 +74,3 @@ SELECT x + y from t_null
│ 5 │
└────────────┘
```

View File

@ -47,3 +47,30 @@ SELECT tuple(1,NULL) AS x, toTypeName(x)
└──────────┴─────────────────────────────────┘
```
## Addressing Tuple Elements {#addressing-tuple-elements}

Tuple elements can be addressed by index or by name:
``` sql
CREATE TABLE named_tuples (`a` Tuple(s String, i Int64)) ENGINE = Memory;
INSERT INTO named_tuples VALUES (('y', 10)), (('x',-10));
SELECT a.s FROM named_tuples;
SELECT a.2 FROM named_tuples;
```
Result:
``` text
┌─a.s─┐
│ y │
│ x │
└─────┘
┌─tupleElement(a, 2)─┐
│ 10 │
│ -10 │
└────────────────────┘
```

View File

@ -61,6 +61,7 @@ SETTINGS(format_csv_allow_single_quotes = 0)
- [Local file](#dicts-external_dicts_dict_sources-local_file)
- [Executable file](#dicts-external_dicts_dict_sources-executable)
- [Executable pool](#dicts-external_dicts_dict_sources-executable_pool)
- [HTTP(s)](#dicts-external_dicts_dict_sources-http)
- DBMS:
- [ODBC](#dicts-external_dicts_dict_sources-odbc)
@ -69,6 +70,7 @@ SETTINGS(format_csv_allow_single_quotes = 0)
- [ClickHouse](#dicts-external_dicts_dict_sources-clickhouse)
- [MongoDB](#dicts-external_dicts_dict_sources-mongodb)
- [Redis](#dicts-external_dicts_dict_sources-redis)
- [Cassandra](#dicts-external_dicts_dict_sources-cassandra)
- [PostgreSQL](#dicts-external_dicts_dict_sources-postgresql)
## Local File {#dicts-external_dicts_dict_sources-local_file}
@ -93,7 +95,7 @@ SOURCE(FILE(path './user_files/os.tsv' format 'TabSeparated'))
Setting fields:

- `path` — the absolute path to the file.
- `format` — the file format. All the formats described in [Formats](../../../interfaces/formats.md#formats) are supported.

If a dictionary with source `FILE` is created via DDL (`CREATE DICTIONARY ...`), the source file must be located in the `user_files` directory, to prevent DB users from accessing arbitrary files on the ClickHouse node.
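A minimal DDL sketch of such a dictionary; the attribute names, layout, and lifetime are illustrative assumptions:

```sql
CREATE DICTIONARY os_dict
(
    id UInt64,
    name String
)
PRIMARY KEY id
SOURCE(FILE(path './user_files/os.tsv' format 'TabSeparated'))
LAYOUT(FLAT())
LIFETIME(MIN 300 MAX 360);
```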
@ -112,6 +114,7 @@ SOURCE(FILE(path './user_files/os.tsv' format 'TabSeparated'))
<executable>
<command>cat /opt/dictionaries/os.tsv</command>
<format>TabSeparated</format>
<implicit_key>false</implicit_key>
</executable>
</source>
```
@ -119,9 +122,41 @@ SOURCE(FILE(path './user_files/os.tsv' format 'TabSeparated'))
Setting fields:

- `command` — the absolute path to the executable file, or the file name (if the program directory is written to `PATH`).
- `format` — the file format. All the formats described in [Formats](../../../interfaces/formats.md#formats) are supported.
- `implicit_key` — the executable source file can return only values, and the correspondence to the requested keys is determined implicitly by the order of rows in the result. Default value: false. Optional parameter.

This dictionary source can be configured only via XML configuration. Creating dictionaries with executable source via DDL is disabled; otherwise, a DB user would be able to execute an arbitrary binary on the ClickHouse server.
## Executable Pool {#dicts-external_dicts_dict_sources-executable_pool}

Executable pool allows loading data from a pool of processes. This source does not work with dictionary layouts that need to load all data from the source. Executable pool works with dictionaries stored using the following [layouts](external-dicts-dict-layout.md#ways-to-store-dictionaries-in-memory): `cache`, `complex_key_cache`, `ssd_cache`, `complex_key_ssd_cache`, `direct`, `complex_key_direct`.

Executable pool spawns a pool of processes with the specified command and keeps them running until they exit. The program reads data from STDIN while it is available and writes the result to STDOUT; it then waits for the next block of data on STDIN. ClickHouse does not close STDIN after processing a block of data, but pipes the next chunk when needed. The executable script must be ready for this way of data processing: it should poll STDIN and flush data to STDOUT early.

Example of settings:
``` xml
<source>
<executable_pool>
<command>while read key; do printf "$key\tData for key $key\n"; done</command>
<format>TabSeparated</format>
<pool_size>10</pool_size>
<max_command_execution_time>10</max_command_execution_time>
<implicit_key>false</implicit_key>
</executable_pool>
</source>
```
Setting fields:

- `command` — the absolute path to the file, or the file name (if the program directory is written to `PATH`).
- `format` — the file format. All the formats described in [Formats](../../../interfaces/formats.md#formats) are supported.
- `pool_size` — the size of the pool. If `pool_size` is 0, there are no pool size restrictions.
- `command_termination_timeout` — the executable pool script should include a main read-write loop. After the dictionary is destroyed, the pipe is closed, and the executable has `command_termination_timeout` seconds to shut down before ClickHouse sends a SIGTERM signal to the child process. Specified in seconds. Default value: 10. Optional parameter.
- `max_command_execution_time` — the maximum time the executable script has to process a block of data. Specified in seconds. Default value: 10. Optional parameter.
- `implicit_key` — the executable source file can return only values, and the correspondence to the requested keys is determined implicitly by the order of rows in the result. Default value: false. Optional parameter.

This dictionary source can be configured only via XML configuration. Creating dictionaries with executable source via DDL is disabled; otherwise, a DB user would be able to execute an arbitrary binary on the ClickHouse server.
## HTTP(s) {#dicts-external_dicts_dict_sources-http}
@ -451,6 +486,7 @@ LIFETIME(MIN 300 MAX 360)
<table>table_name</table>
<where>id=10</where>
<invalidate_query>SQL_QUERY</invalidate_query>
<fail_on_connection_loss>true</fail_on_connection_loss>
</mysql>
</source>
```
@ -468,6 +504,7 @@ SOURCE(MYSQL(
table 'table_name'
where 'id=10'
invalidate_query 'SQL_QUERY'
fail_on_connection_loss 'true'
))
```
@ -492,6 +529,8 @@ SOURCE(MYSQL(
- `invalidate_query` — a query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](external-dicts-dict-lifetime.md).
- `fail_on_connection_loss` — a configuration parameter that controls the server behavior on connection loss. If `true`, an exception is thrown immediately when the connection between client and server is lost. If `false`, the server retries the query three times before throwing an exception. Note that retries lead to increased query execution time. Default value: `false`.

MySQL can be connected to on a local host via sockets; to do this, set `host` and `socket`.
Example of settings:
@ -507,6 +546,7 @@ MySQL можно подключить на локальном хосте чер
<table>table_name</table>
<where>id=10</where>
<invalidate_query>SQL_QUERY</invalidate_query>
<fail_on_connection_loss>true</fail_on_connection_loss>
</mysql>
</source>
```
@ -523,6 +563,7 @@ SOURCE(MYSQL(
table 'table_name'
where 'id=10'
invalidate_query 'SQL_QUERY'
fail_on_connection_loss 'true'
))
```
@ -729,4 +770,3 @@ Setting fields:
- `where` — the selection criteria. The syntax for conditions is the same as for the `WHERE` clause in PostgreSQL, for example, `id > 10 AND id < 20`. Optional parameter.
- `invalidate_query` — a query for checking the dictionary update condition. Optional parameter. Read more in the section [Updating dictionaries](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md).

View File

@ -39,10 +39,49 @@ toc_title: "Массивы"
Accepts an empty array and returns a one-element array equal to the default value.
## range(end), range(\[start, \] end \[, step\]) {#range}

Returns an array of numbers from `start` to `end - 1` with step `step`.

**Syntax**
``` sql
range([start, ] end [, step])
```
**Arguments**

- `start` — the start of the range. Required when `step` is used. Default value: `0`. Type: [UInt](../data-types/int-uint.md).
- `end` — the end of the range (not included in the array). Required. Must be greater than `start`. Type: [UInt](../data-types/int-uint.md).
- `step` — the step between elements. Optional. Default value: `1`. Type: [UInt](../data-types/int-uint.md).

**Returned values**

- An array of `UInt` numbers from `start` to `end - 1` with step `step`.

**Implementation details**

- Negative argument values are not supported: `start`, `end`, and `step` are of type `UInt`.
- An exception is thrown if a query results in arrays with a total length of more than 100,000,000 elements.

**Examples**

Query:
``` sql
SELECT range(5), range(1, 5), range(1, 5, 2);
```
Result:
```txt
┌─range(5)────┬─range(1, 5)─┬─range(1, 5, 2)─┐
│ [0,1,2,3,4] │ [1,2,3,4] │ [1,3] │
└─────────────┴─────────────┴────────────────┘
```
## array(x1, …), operator \[x1, …\] {#arrayx1-operator-x1}
@ -1576,4 +1615,4 @@ SELECT arrayProduct([toDecimal64(1,8), toDecimal64(2,8), toDecimal64(3,8)]) as r
┌─res─┬─toTypeName(arrayProduct(array(toDecimal64(1, 8), toDecimal64(2, 8), toDecimal64(3, 8))))─┐
│ 6 │ Float64 │
└─────┴──────────────────────────────────────────────────────────────────────────────────────────┘
```

View File

@ -13,7 +13,7 @@ toc_title: "Прочие функции"
Returns a named value from the [macros](../../operations/server-configuration-parameters/settings.md#macros) section of the server configuration.

**Syntax**
```sql
getMacro(name)
@ -854,8 +854,8 @@ WHERE diff != 1
## runningConcurrency {#runningconcurrency}
Counts the number of concurrent events.
Each event has a start time and an end time. The start time is included in the event, while the end time is excluded. Columns with event start and end times must have the same data type.
The function calculates the number of events running simultaneously at the start time of each event in the selection.

!!! warning "Warning"
    Events must be sorted by start time in ascending order. If this requirement is violated, the function throws an exception.
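A minimal, self-contained sketch using the `values` table function; the dates are arbitrary:

```sql
-- Events must already be sorted by start time.
SELECT start, runningConcurrency(start, end) AS concurrent
FROM values('start Date, end Date',
    ('2021-03-03', '2021-03-11'),
    ('2021-03-06', '2021-03-12'),
    ('2021-03-07', '2021-03-08'),
    ('2021-03-11', '2021-03-12'));
```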
@ -1371,11 +1371,84 @@ SELECT formatReadableSize(filesystemCapacity()) AS "Capacity", toTypeName(filesy
└───────────┴────────┘
```
## initializeAggregation {#initializeaggregation}
Calculates the result of an aggregate function for each row. Intended for initializing aggregate functions with the [-State](../../sql-reference/aggregate-functions/combinators.md#state) combinator. Can be useful for creating aggregate function states to insert into columns of type [AggregateFunction](../../sql-reference/data-types/aggregatefunction.md#data-type-aggregatefunction) or to use as default values.

**Syntax**
``` sql
initializeAggregation (aggregate_function, arg1, arg2, ..., argN)
```
**Arguments**

- `aggregate_function` — the name of the aggregate function whose state should be created. [String](../../sql-reference/data-types/string.md#string).
- `arg` — arguments passed to the aggregate function.

**Returned value**

- For each row, the result of the aggregate function applied to the arguments from that row.

The return type is the same as that of the function passed as the first argument.

**Example**
Query:
```sql
SELECT uniqMerge(state) FROM (SELECT initializeAggregation('uniqState', number % 3) AS state FROM numbers(10000));
```
Result:
```text
┌─uniqMerge(state)─┐
│ 3 │
└──────────────────┘
```
Query:
```sql
SELECT finalizeAggregation(state), toTypeName(state) FROM (SELECT initializeAggregation('sumState', number % 3) AS state FROM numbers(5));
```
Result:
```text
┌─finalizeAggregation(state)─┬─toTypeName(state)─────────────┐
│ 0 │ AggregateFunction(sum, UInt8) │
│ 1 │ AggregateFunction(sum, UInt8) │
│ 2 │ AggregateFunction(sum, UInt8) │
│ 0 │ AggregateFunction(sum, UInt8) │
│ 1 │ AggregateFunction(sum, UInt8) │
└────────────────────────────┴───────────────────────────────┘
```
An example with the `AggregatingMergeTree` table engine and a column of type `AggregateFunction`:
```sql
CREATE TABLE metrics
(
key UInt64,
value AggregateFunction(sum, UInt64) DEFAULT initializeAggregation('sumState', toUInt64(0))
)
ENGINE = AggregatingMergeTree
ORDER BY key
```
```sql
INSERT INTO metrics VALUES (0, initializeAggregation('sumState', toUInt64(42)))
```
**See Also**
- [arrayReduce](../../sql-reference/functions/array-functions.md#arrayreduce)
## finalizeAggregation {#function-finalizeaggregation}
Takes an aggregate function state and returns the result of aggregation (or the finalized state when using the [-State](../../sql-reference/aggregate-functions/combinators.md#state) combinator).

**Syntax**
``` sql
finalizeAggregation(state)
@ -1421,7 +1494,7 @@ SELECT finalizeAggregation(( SELECT sumState(number) FROM numbers(10)));
└──────────────────────────────────┘
```
Note that `NULL` values are ignored.
Query:
@ -1470,7 +1543,7 @@ FROM numbers(10);
**See Also**
- [arrayReduce](../../sql-reference/functions/array-functions.md#arrayreduce)
- [initializeAggregation](#initializeaggregation)
## runningAccumulate {#runningaccumulate}
@ -1537,13 +1610,13 @@ SELECT k, runningAccumulate(sum_k) AS res FROM (SELECT number as k, sumState(k)
Query:
```sql
SELECT
grouping,
item,
runningAccumulate(state, grouping) AS res
FROM
(
SELECT
toInt8(number / 4) AS grouping,
number AS item,
sumState(number) AS state
@ -1732,7 +1805,7 @@ SELECT number, randomPrintableASCII(30) as str, length(str) FROM system.numbers
randomString(length)
```
**Arguments**

- `length` — the length of the string. A positive integer.
@ -1831,13 +1904,13 @@ randomStringUTF8(length)
Query:
```sql
SELECT randomStringUTF8(13)
```
Result:
```text
┌─randomStringUTF8(13)─┐
│ 𘤗𙉝д兠庇󡅴󱱎󦐪􂕌𔊹𓰛 │
└──────────────────────┘
@ -1848,13 +1921,13 @@ SELECT randomStringUTF8(13)
Returns the current value of a [custom setting](../../operations/settings/index.md#custom_settings).

**Syntax**
```sql
getSetting('custom_setting')
```
**Parameter**

- `custom_setting` — the setting name. [String](../../sql-reference/data-types/string.md).
@ -1866,7 +1939,7 @@ getSetting('custom_setting')
```sql
SET custom_a = 123;
SELECT getSetting('custom_a');
```
**Result**
@ -1875,7 +1948,7 @@ SELECT getSetting('custom_a');
123
```
**See Also**

- [Custom settings](../../operations/settings/index.md#custom_settings)
@ -1889,10 +1962,10 @@ SELECT getSetting('custom_a');
isDecimalOverflow(d, [p])
```
**Arguments**

- `d` — the number. [Decimal](../../sql-reference/data-types/decimal.md).
- `p` — the precision. Optional. If omitted, the initial precision of the first argument is used. This parameter can be useful when migrating data to another DBMS or file. [UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges).

**Returned value**
@ -1926,7 +1999,7 @@ SELECT isDecimalOverflow(toDecimal32(1000000000, 0), 9),
countDigits(x)
```
**Arguments**

- `x` — an [integer](../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64) or [decimal](../../sql-reference/data-types/decimal.md) number.
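For illustration, a minimal sketch of calling the function on a few literals:

```sql
SELECT countDigits(toDecimal32(1, 9)),
       countDigits(toDecimal32(-1, 9)),
       countDigits(toDecimal64(1, 18));
```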

View File

@ -5,14 +5,14 @@ toc_title: SYSTEM
# SYSTEM Queries {#query-language-system}

- [RELOAD EMBEDDED DICTIONARIES](#query_language-system-reload-emdedded-dictionaries)
- [RELOAD DICTIONARIES](#query_language-system-reload-dictionaries)
- [RELOAD DICTIONARY](#query_language-system-reload-dictionary)
- [RELOAD MODELS](#query_language-system-reload-models)
- [RELOAD MODEL](#query_language-system-reload-model)
- [DROP DNS CACHE](#query_language-system-drop-dns-cache)
- [DROP MARK CACHE](#query_language-system-drop-mark-cache)
- [DROP UNCOMPRESSED CACHE](#query_language-system-drop-uncompressed-cache)
- [DROP COMPILED EXPRESSION CACHE](#query_language-system-drop-compiled-expression-cache)
- [DROP REPLICA](#query_language-system-drop-replica)
- [FLUSH LOGS](#query_language-system-flush_logs)
@ -24,10 +24,10 @@ toc_title: SYSTEM
- [START DISTRIBUTED SENDS](#query_language-system-start-distributed-sends)
- [STOP MERGES](#query_language-system-stop-merges)
- [START MERGES](#query_language-system-start-merges)
- [STOP TTL MERGES](#query_language-stop-ttl-merges)
- [START TTL MERGES](#query_language-start-ttl-merges)
- [STOP MOVES](#query_language-stop-moves)
- [START MOVES](#query_language-start-moves)
- [STOP FETCHES](#query_language-system-stop-fetches)
- [START FETCHES](#query_language-system-start-fetches)
- [STOP REPLICATED SENDS](#query_language-system-start-replicated-sends)
@ -36,13 +36,13 @@ toc_title: SYSTEM
- [START REPLICATION QUEUES](#query_language-system-start-replication-queues)
- [SYNC REPLICA](#query_language-system-sync-replica)
- [RESTART REPLICA](#query_language-system-restart-replica)
- [RESTART REPLICAS](#query_language-system-restart-replicas)

## RELOAD EMBEDDED DICTIONARIES {#query_language-system-reload-emdedded-dictionaries}

Reloads all [internal dictionaries](../dictionaries/internal-dicts.md).
Internal dictionaries are disabled by default.
Always returns `Ok.`, regardless of the result of updating the internal dictionaries.

## RELOAD DICTIONARIES {#query_language-system-reload-dictionaries}

Reloads all dictionaries that were previously loaded successfully.
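A quick usage sketch:

```sql
-- Reloads every dictionary that has already been loaded successfully.
SYSTEM RELOAD DICTIONARIES;
```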
@ -115,7 +115,7 @@ SYSTEM DROP REPLICA 'replica_name' FROM ZKPATH '/path/to/table/in/zk';
## DROP COMPILED EXPRESSION CACHE {#query_language-system-drop-compiled-expression-cache}
Resets the compiled expression cache. Used in ClickHouse development and performance tests.
The compiled expression cache is used when the query/user/profile-level setting [compile-expressions](../../operations/settings/settings.md#compile-expressions) is enabled.
## FLUSH LOGS {#query_language-system-flush_logs}
@ -194,7 +194,7 @@ SYSTEM START MERGES [ON VOLUME <volume_name> | [db.]merge_tree_family_table_name
SYSTEM STOP TTL MERGES [[db.]merge_tree_family_table_name]
```
### START TTL MERGES {#query_language-start-ttl-merges}

Starts background deletion of old data based on [TTL expressions](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl) for tables in the MergeTree family.
Returns `Ok.` even if the specified table does not exist or is not a MergeTree table. Returns an error if the specified database does not exist:
@ -203,7 +203,7 @@ SYSTEM STOP TTL MERGES [[db.]merge_tree_family_table_name]
SYSTEM START TTL MERGES [[db.]merge_tree_family_table_name]
```
### STOP MOVES {#query_language-stop-moves}

Stops background moving of data based on [table TTL expressions with TO VOLUME or TO DISK](../../engines/table-engines/mergetree-family/mergetree.md#mergetree-table-ttl) for tables in the MergeTree family.
Returns `Ok.` even if the specified table does not exist or is not a MergeTree table. Returns an error if the specified database does not exist:
@ -212,7 +212,7 @@ SYSTEM START TTL MERGES [[db.]merge_tree_family_table_name]
SYSTEM STOP MOVES [[db.]merge_tree_family_table_name]
```
### START MOVES {#query_language-start-moves}

Starts background moving of data based on [table TTL expressions with TO VOLUME or TO DISK](../../engines/table-engines/mergetree-family/mergetree.md#mergetree-table-ttl) for tables in the MergeTree family.
Returns `Ok.` even if the specified table does not exist or is not a MergeTree table. Returns an error if the specified database does not exist:
@ -261,7 +261,7 @@ SYSTEM START REPLICATED SENDS [[db.]replicated_merge_tree_family_table_name]
### STOP REPLICATION QUEUES {#query_language-system-stop-replication-queues}
Stops background processing of tasks from the replication queue stored in ZooKeeper for tables in the `ReplicatedMergeTree` family. Possible task types are merges, fetches, mutations, and DDL queries with ON CLUSTER:
``` sql
SYSTEM STOP REPLICATION QUEUES [[db.]replicated_merge_tree_family_table_name]
@ -269,7 +269,7 @@ SYSTEM STOP REPLICATION QUEUES [[db.]replicated_merge_tree_family_table_name]
### START REPLICATION QUEUES {#query_language-system-start-replication-queues}
Starts background processing of tasks from the replication queue stored in ZooKeeper for tables in the `ReplicatedMergeTree` family. Possible task types are merges, fetches, mutations, and DDL queries with ON CLUSTER:
``` sql
SYSTEM START REPLICATION QUEUES [[db.]replicated_merge_tree_family_table_name]
@ -277,7 +277,7 @@ SYSTEM START REPLICATION QUEUES [[db.]replicated_merge_tree_family_table_name]
### SYNC REPLICA {#query_language-system-sync-replica}
Waits until the `ReplicatedMergeTree` table is synchronized with the other replicas in the cluster. If synchronization for the table is currently disabled, it runs until `receive_timeout` is reached:
``` sql
SYSTEM SYNC REPLICA [db.]replicated_merge_tree_family_table_name

View File

@ -38,6 +38,18 @@ mysql('host:port', 'database', 'table', 'user', 'password'[, replace_query, 'on_
The remaining conditions and the `LIMIT` sampling constraint are executed in ClickHouse only after the query to MySQL finishes.

Multiple replicas are supported and must be listed by `|`. For example:
```sql
SELECT name FROM mysql(`mysql{1|2|3}:3306`, 'mysql_database', 'mysql_table', 'user', 'password');
```
or
```sql
SELECT name FROM mysql(`mysql1:3306|mysql2:3306|mysql3:3306`, 'mysql_database', 'mysql_table', 'user', 'password');
```
**Returned value**

A table object with the same columns as the original MySQL table.

View File

@ -43,6 +43,18 @@ PostgreSQL массивы конвертируются в массивы ClickHo
!!! info "Note"
    Be careful: in PostgreSQL, an array created as `type_name[]` is multidimensional and can contain a different number of dimensions in different rows of the same table. In ClickHouse, only multidimensional arrays with the same number of dimensions in all rows of the table are allowed.

Multiple replicas are supported and must be listed by `|`. For example:
```sql
SELECT name FROM postgresql(`postgres{1|2|3}:5432`, 'postgres_database', 'postgres_table', 'user', 'password');
```
or
```sql
SELECT name FROM postgresql(`postgres1:5431|postgres2:5432`, 'postgres_database', 'postgres_table', 'user', 'password');
```
Replica priority is supported for the PostgreSQL dictionary source. The bigger the replica number, the lower its priority. The replica numbered `0` has the highest priority.

View File

@ -47,6 +47,13 @@ When all prerequisites are installed, running `build.py` without args (there are
The easiest way to see the result is to use the `--livereload=8888` argument of build.py. Alternatively, you can manually launch an HTTP server to serve the docs, for example by running `cd ClickHouse/docs/build && python3 -m http.server 8888`. Then go to http://localhost:8888 in a browser. Feel free to use any other port instead of 8888.
## How to change code highlighting? {#how-to-change-code-hl}
ClickHouse does not use the mkdocs `highlightjs` feature. It uses modified pygments styles instead.
If you want to change code highlighting, edit the `website/css/highlight.css` file.
Currently, an [eighties](https://github.com/idleberg/base16-pygments/blob/master/css/base16-eighties.dark.css) theme is used.
## How to subscribe on documentation changes? {#how-to-subscribe-on-documentation-changes}
At the moment there's no easy way to do just that, but you can consider:

View File

@ -87,6 +87,7 @@ def build_for_lang(lang, args):
website_url = 'https://clickhouse.tech'
site_name = site_names.get(lang, site_names['en']) % ''
site_name = site_name.replace(' ', ' ')
raw_config = dict(
site_name=site_name,
site_url=f'{website_url}/docs/{lang}/',

View File

@ -0,0 +1,145 @@
---
toc_priority: 11
toc_title: PostgreSQL
---
# PostgreSQL {#postgresql}
The PostgreSQL engine allows ClickHouse to perform `SELECT` and `INSERT` queries on data stored on a remote PostgreSQL server.

## Creating a Table {#creating-a-table}
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1] [TTL expr1],
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2] [TTL expr2],
...
) ENGINE = PostgreSQL('host:port', 'database', 'table', 'user', 'password'[, `schema`]);
```
<!-- See the [CREATE TABLE](../../../sql-reference/statements/create/table.md#create-table-query) query for details. -->

The table structure can differ from the original PostgreSQL table structure:

- Column names should be the same as in the original PostgreSQL table, but you can use just some of them and in any order.
- Column types may differ from those in the original table. ClickHouse tries to [cast](../../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) values to the ClickHouse data types.
- The `external_table_functions_use_nulls` setting defines how to handle Nullable columns. Default value: 1. If 0, the table function does not make Nullable columns and inserts default values instead of nulls. This also applies to NULL values inside arrays.
**Engine Parameters**

- `host:port` — PostgreSQL server address.
- `database` — database name.
- `table` — table name.
- `user` — PostgreSQL user.
- `password` — user password.
- `schema` — non-default table schema. Optional.
## Implementation Details {#implementation-details}

`SELECT` queries on the PostgreSQL side run as `COPY (SELECT ...) TO STDOUT` inside a read-only PostgreSQL transaction, with a commit after each `SELECT` query.
Simple `WHERE` clauses, such as `=`, `!=`, `>`, `>=`, `<`, `<=`, and `IN`, are executed on the PostgreSQL server.
All joins, aggregations, sorting, `IN [ array ]` conditions and the `LIMIT` sampling constraint are executed in ClickHouse only after the query to PostgreSQL finishes.
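For example, in this hypothetical query against the `postgresql_table` defined in the usage example below, only the simple comparison is pushed down to PostgreSQL, while the `LIMIT` is applied afterwards in ClickHouse:

```sql
-- `int_id <= 10` runs on the PostgreSQL server;
-- LIMIT 3 is applied in ClickHouse after the data arrives.
SELECT * FROM postgresql_table
WHERE int_id <= 10
LIMIT 3;
```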
`INSERT` queries on the PostgreSQL side run as `COPY "table_name" (field1, field2, ... fieldN) FROM STDIN` inside a PostgreSQL transaction, with an auto-commit after each `INSERT` statement.
PostgreSQL `Array` types are converted into ClickHouse arrays.

!!! info "Note"
    Be careful: in PostgreSQL, array data created as `type_name[]` may contain multidimensional arrays of different dimensions in different rows of the same column. In ClickHouse, only multidimensional arrays with the same number of dimensions in all rows of a column are allowed.

Replica priority is supported for the PostgreSQL dictionary source. The bigger the number in the map, the lower the priority. The highest priority is `0`.
In the example below, replica `example01-1` has the highest priority.
```xml
<source>
<postgresql>
<port>5432</port>
<user>clickhouse</user>
<password>qwerty</password>
<replica>
<host>example01-1</host>
<priority>1</priority>
</replica>
<replica>
<host>example01-2</host>
<priority>2</priority>
</replica>
<db>db_name</db>
<table>table_name</table>
<where>id=10</where>
<invalidate_query>SQL_QUERY</invalidate_query>
</postgresql>
</source>
```
## Usage Example {#usage-example}

Table in PostgreSQL:
``` text
postgres=# CREATE TABLE "public"."test" (
"int_id" SERIAL,
"int_nullable" INT NULL DEFAULT NULL,
"float" FLOAT NOT NULL,
"str" VARCHAR(100) NOT NULL DEFAULT '',
"float_nullable" FLOAT NULL DEFAULT NULL,
PRIMARY KEY (int_id));
CREATE TABLE
postgres=# INSERT INTO test (int_id, str, "float") VALUES (1,'test',2);
INSERT 0 1
postgresql> SELECT * FROM test;
int_id | int_nullable | float | str | float_nullable
--------+--------------+-------+------+----------------
1 | | 2 | test |
(1 row)
```
Table in ClickHouse, retrieving data from the PostgreSQL table created above:
``` sql
CREATE TABLE default.postgresql_table
(
`float_nullable` Nullable(Float32),
`str` String,
`int_id` Int32
)
ENGINE = PostgreSQL('localhost:5432', 'public', 'test', 'postges_user', 'postgres_password');
```
``` sql
SELECT * FROM postgresql_table WHERE str IN ('test');
```
``` text
┌─float_nullable─┬─str──┬─int_id─┐
│ ᴺᵁᴸᴸ │ test │ 1 │
└────────────────┴──────┴────────┘
```
Using a non-default schema:
```text
postgres=# CREATE SCHEMA "nice.schema";
postgres=# CREATE TABLE "nice.schema"."nice.table" (a integer);
postgres=# INSERT INTO "nice.schema"."nice.table" SELECT i FROM generate_series(0, 99) as t(i)
```
```sql
CREATE TABLE pg_table_schema_with_dots (a UInt32)
ENGINE PostgreSQL('localhost:5432', 'clickhouse', 'nice.table', 'postgrsql_user', 'password', 'nice.schema');
```
**See Also**

<!-- - [The `postgresql` table function](../../../sql-reference/table-functions/postgresql.md) -->
- [Using PostgreSQL as a source of external dictionary](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md#dicts-external_dicts_dict_sources-postgresql)

[Original article](https://clickhouse.tech/docs/en/engines/table-engines/integrations/postgresql/) <!--hide-->

View File

@ -1,4 +1,4 @@
# Set {#set}

A data set that is always in RAM. It is intended for use on the right side of the `IN` operator (see the section «IN operators»).

File diff suppressed because it is too large

View File

@ -817,21 +817,22 @@ load_balancing = first_or_random
For consistency (to get different parts of the same data split), this option works only when the sampling key is set.
Replica lag is not controlled.
## compile_expressions {#compile-expressions}

Enables or disables compilation of frequently used simple functions and operators to native code with LLVM at runtime.

Possible values:

- 0 — disabled.
- 1 — enabled.

Default value: `1`.

Compilation is applied only to one part of the query processing pipeline: the first stage of aggregation (GROUP BY). If that part of the pipeline was compiled, a query may run faster thanks to short-loop unrolling and inlined aggregate function calls. The maximum performance improvement (up to four times faster in rare cases) is seen for queries with multiple simple aggregate functions; typically the gain is negligible, and in rare cases query execution may slow down.

## min_count_to_compile {#min-count-to-compile}

The number of times a compiled chunk of code could potentially be used before it is compiled. Default: 3. For testing, the value can be set to 0: compilation runs synchronously and the query waits for the end of the compilation process before continuing execution. For all other cases, use values starting with 1; compilation normally takes about 5-10 seconds. If the value is 1 or greater, compilation occurs asynchronously in a separate thread, and the result is used as soon as it is ready, including by currently running queries. Compiled code is required for each different combination of aggregate functions used in the query and the type of keys in the GROUP BY clause. The results of compilation are saved in the build directory in the form of .so files; there is no restriction on the number of compilation results since they don't use much space. Old results are used after server restarts, except after a server upgrade, when the old results are deleted.

## min_count_to_compile_expression {#min-count-to-compile-expression}

Minimum number of executions of the same expression before it is compiled.

Default value: `3`.
## output_format_json_quote_64bit_integers {#session_settings-output_format_json_quote_64bit_integers}

View File

@ -95,7 +95,7 @@ SYSTEM DROP REPLICA 'replica_name' FROM ZKPATH '/path/to/table/in/zk';
## DROP COMPILED EXPRESSION CACHE {#query_language-system-drop-compiled-expression-cache}
Resets the compiled expression cache. Used in ClickHouse development and performance tests.
The compiled expression cache is used when the query/user/profile-level setting [compile-expressions](../../operations/settings/settings.md#compile-expressions) is enabled.
## FLUSH LOGS {#query_language-system-flush_logs}
@ -209,7 +209,7 @@ SYSTEM STOP MOVES [[db.]merge_tree_family_table_name]
### STOP FETCHES {#query_language-system-stop-fetches}
Stops background fetches of inserted data blocks for tables in the `ReplicatedMergeTree` family.
Always returns `Ok.` regardless of the table engine, and even if the table or database does not exist.
``` sql
@ -218,7 +218,7 @@ SYSTEM STOP FETCHES [[db.]replicated_merge_tree_family_table_name]
### START FETCHES {#query_language-system-start-fetches}
Starts background fetches of inserted data blocks for tables in the `ReplicatedMergeTree` family.
Always returns `Ok.` regardless of the table engine, and even if the table or database does not exist.
``` sql
@ -227,7 +227,7 @@ SYSTEM START FETCHES [[db.]replicated_merge_tree_family_table_name]
### STOP REPLICATED SENDS {#query_language-system-start-replicated-sends}
Stops background sends of newly inserted data blocks to other replicas in the cluster for tables in the `ReplicatedMergeTree` family.
``` sql
SYSTEM STOP REPLICATED SENDS [[db.]replicated_merge_tree_family_table_name]
@ -235,7 +235,7 @@ SYSTEM STOP REPLICATED SENDS [[db.]replicated_merge_tree_family_table_name]
### START REPLICATED SENDS {#query_language-system-start-replicated-sends}
Starts background sends of newly inserted data blocks to other replicas in the cluster for tables in the `ReplicatedMergeTree` family.
``` sql
SYSTEM START REPLICATED SENDS [[db.]replicated_merge_tree_family_table_name]

View File

@ -0,0 +1,120 @@
---
toc_priority: 42
toc_title: postgresql
---
# postgresql {#postgresql}
Allows performing `SELECT` and `INSERT` queries on data stored on a remote PostgreSQL server.

**Syntax**
``` sql
postgresql('host:port', 'database', 'table', 'user', 'password'[, `schema`])
```
**Arguments**

- `host:port` — PostgreSQL server address.
- `database` — remote database name.
- `table` — remote table name.
- `user` — PostgreSQL user.
- `password` — user password.
- `schema` — non-default table schema. Optional.
**Returned value**

A table object with the same columns as the original PostgreSQL table.

!!! info "Note"
    In an `INSERT` query, to distinguish the table function `postgresql(...)` from a table name with a list of column names, you must use the keywords `FUNCTION` or `TABLE FUNCTION`. See the examples below.
## Implementation Details {#implementation-details}

`SELECT` queries on the PostgreSQL side run as `COPY (SELECT ...) TO STDOUT` inside a read-only PostgreSQL transaction, with a commit after each `SELECT` query.
Simple `WHERE` clauses, such as `=`, `!=`, `>`, `>=`, `<`, `<=`, and `IN`, are executed on the PostgreSQL server.
All joins, aggregations, sorting, `IN [ array ]` conditions and the `LIMIT` sampling constraint are executed in ClickHouse only after the query to PostgreSQL finishes.
`INSERT` queries on the PostgreSQL side run as `COPY "table_name" (field1, field2, ... fieldN) FROM STDIN` inside a PostgreSQL transaction, with an auto-commit after each `INSERT` statement.
PostgreSQL array types are converted into ClickHouse arrays.

!!! info "Note"
    Be careful: in PostgreSQL, a column of an array data type such as Integer[] may contain arrays of different dimensions in different rows, but in ClickHouse only multidimensional arrays of the same dimension are allowed in all rows.

Replica priority is supported for the PostgreSQL dictionary source. The bigger the number in the map, the lower the priority. The highest priority is `0`.
**Examples**

Table in PostgreSQL:
``` text
postgres=# CREATE TABLE "public"."test" (
"int_id" SERIAL,
"int_nullable" INT NULL DEFAULT NULL,
"float" FLOAT NOT NULL,
"str" VARCHAR(100) NOT NULL DEFAULT '',
"float_nullable" FLOAT NULL DEFAULT NULL,
PRIMARY KEY (int_id));
CREATE TABLE
postgres=# INSERT INTO test (int_id, str, "float") VALUES (1,'test',2);
INSERT 0 1
postgresql> SELECT * FROM test;
int_id | int_nullable | float | str | float_nullable
--------+--------------+-------+------+----------------
1 | | 2 | test |
(1 row)
```
Selecting data from ClickHouse:
```sql
SELECT * FROM postgresql('localhost:5432', 'test', 'test', 'postgresql_user', 'password') WHERE str IN ('test');
```
``` text
┌─int_id─┬─int_nullable─┬─float─┬─str──┬─float_nullable─┐
│ 1 │ ᴺᵁᴸᴸ │ 2 │ test │ ᴺᵁᴸᴸ │
└────────┴──────────────┴───────┴──────┴────────────────┘
```
Inserting data:
```sql
INSERT INTO TABLE FUNCTION postgresql('localhost:5432', 'test', 'test', 'postgrsql_user', 'password') (int_id, float) VALUES (2, 3);
SELECT * FROM postgresql('localhost:5432', 'test', 'test', 'postgresql_user', 'password');
```
``` text
┌─int_id─┬─int_nullable─┬─float─┬─str──┬─float_nullable─┐
│ 1 │ ᴺᵁᴸᴸ │ 2 │ test │ ᴺᵁᴸᴸ │
│ 2 │ ᴺᵁᴸᴸ │ 3 │ │ ᴺᵁᴸᴸ │
└────────┴──────────────┴───────┴──────┴────────────────┘
```
Using a non-default schema:
```text
postgres=# CREATE SCHEMA "nice.schema";
postgres=# CREATE TABLE "nice.schema"."nice.table" (a integer);
postgres=# INSERT INTO "nice.schema"."nice.table" SELECT i FROM generate_series(0, 99) as t(i)
```
```sql
CREATE TABLE pg_table_schema_with_dots (a UInt32)
ENGINE PostgreSQL('localhost:5432', 'clickhouse', 'nice.table', 'postgrsql_user', 'password', 'nice.schema');
```
**See Also**

- [The PostgreSQL table engine](../../engines/table-engines/integrations/postgresql.md)
- [Using PostgreSQL as a source of external dictionary](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md#dicts-external_dicts_dict_sources-postgresql)

[Original article](https://clickhouse.tech/docs/en/sql-reference/table-functions/postgresql/) <!--hide-->

View File

@ -33,6 +33,7 @@ option (ENABLE_CLICKHOUSE_OBFUSCATOR "Table data obfuscator (convert real data t
${ENABLE_CLICKHOUSE_ALL})
# https://clickhouse.tech/docs/en/operations/utilities/odbc-bridge/
# TODO Also needs NANODBC.
if (ENABLE_ODBC)
option (ENABLE_CLICKHOUSE_ODBC_BRIDGE "HTTP-server working like a proxy to ODBC driver"
${ENABLE_CLICKHOUSE_ALL})

View File

@ -20,6 +20,7 @@ CLICKHOUSE_QueryProcessingStage=(
fetch_columns
with_mergeable_state
with_mergeable_state_after_aggregation
with_mergeable_state_after_aggregation_and_limit
)
CLICKHOUSE_Format=(

View File

@ -580,7 +580,7 @@ int mainEntryClickHouseBenchmark(int argc, char ** argv)
("query", value<std::string>()->default_value(""), "query to execute")
("concurrency,c", value<unsigned>()->default_value(1), "number of parallel queries")
("delay,d", value<double>()->default_value(1), "delay between intermediate reports in seconds (set 0 to disable reports)")
("stage", value<std::string>()->default_value("complete"), "request query processing up to specified stage: complete,fetch_columns,with_mergeable_state,with_mergeable_state_after_aggregation")
("stage", value<std::string>()->default_value("complete"), "request query processing up to specified stage: complete,fetch_columns,with_mergeable_state,with_mergeable_state_after_aggregation,with_mergeable_state_after_aggregation_and_limit")
("iterations,i", value<size_t>()->default_value(0), "amount of queries to be executed")
("timelimit,t", value<double>()->default_value(0.), "stop launch of queries after specified time limit")
("randomize,r", value<bool>()->default_value(false), "randomize order of execution")

View File

@ -29,7 +29,6 @@
#include <common/find_symbols.h>
#include <common/LineReader.h>
#include <Common/ClickHouseRevision.h>
#include <Common/Stopwatch.h>
#include <Common/Exception.h>
#include <Common/ShellCommand.h>
#include <Common/UnicodeBar.h>
@ -85,7 +84,7 @@
#include <common/argsToConfig.h>
#include <Common/TerminalSize.h>
#include <Common/UTF8Helpers.h>
#include <Common/ProgressBar.h>
#include <Common/ProgressIndication.h>
#include <filesystem>
#include <Common/filesystemHelpers.h>
@ -113,6 +112,7 @@ namespace ErrorCodes
extern const int DEADLOCK_AVOIDED;
extern const int UNRECOGNIZED_ARGUMENTS;
extern const int SYNTAX_ERROR;
extern const int TOO_DEEP_RECURSION;
}
@ -230,13 +230,13 @@ private:
String server_version;
String server_display_name;
Stopwatch watch;
/// true by default - for interactive mode, might be changed when --progress option is checked for
/// non-interactive mode.
bool need_render_progress = true;
/// The server periodically sends information about how much data was read since last time.
Progress progress;
bool written_first_block = false;
/// Progress bar
ProgressBar progress_bar;
ProgressIndication progress_indication;
/// External tables info.
std::list<ExternalTable> external_tables;
@ -536,7 +536,7 @@ private:
if (!is_interactive)
{
progress_bar.need_render_progress = config().getBool("progress", false);
need_render_progress = config().getBool("progress", false);
echo_queries = config().getBool("echo", false);
ignore_error = config().getBool("ignore-error", false);
}
@ -577,7 +577,18 @@ private:
}
if (!history_file.empty() && !fs::exists(history_file))
FS::createFile(history_file);
{
/// Avoid TOCTOU issue.
try
{
FS::createFile(history_file);
}
catch (const ErrnoException & e)
{
if (e.getErrno() != EEXIST)
throw;
}
}
LineReader::Patterns query_extenders = {"\\"};
LineReader::Patterns query_delimiters = {";", "\\G"};
@ -1268,7 +1279,8 @@ private:
}
catch (const Exception & e)
{
if (e.code() != ErrorCodes::SYNTAX_ERROR)
if (e.code() != ErrorCodes::SYNTAX_ERROR &&
e.code() != ErrorCodes::TOO_DEEP_RECURSION)
throw;
}
@ -1450,10 +1462,9 @@ private:
}
catch (Exception & e)
{
if (e.code() != ErrorCodes::SYNTAX_ERROR)
{
if (e.code() != ErrorCodes::SYNTAX_ERROR &&
e.code() != ErrorCodes::TOO_DEEP_RECURSION)
throw;
}
}
if (ast_2)
@ -1578,12 +1589,9 @@ private:
}
}
watch.restart();
processed_rows = 0;
progress.reset();
progress_bar.show_progress_bar = false;
progress_bar.written_progress_chars = 0;
progress_bar.written_first_block = false;
written_first_block = false;
progress_indication.resetProgress();
{
/// Temporarily apply query settings to context.
@ -1651,16 +1659,15 @@ private:
if (is_interactive)
{
std::cout << std::endl << processed_rows << " rows in set. Elapsed: " << watch.elapsedSeconds() << " sec. ";
if (progress.read_rows >= 1000)
writeFinalProgress();
std::cout << std::endl << processed_rows << " rows in set. Elapsed: " << progress_indication.elapsedSeconds() << " sec. ";
/// Write final progress if it makes sense to do so.
writeFinalProgress();
std::cout << std::endl << std::endl;
}
else if (print_time_to_stderr)
{
std::cerr << watch.elapsedSeconds() << "\n";
std::cerr << progress_indication.elapsedSeconds() << "\n";
}
}
@ -1835,6 +1842,19 @@ private:
/// Send data read from stdin.
try
{
if (need_render_progress)
{
/// Set total_bytes_to_read for current fd.
FileProgress file_progress(0, std_in.size());
progress_indication.updateProgress(Progress(file_progress));
/// Set callback to be called on file progress.
progress_indication.setFileProgressCallback(context, true);
/// Add callback to track reading from fd.
std_in.setProgressCallback(context);
}
sendDataFrom(std_in, sample, columns_description);
}
catch (Exception & e)
@ -1957,7 +1977,7 @@ private:
cancelled = true;
if (is_interactive)
{
progress_bar.clearProgress();
progress_indication.clearProgressOutput();
std::cout << "Cancelling query." << std::endl;
}
@ -2184,7 +2204,7 @@ private:
current_format = "Vertical";
/// It is not clear how to write progress with parallel formatting. It may increase code complexity significantly.
if (!progress_bar.need_render_progress)
if (!need_render_progress)
block_out_stream = context->getOutputStreamParallelIfPossible(current_format, *out_buf, block);
else
block_out_stream = context->getOutputStream(current_format, *out_buf, block);
@ -2243,25 +2263,25 @@ private:
if (block.rows() == 0 || (query_fuzzer_runs != 0 && processed_rows >= 100))
return;
if (progress_bar.need_render_progress)
progress_bar.clearProgress();
if (need_render_progress)
progress_indication.clearProgressOutput();
block_out_stream->write(block);
progress_bar.written_first_block = true;
written_first_block = true;
/// Received data block is immediately displayed to the user.
block_out_stream->flush();
/// Restore progress bar after data block.
if (progress_bar.need_render_progress)
progress_bar.writeProgress(progress, watch.elapsed());
if (need_render_progress)
progress_indication.writeProgress();
}
void onLogData(Block & block)
{
initLogsOutputStream();
progress_bar.clearProgress();
progress_indication.clearProgressOutput();
logs_out_stream->write(block);
logs_out_stream->flush();
}
@ -2282,28 +2302,23 @@ private:
void onProgress(const Progress & value)
{
if (!progress_bar.updateProgress(progress, value))
if (!progress_indication.updateProgress(value))
{
// Just a keep-alive update.
return;
}
if (block_out_stream)
block_out_stream->onProgress(value);
progress_bar.writeProgress(progress, watch.elapsed());
if (need_render_progress)
progress_indication.writeProgress();
}
void writeFinalProgress()
{
std::cout << "Processed " << formatReadableQuantity(progress.read_rows) << " rows, "
<< formatReadableSizeWithDecimalSuffix(progress.read_bytes);
size_t elapsed_ns = watch.elapsed();
if (elapsed_ns)
std::cout << " (" << formatReadableQuantity(progress.read_rows * 1000000000.0 / elapsed_ns) << " rows/s., "
<< formatReadableSizeWithDecimalSuffix(progress.read_bytes * 1000000000.0 / elapsed_ns) << "/s.)";
else
std::cout << ". ";
progress_indication.writeFinalProgress();
}
@ -2324,7 +2339,7 @@ private:
void onEndOfStream()
{
progress_bar.clearProgress();
progress_indication.clearProgressOutput();
if (block_out_stream)
block_out_stream->writeSuffix();
@ -2334,9 +2349,9 @@ private:
resetOutput();
if (is_interactive && !progress_bar.written_first_block)
if (is_interactive && !written_first_block)
{
progress_bar.clearProgress();
progress_indication.clearProgressOutput();
std::cout << "Ok." << std::endl;
}
}
@ -2431,6 +2446,8 @@ public:
{
/// param_name value
++arg_num;
if (arg_num >= argc)
throw Exception("Parameter requires value", ErrorCodes::BAD_ARGUMENTS);
arg = argv[arg_num];
query_parameters.emplace(String(param_continuation), String(arg));
}
@ -2468,7 +2485,7 @@ public:
("password", po::value<std::string>()->implicit_value("\n", ""), "password")
("ask-password", "ask-password")
("quota_key", po::value<std::string>(), "A string to differentiate quotas when the user have keyed quotas configured on server")
("stage", po::value<std::string>()->default_value("complete"), "Request query processing up to specified stage: complete,fetch_columns,with_mergeable_state,with_mergeable_state_after_aggregation")
("stage", po::value<std::string>()->default_value("complete"), "Request query processing up to specified stage: complete,fetch_columns,with_mergeable_state,with_mergeable_state_after_aggregation,with_mergeable_state_after_aggregation_and_limit")
("query_id", po::value<std::string>(), "query_id")
("query,q", po::value<std::string>(), "query")
("database,d", po::value<std::string>(), "database")

View File

@ -286,7 +286,7 @@ inline TaskTable::TaskTable(TaskCluster & parent, const Poco::Util::AbstractConf
+ "." + escapeForFileName(table_push.first)
+ "." + escapeForFileName(table_push.second);
engine_push_str = config.getString(table_prefix + "engine");
engine_push_str = config.getString(table_prefix + "engine", "rand()");
{
ParserStorage parser_storage;

View File

@ -389,32 +389,29 @@ void LocalServer::processQueries()
CurrentThread::QueryScope query_scope_holder(context);
///Set progress show
progress_bar.need_render_progress = config().getBool("progress", false);
need_render_progress = config().getBool("progress", false);
if (progress_bar.need_render_progress)
if (need_render_progress)
{
context->setProgressCallback([&](const Progress & value)
{
if (!progress_bar.updateProgress(progress, value))
{
// Just a keep-alive update.
return;
}
progress_bar.writeProgress(progress, watch.elapsed());
});
{
/// Write progress only if progress was updated
if (progress_indication.updateProgress(value))
progress_indication.writeProgress();
});
}
bool echo_queries = config().hasOption("echo") || config().hasOption("verbose");
if (need_render_progress)
progress_indication.setFileProgressCallback(context);
std::exception_ptr exception;
for (const auto & query : queries)
{
watch.restart();
progress.reset();
progress_bar.show_progress_bar = false;
progress_bar.written_progress_chars = 0;
progress_bar.written_first_block = false;
written_first_block = false;
progress_indication.resetProgress();
ReadBufferFromString read_buf(query);
WriteBufferFromFileDescriptor write_buf(STDOUT_FILENO);

View File

@ -7,7 +7,7 @@
#include <Interpreters/Context.h>
#include <loggers/Loggers.h>
#include <Poco/Util/Application.h>
#include <Common/ProgressBar.h>
#include <Common/ProgressIndication.h>
namespace DB
{
@ -49,9 +49,12 @@ protected:
/// Settings specified via command line args
Settings cmd_settings;
ProgressBar progress_bar;
Progress progress;
Stopwatch watch;
bool need_render_progress = false;
bool written_first_block = false;
ProgressIndication progress_indication;
std::optional<std::filesystem::path> temporary_directory_to_delete;
};

Some files were not shown because too many files have changed in this diff