Merge branch 'master' into zookeeper_snapshots

Commit 04814114e7 by mergify[bot] on 2021-06-28 14:17:32 +00:00, committed by GitHub.
223 changed files with 3730 additions and 1460 deletions


@ -184,10 +184,27 @@ endif ()
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -rdynamic")
find_program (OBJCOPY_PATH NAMES "llvm-objcopy" "llvm-objcopy-12" "llvm-objcopy-11" "llvm-objcopy-10" "llvm-objcopy-9" "llvm-objcopy-8" "objcopy")
if (NOT OBJCOPY_PATH AND OS_DARWIN)
find_program (BREW_PATH NAMES "brew")
if (BREW_PATH)
execute_process (COMMAND ${BREW_PATH} --prefix llvm ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE LLVM_PREFIX)
if (LLVM_PREFIX)
find_program (OBJCOPY_PATH NAMES "llvm-objcopy" PATHS "${LLVM_PREFIX}/bin" NO_DEFAULT_PATH)
endif ()
if (NOT OBJCOPY_PATH)
execute_process (COMMAND ${BREW_PATH} --prefix binutils ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE BINUTILS_PREFIX)
if (BINUTILS_PREFIX)
find_program (OBJCOPY_PATH NAMES "objcopy" PATHS "${BINUTILS_PREFIX}/bin" NO_DEFAULT_PATH)
endif ()
endif ()
endif ()
endif ()
if (OBJCOPY_PATH)
message(STATUS "Using objcopy: ${OBJCOPY_PATH}.")
message (STATUS "Using objcopy: ${OBJCOPY_PATH}")
else ()
message(FATAL_ERROR "Cannot find objcopy.")
message (FATAL_ERROR "Cannot find objcopy.")
endif ()
if (OS_DARWIN)


@ -17,7 +17,7 @@ class DateLUT : private boost::noncopyable
{
public:
/// Return singleton DateLUTImpl instance for the default time zone.
static ALWAYS_INLINE const DateLUTImpl & instance()
static ALWAYS_INLINE const DateLUTImpl & instance() // -V1071
{
const auto & date_lut = getInstance();
return *date_lut.default_impl.load(std::memory_order_acquire);


@ -1,9 +1,9 @@
# This strings autochanged from release_lib.sh:
SET(VERSION_REVISION 54452)
SET(VERSION_REVISION 54453)
SET(VERSION_MAJOR 21)
SET(VERSION_MINOR 7)
SET(VERSION_MINOR 8)
SET(VERSION_PATCH 1)
SET(VERSION_GITHASH 976ccc2e908ac3bc28f763bfea8134ea0a121b40)
SET(VERSION_DESCRIBE v21.7.1.1-prestable)
SET(VERSION_STRING 21.7.1.1)
SET(VERSION_GITHASH fb895056568e26200629c7d19626e92d2dedc70d)
SET(VERSION_DESCRIBE v21.8.1.1-prestable)
SET(VERSION_STRING 21.8.1.1)
# end of autochange


@ -33,44 +33,25 @@ macro(clickhouse_embed_binaries)
message(FATAL_ERROR "The list of binary resources to embed may not be empty")
endif()
# If cross-compiling, ensure we use the toolchain file and target the
# actual target architecture
if (CMAKE_CROSSCOMPILING)
set(CROSS_COMPILE_FLAGS "--target=${CMAKE_C_COMPILER_TARGET} --gcc-toolchain=${TOOLCHAIN_FILE}")
else()
set(CROSS_COMPILE_FLAGS "")
endif()
add_library("${EMBED_TARGET}" STATIC)
set_target_properties("${EMBED_TARGET}" PROPERTIES LINKER_LANGUAGE C)
set(EMBED_TEMPLATE_FILE "${PROJECT_SOURCE_DIR}/programs/embed_binary.S.in")
set(RESOURCE_OBJS)
foreach(RESOURCE_FILE ${EMBED_RESOURCES})
set(RESOURCE_OBJ "${RESOURCE_FILE}.o")
list(APPEND RESOURCE_OBJS "${RESOURCE_OBJ}")
# Normalize the name of the resource
foreach(RESOURCE_FILE ${EMBED_RESOURCES})
set(ASSEMBLY_FILE_NAME "${RESOURCE_FILE}.S")
set(BINARY_FILE_NAME "${RESOURCE_FILE}")
# Normalize the name of the resource.
string(REGEX REPLACE "[\./-]" "_" SYMBOL_NAME "${RESOURCE_FILE}") # - must be last in regex
string(REPLACE "+" "_PLUS_" SYMBOL_NAME "${SYMBOL_NAME}")
set(ASSEMBLY_FILE_NAME "${RESOURCE_FILE}.S")
# Put the configured assembly file in the output directory.
# This is so we can clean it up as usual, and we CD to the
# source directory before compiling, so that the assembly
# `.incbin` directive can find the file.
# Generate the configured assembly file in the output directory.
configure_file("${EMBED_TEMPLATE_FILE}" "${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}" @ONLY)
# Generate the output object file by compiling the assembly, in the directory of
# the sources so that the resource file may also be found
add_custom_command(
OUTPUT ${RESOURCE_OBJ}
COMMAND cd "${EMBED_RESOURCE_DIR}" &&
${CMAKE_C_COMPILER} "${CROSS_COMPILE_FLAGS}" -c -o
"${CMAKE_CURRENT_BINARY_DIR}/${RESOURCE_OBJ}"
"${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}"
)
set_source_files_properties("${RESOURCE_OBJ}" PROPERTIES EXTERNAL_OBJECT true GENERATED true)
endforeach()
# Set the include directory for relative paths specified for `.incbin` directive.
set_property(SOURCE "${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}" APPEND PROPERTY INCLUDE_DIRECTORIES "${EMBED_RESOURCE_DIR}")
add_library("${EMBED_TARGET}" STATIC ${RESOURCE_OBJS})
set_target_properties("${EMBED_TARGET}" PROPERTIES LINKER_LANGUAGE C)
target_sources("${EMBED_TARGET}" PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}")
endforeach()
endmacro()


@ -4,7 +4,6 @@ set (CMAKE_C_COMPILER_TARGET "aarch64-linux-gnu")
set (CMAKE_CXX_COMPILER_TARGET "aarch64-linux-gnu")
set (CMAKE_ASM_COMPILER_TARGET "aarch64-linux-gnu")
set (CMAKE_SYSROOT "${CMAKE_CURRENT_LIST_DIR}/../toolchain/linux-aarch64/aarch64-linux-gnu/libc")
get_filename_component (TOOLCHAIN_FILE "${CMAKE_TOOLCHAIN_FILE}" REALPATH)
# We don't use compiler from toolchain because it's gcc-8, and we provide support only for gcc-9.
set (CMAKE_AR "${CMAKE_CURRENT_LIST_DIR}/../toolchain/linux-aarch64/bin/aarch64-linux-gnu-ar" CACHE FILEPATH "" FORCE)


@ -26,7 +26,7 @@ if (NOT USE_INTERNAL_CCTZ_LIBRARY)
set_property (TARGET cctz PROPERTY IMPORTED_LOCATION ${LIBRARY_CCTZ})
set_property (TARGET cctz PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${INCLUDE_CCTZ})
endif()
set(SYSTEM_STORAGE_TZ_FILE "${CMAKE_BINARY_DIR}/src/Storages/System/StorageSystemTimeZones.generated.cpp")
file(REMOVE ${SYSTEM_STORAGE_TZ_FILE})
file(APPEND ${SYSTEM_STORAGE_TZ_FILE} "// autogenerated by ClickHouse/contrib/cctz-cmake/CMakeLists.txt\n")


@ -1,7 +1,7 @@
add_library(murmurhash
src/murmurhash2.cpp
src/murmurhash3.cpp
include/murmurhash2.h
include/murmurhash3.h)
src/MurmurHash2.cpp
src/MurmurHash3.cpp
include/MurmurHash2.h
include/MurmurHash3.h)
target_include_directories (murmurhash PUBLIC include)


@ -0,0 +1,49 @@
//-----------------------------------------------------------------------------
// MurmurHash2 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.
#ifndef MURMURHASH2_H
#define MURMURHASH2_H
#include <stddef.h>
//-----------------------------------------------------------------------------
// Platform-specific functions and macros
// Microsoft Visual Studio
#if defined(_MSC_VER) && (_MSC_VER < 1600)
typedef unsigned char uint8_t;
typedef unsigned int uint32_t;
typedef unsigned __int64 uint64_t;
// Other compilers
#else // defined(_MSC_VER)
#include <stdint.h>
#endif // !defined(_MSC_VER)
//-----------------------------------------------------------------------------
#ifdef __cplusplus
extern "C" {
#endif
uint32_t MurmurHash2 ( const void * key, size_t len, uint32_t seed );
uint64_t MurmurHash64A ( const void * key, size_t len, uint64_t seed );
uint64_t MurmurHash64B ( const void * key, size_t len, uint64_t seed );
uint32_t MurmurHash2A ( const void * key, size_t len, uint32_t seed );
uint32_t MurmurHashNeutral2 ( const void * key, size_t len, uint32_t seed );
uint32_t MurmurHashAligned2 ( const void * key, size_t len, uint32_t seed );
#ifdef __cplusplus
}
#endif
//-----------------------------------------------------------------------------
#endif // _MURMURHASH2_H_


@ -2,7 +2,10 @@
// MurmurHash3 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.
#pragma once
#ifndef MURMURHASH3_H
#define MURMURHASH3_H
#include <stddef.h>
//-----------------------------------------------------------------------------
// Platform-specific functions and macros
@ -23,20 +26,22 @@ typedef unsigned __int64 uint64_t;
#endif // !defined(_MSC_VER)
//-----------------------------------------------------------------------------
#ifdef __cplusplus
extern "C" {
#endif
//-----------------------------------------------------------------------------
void MurmurHash3_x86_32 ( const void * key, size_t len, uint32_t seed, void * out );
void MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed, void * out );
void MurmurHash3_x86_128 ( const void * key, size_t len, uint32_t seed, void * out );
void MurmurHash3_x86_128 ( const void * key, int len, uint32_t seed, void * out );
void MurmurHash3_x64_128 ( const void * key, int len, uint32_t seed, void * out );
//-----------------------------------------------------------------------------
void MurmurHash3_x64_128 ( const void * key, size_t len, uint32_t seed, void * out );
#ifdef __cplusplus
}
#endif
//-----------------------------------------------------------------------------
#endif // _MURMURHASH3_H_


@ -1,31 +0,0 @@
//-----------------------------------------------------------------------------
// MurmurHash2 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.
#pragma once
//-----------------------------------------------------------------------------
// Platform-specific functions and macros
// Microsoft Visual Studio
#if defined(_MSC_VER) && (_MSC_VER < 1600)
typedef unsigned char uint8_t;
typedef unsigned int uint32_t;
typedef unsigned __int64 uint64_t;
// Other compilers
#else // defined(_MSC_VER)
#include <stdint.h>
#endif // !defined(_MSC_VER)
uint32_t MurmurHash2 (const void * key, int len, uint32_t seed);
uint64_t MurmurHash64A (const void * key, int len, uint64_t seed);
uint64_t MurmurHash64B (const void * key, int len, uint64_t seed);
uint32_t MurmurHash2A (const void * key, int len, uint32_t seed);
uint32_t MurmurHashNeutral2 (const void * key, int len, uint32_t seed);
uint32_t MurmurHashAligned2 (const void * key, int len, uint32_t seed);


@ -0,0 +1,523 @@
//-----------------------------------------------------------------------------
// MurmurHash2 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.
// Note - This code makes a few assumptions about how your machine behaves -
// 1. We can read a 4-byte value from any address without crashing
// 2. sizeof(int) == 4
// And it has a few limitations -
// 1. It will not work incrementally.
// 2. It will not produce the same results on little-endian and big-endian
// machines.
#include "MurmurHash2.h"
//-----------------------------------------------------------------------------
// Platform-specific functions and macros
// Microsoft Visual Studio
#if defined(_MSC_VER)
#define BIG_CONSTANT(x) (x)
// Other compilers
#else // defined(_MSC_VER)
#define BIG_CONSTANT(x) (x##LLU)
#endif // !defined(_MSC_VER)
//-----------------------------------------------------------------------------
uint32_t MurmurHash2 ( const void * key, size_t len, uint32_t seed )
{
// 'm' and 'r' are mixing constants generated offline.
// They're not really 'magic', they just happen to work well.
const uint32_t m = 0x5bd1e995;
const int r = 24;
// Initialize the hash to a 'random' value
uint32_t h = seed ^ len;
// Mix 4 bytes at a time into the hash
const unsigned char * data = (const unsigned char *)key;
while(len >= 4)
{
uint32_t k = *(uint32_t*)data;
k *= m;
k ^= k >> r;
k *= m;
h *= m;
h ^= k;
data += 4;
len -= 4;
}
// Handle the last few bytes of the input array
switch(len)
{
case 3: h ^= data[2] << 16;
case 2: h ^= data[1] << 8;
case 1: h ^= data[0];
h *= m;
};
// Do a few final mixes of the hash to ensure the last few
// bytes are well-incorporated.
h ^= h >> 13;
h *= m;
h ^= h >> 15;
return h;
}
//-----------------------------------------------------------------------------
// MurmurHash2, 64-bit versions, by Austin Appleby
// The same caveats as 32-bit MurmurHash2 apply here - beware of alignment
// and endian-ness issues if used across multiple platforms.
// 64-bit hash for 64-bit platforms
uint64_t MurmurHash64A ( const void * key, size_t len, uint64_t seed )
{
const uint64_t m = BIG_CONSTANT(0xc6a4a7935bd1e995);
const int r = 47;
uint64_t h = seed ^ (len * m);
const uint64_t * data = (const uint64_t *)key;
const uint64_t * end = data + (len/8);
while(data != end)
{
uint64_t k = *data++;
k *= m;
k ^= k >> r;
k *= m;
h ^= k;
h *= m;
}
const unsigned char * data2 = (const unsigned char*)data;
switch(len & 7)
{
case 7: h ^= uint64_t(data2[6]) << 48;
case 6: h ^= uint64_t(data2[5]) << 40;
case 5: h ^= uint64_t(data2[4]) << 32;
case 4: h ^= uint64_t(data2[3]) << 24;
case 3: h ^= uint64_t(data2[2]) << 16;
case 2: h ^= uint64_t(data2[1]) << 8;
case 1: h ^= uint64_t(data2[0]);
h *= m;
};
h ^= h >> r;
h *= m;
h ^= h >> r;
return h;
}
// 64-bit hash for 32-bit platforms
uint64_t MurmurHash64B ( const void * key, size_t len, uint64_t seed )
{
const uint32_t m = 0x5bd1e995;
const int r = 24;
uint32_t h1 = uint32_t(seed) ^ len;
uint32_t h2 = uint32_t(seed >> 32);
const uint32_t * data = (const uint32_t *)key;
while(len >= 8)
{
uint32_t k1 = *data++;
k1 *= m; k1 ^= k1 >> r; k1 *= m;
h1 *= m; h1 ^= k1;
len -= 4;
uint32_t k2 = *data++;
k2 *= m; k2 ^= k2 >> r; k2 *= m;
h2 *= m; h2 ^= k2;
len -= 4;
}
if(len >= 4)
{
uint32_t k1 = *data++;
k1 *= m; k1 ^= k1 >> r; k1 *= m;
h1 *= m; h1 ^= k1;
len -= 4;
}
switch(len)
{
case 3: h2 ^= ((unsigned char*)data)[2] << 16;
case 2: h2 ^= ((unsigned char*)data)[1] << 8;
case 1: h2 ^= ((unsigned char*)data)[0];
h2 *= m;
};
h1 ^= h2 >> 18; h1 *= m;
h2 ^= h1 >> 22; h2 *= m;
h1 ^= h2 >> 17; h1 *= m;
h2 ^= h1 >> 19; h2 *= m;
uint64_t h = h1;
h = (h << 32) | h2;
return h;
}
//-----------------------------------------------------------------------------
// MurmurHash2A, by Austin Appleby
// This is a variant of MurmurHash2 modified to use the Merkle-Damgard
// construction. Bulk speed should be identical to Murmur2, small-key speed
// will be 10%-20% slower due to the added overhead at the end of the hash.
// This variant fixes a minor issue where null keys were more likely to
// collide with each other than expected, and also makes the function
// more amenable to incremental implementations.
#define mmix(h,k) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
uint32_t MurmurHash2A ( const void * key, size_t len, uint32_t seed )
{
const uint32_t m = 0x5bd1e995;
const int r = 24;
uint32_t l = len;
const unsigned char * data = (const unsigned char *)key;
uint32_t h = seed;
while(len >= 4)
{
uint32_t k = *(uint32_t*)data;
mmix(h,k);
data += 4;
len -= 4;
}
uint32_t t = 0;
switch(len)
{
case 3: t ^= data[2] << 16;
case 2: t ^= data[1] << 8;
case 1: t ^= data[0];
};
mmix(h,t);
mmix(h,l);
h ^= h >> 13;
h *= m;
h ^= h >> 15;
return h;
}
//-----------------------------------------------------------------------------
// CMurmurHash2A, by Austin Appleby
// This is a sample implementation of MurmurHash2A designed to work
// incrementally.
// Usage -
// CMurmurHash2A hasher
// hasher.Begin(seed);
// hasher.Add(data1,size1);
// hasher.Add(data2,size2);
// ...
// hasher.Add(dataN,sizeN);
// uint32_t hash = hasher.End()
class CMurmurHash2A
{
public:
void Begin ( uint32_t seed = 0 )
{
m_hash = seed;
m_tail = 0;
m_count = 0;
m_size = 0;
}
void Add ( const unsigned char * data, size_t len )
{
m_size += len;
MixTail(data,len);
while(len >= 4)
{
uint32_t k = *(uint32_t*)data;
mmix(m_hash,k);
data += 4;
len -= 4;
}
MixTail(data,len);
}
uint32_t End ( void )
{
mmix(m_hash,m_tail);
mmix(m_hash,m_size);
m_hash ^= m_hash >> 13;
m_hash *= m;
m_hash ^= m_hash >> 15;
return m_hash;
}
private:
static const uint32_t m = 0x5bd1e995;
static const int r = 24;
void MixTail ( const unsigned char * & data, size_t & len )
{
while( len && ((len<4) || m_count) )
{
m_tail |= (*data++) << (m_count * 8);
m_count++;
len--;
if(m_count == 4)
{
mmix(m_hash,m_tail);
m_tail = 0;
m_count = 0;
}
}
}
uint32_t m_hash;
uint32_t m_tail;
uint32_t m_count;
uint32_t m_size;
};
//-----------------------------------------------------------------------------
// MurmurHashNeutral2, by Austin Appleby
// Same as MurmurHash2, but endian- and alignment-neutral.
// Half the speed though, alas.
uint32_t MurmurHashNeutral2 ( const void * key, size_t len, uint32_t seed )
{
const uint32_t m = 0x5bd1e995;
const int r = 24;
uint32_t h = seed ^ len;
const unsigned char * data = (const unsigned char *)key;
while(len >= 4)
{
uint32_t k;
k = data[0];
k |= data[1] << 8;
k |= data[2] << 16;
k |= data[3] << 24;
k *= m;
k ^= k >> r;
k *= m;
h *= m;
h ^= k;
data += 4;
len -= 4;
}
switch(len)
{
case 3: h ^= data[2] << 16;
case 2: h ^= data[1] << 8;
case 1: h ^= data[0];
h *= m;
};
h ^= h >> 13;
h *= m;
h ^= h >> 15;
return h;
}
//-----------------------------------------------------------------------------
// MurmurHashAligned2, by Austin Appleby
// Same algorithm as MurmurHash2, but only does aligned reads - should be safer
// on certain platforms.
// Performance will be lower than MurmurHash2
#define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
uint32_t MurmurHashAligned2 ( const void * key, size_t len, uint32_t seed )
{
const uint32_t m = 0x5bd1e995;
const int r = 24;
const unsigned char * data = (const unsigned char *)key;
uint32_t h = seed ^ len;
size_t align = (uint64_t)data & 3;
if(align && (len >= 4))
{
// Pre-load the temp registers
uint32_t t = 0, d = 0;
switch(align)
{
case 1: t |= data[2] << 16;
case 2: t |= data[1] << 8;
case 3: t |= data[0];
}
t <<= (8 * align);
data += 4-align;
len -= 4-align;
int sl = 8 * (4-align);
int sr = 8 * align;
// Mix
while(len >= 4)
{
d = *(uint32_t *)data;
t = (t >> sr) | (d << sl);
uint32_t k = t;
MIX(h,k,m);
t = d;
data += 4;
len -= 4;
}
// Handle leftover data in temp registers
d = 0;
if(len >= align)
{
switch(align)
{
case 3: d |= data[2] << 16;
case 2: d |= data[1] << 8;
case 1: d |= data[0];
}
uint32_t k = (t >> sr) | (d << sl);
MIX(h,k,m);
data += align;
len -= align;
//----------
// Handle tail bytes
switch(len)
{
case 3: h ^= data[2] << 16;
case 2: h ^= data[1] << 8;
case 1: h ^= data[0];
h *= m;
};
}
else
{
switch(len)
{
case 3: d |= data[2] << 16;
case 2: d |= data[1] << 8;
case 1: d |= data[0];
case 0: h ^= (t >> sr) | (d << sl);
h *= m;
}
}
h ^= h >> 13;
h *= m;
h ^= h >> 15;
return h;
}
else
{
while(len >= 4)
{
uint32_t k = *(uint32_t *)data;
MIX(h,k,m);
data += 4;
len -= 4;
}
//----------
// Handle tail bytes
switch(len)
{
case 3: h ^= data[2] << 16;
case 2: h ^= data[1] << 8;
case 1: h ^= data[0];
h *= m;
};
h ^= h >> 13;
h *= m;
h ^= h >> 15;
return h;
}
}
//-----------------------------------------------------------------------------


@ -1,3 +1,4 @@
//-----------------------------------------------------------------------------
// MurmurHash3 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.
@ -6,8 +7,8 @@
// compile and run any of them on any platform, but your performance with the
// non-native version will be less than optimal.
#include "murmurhash3.h"
#include <cstring>
#include "MurmurHash3.h"
#include <string.h>
//-----------------------------------------------------------------------------
// Platform-specific functions and macros
@ -93,7 +94,7 @@ FORCE_INLINE uint64_t fmix64 ( uint64_t k )
//-----------------------------------------------------------------------------
void MurmurHash3_x86_32 ( const void * key, int len,
void MurmurHash3_x86_32 ( const void * key, size_t len,
uint32_t seed, void * out )
{
const uint8_t * data = (const uint8_t*)key;
@ -149,7 +150,7 @@ void MurmurHash3_x86_32 ( const void * key, int len,
//-----------------------------------------------------------------------------
void MurmurHash3_x86_128 ( const void * key, const int len,
void MurmurHash3_x86_128 ( const void * key, const size_t len,
uint32_t seed, void * out )
{
const uint8_t * data = (const uint8_t*)key;
@ -254,7 +255,7 @@ void MurmurHash3_x86_128 ( const void * key, const int len,
//-----------------------------------------------------------------------------
void MurmurHash3_x64_128 ( const void * key, const int len,
void MurmurHash3_x64_128 ( const void * key, const size_t len,
const uint32_t seed, void * out )
{
const uint8_t * data = (const uint8_t*)key;
@ -332,3 +333,6 @@ void MurmurHash3_x64_128 ( const void * key, const int len,
((uint64_t*)out)[0] = h1;
((uint64_t*)out)[1] = h2;
}
//-----------------------------------------------------------------------------


@ -1,423 +0,0 @@
// MurmurHash2 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.
// Note - This code makes a few assumptions about how your machine behaves -
// 1. We can read a 4-byte value from any address without crashing
// 2. sizeof(int) == 4
// And it has a few limitations -
// 1. It will not work incrementally.
// 2. It will not produce the same results on little-endian and big-endian
// machines.
#include "murmurhash2.h"
#include <cstring>
// Platform-specific functions and macros
// Microsoft Visual Studio
#if defined(_MSC_VER)
#define BIG_CONSTANT(x) (x)
// Other compilers
#else // defined(_MSC_VER)
#define BIG_CONSTANT(x) (x##LLU)
#endif // !defined(_MSC_VER)
uint32_t MurmurHash2(const void * key, int len, uint32_t seed)
{
// 'm' and 'r' are mixing constants generated offline.
// They're not really 'magic', they just happen to work well.
const uint32_t m = 0x5bd1e995;
const int r = 24;
// Initialize the hash to a 'random' value
uint32_t h = seed ^ len;
// Mix 4 bytes at a time into the hash
const unsigned char * data = reinterpret_cast<const unsigned char *>(key);
while (len >= 4)
{
uint32_t k;
memcpy(&k, data, sizeof(k));
k *= m;
k ^= k >> r;
k *= m;
h *= m;
h ^= k;
data += 4;
len -= 4;
}
// Handle the last few bytes of the input array
switch (len)
{
case 3: h ^= data[2] << 16;
case 2: h ^= data[1] << 8;
case 1: h ^= data[0];
h *= m;
};
// Do a few final mixes of the hash to ensure the last few
// bytes are well-incorporated.
h ^= h >> 13;
h *= m;
h ^= h >> 15;
return h;
}
// MurmurHash2, 64-bit versions, by Austin Appleby
// The same caveats as 32-bit MurmurHash2 apply here - beware of alignment
// and endian-ness issues if used across multiple platforms.
// 64-bit hash for 64-bit platforms
uint64_t MurmurHash64A(const void * key, int len, uint64_t seed)
{
const uint64_t m = BIG_CONSTANT(0xc6a4a7935bd1e995);
const int r = 47;
uint64_t h = seed ^ (len * m);
const uint64_t * data = reinterpret_cast<const uint64_t *>(key);
const uint64_t * end = data + (len/8);
while (data != end)
{
uint64_t k = *data++;
k *= m;
k ^= k >> r;
k *= m;
h ^= k;
h *= m;
}
const unsigned char * data2 = reinterpret_cast<const unsigned char *>(data);
switch (len & 7)
{
case 7: h ^= static_cast<uint64_t>(data2[6]) << 48;
case 6: h ^= static_cast<uint64_t>(data2[5]) << 40;
case 5: h ^= static_cast<uint64_t>(data2[4]) << 32;
case 4: h ^= static_cast<uint64_t>(data2[3]) << 24;
case 3: h ^= static_cast<uint64_t>(data2[2]) << 16;
case 2: h ^= static_cast<uint64_t>(data2[1]) << 8;
case 1: h ^= static_cast<uint64_t>(data2[0]);
h *= m;
};
h ^= h >> r;
h *= m;
h ^= h >> r;
return h;
}
// 64-bit hash for 32-bit platforms
uint64_t MurmurHash64B(const void * key, int len, uint64_t seed)
{
const uint32_t m = 0x5bd1e995;
const int r = 24;
uint32_t h1 = static_cast<uint32_t>(seed) ^ len;
uint32_t h2 = static_cast<uint32_t>(seed >> 32);
const uint32_t * data = reinterpret_cast<const uint32_t *>(key);
while (len >= 8)
{
uint32_t k1 = *data++;
k1 *= m; k1 ^= k1 >> r; k1 *= m;
h1 *= m; h1 ^= k1;
len -= 4;
uint32_t k2 = *data++;
k2 *= m; k2 ^= k2 >> r; k2 *= m;
h2 *= m; h2 ^= k2;
len -= 4;
}
if (len >= 4)
{
uint32_t k1 = *data++;
k1 *= m; k1 ^= k1 >> r; k1 *= m;
h1 *= m; h1 ^= k1;
len -= 4;
}
switch (len)
{
case 3: h2 ^= reinterpret_cast<const unsigned char *>(data)[2] << 16;
case 2: h2 ^= reinterpret_cast<const unsigned char *>(data)[1] << 8;
case 1: h2 ^= reinterpret_cast<const unsigned char *>(data)[0];
h2 *= m;
};
h1 ^= h2 >> 18; h1 *= m;
h2 ^= h1 >> 22; h2 *= m;
h1 ^= h2 >> 17; h1 *= m;
h2 ^= h1 >> 19; h2 *= m;
uint64_t h = h1;
h = (h << 32) | h2;
return h;
}
// MurmurHash2A, by Austin Appleby
// This is a variant of MurmurHash2 modified to use the Merkle-Damgard
// construction. Bulk speed should be identical to Murmur2, small-key speed
// will be 10%-20% slower due to the added overhead at the end of the hash.
// This variant fixes a minor issue where null keys were more likely to
// collide with each other than expected, and also makes the function
// more amenable to incremental implementations.
#define mmix(h,k) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
uint32_t MurmurHash2A(const void * key, int len, uint32_t seed)
{
const uint32_t m = 0x5bd1e995;
const int r = 24;
uint32_t l = len;
const unsigned char * data = reinterpret_cast<const unsigned char *>(key);
uint32_t h = seed;
while (len >= 4)
{
uint32_t k = *reinterpret_cast<const uint32_t *>(data);
mmix(h,k);
data += 4;
len -= 4;
}
uint32_t t = 0;
switch (len)
{
case 3: t ^= data[2] << 16;
case 2: t ^= data[1] << 8;
case 1: t ^= data[0];
};
mmix(h,t);
mmix(h,l);
h ^= h >> 13;
h *= m;
h ^= h >> 15;
return h;
}
// MurmurHashNeutral2, by Austin Appleby
// Same as MurmurHash2, but endian- and alignment-neutral.
// Half the speed though, alas.
uint32_t MurmurHashNeutral2(const void * key, int len, uint32_t seed)
{
const uint32_t m = 0x5bd1e995;
const int r = 24;
uint32_t h = seed ^ len;
const unsigned char * data = reinterpret_cast<const unsigned char *>(key);
while (len >= 4)
{
uint32_t k;
k = data[0];
k |= data[1] << 8;
k |= data[2] << 16;
k |= data[3] << 24;
k *= m;
k ^= k >> r;
k *= m;
h *= m;
h ^= k;
data += 4;
len -= 4;
}
switch (len)
{
case 3: h ^= data[2] << 16;
case 2: h ^= data[1] << 8;
case 1: h ^= data[0];
h *= m;
};
h ^= h >> 13;
h *= m;
h ^= h >> 15;
return h;
}
//-----------------------------------------------------------------------------
// MurmurHashAligned2, by Austin Appleby
// Same algorithm as MurmurHash2, but only does aligned reads - should be safer
// on certain platforms.
// Performance will be lower than MurmurHash2
#define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
uint32_t MurmurHashAligned2(const void * key, int len, uint32_t seed)
{
const uint32_t m = 0x5bd1e995;
const int r = 24;
const unsigned char * data = reinterpret_cast<const unsigned char *>(key);
uint32_t h = seed ^ len;
int align = reinterpret_cast<uint64_t>(data) & 3;
if (align && (len >= 4))
{
// Pre-load the temp registers
uint32_t t = 0, d = 0;
switch (align)
{
case 1: t |= data[2] << 16;
case 2: t |= data[1] << 8;
case 3: t |= data[0];
}
t <<= (8 * align);
data += 4-align;
len -= 4-align;
int sl = 8 * (4-align);
int sr = 8 * align;
// Mix
while (len >= 4)
{
d = *(reinterpret_cast<const uint32_t *>(data));
t = (t >> sr) | (d << sl);
uint32_t k = t;
MIX(h,k,m);
t = d;
data += 4;
len -= 4;
}
// Handle leftover data in temp registers
d = 0;
if (len >= align)
{
switch (align)
{
case 3: d |= data[2] << 16;
case 2: d |= data[1] << 8;
case 1: d |= data[0];
}
uint32_t k = (t >> sr) | (d << sl);
MIX(h,k,m);
data += align;
len -= align;
//----------
// Handle tail bytes
switch (len)
{
case 3: h ^= data[2] << 16;
case 2: h ^= data[1] << 8;
case 1: h ^= data[0];
h *= m;
};
}
else
{
switch (len)
{
case 3: d |= data[2] << 16;
case 2: d |= data[1] << 8;
case 1: d |= data[0];
case 0: h ^= (t >> sr) | (d << sl);
h *= m;
}
}
h ^= h >> 13;
h *= m;
h ^= h >> 15;
return h;
}
else
{
while (len >= 4)
{
uint32_t k = *reinterpret_cast<const uint32_t *>(data);
MIX(h,k,m);
data += 4;
len -= 4;
}
// Handle tail bytes
switch (len)
{
case 3: h ^= data[2] << 16;
case 2: h ^= data[1] << 8;
case 1: h ^= data[0];
h *= m;
};
h ^= h >> 13;
h *= m;
h ^= h >> 15;
return h;
}
}

debian/changelog

@ -1,5 +1,5 @@
clickhouse (21.7.1.1) unstable; urgency=low
clickhouse (21.8.1.1) unstable; urgency=low
* Modified source code
-- clickhouse-release <clickhouse-release@yandex-team.ru> Thu, 20 May 2021 22:23:29 +0300
-- clickhouse-release <clickhouse-release@yandex-team.ru> Mon, 28 Jun 2021 00:50:15 +0300


@ -1,7 +1,7 @@
FROM ubuntu:18.04
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=21.7.1.*
ARG version=21.8.1.*
RUN apt-get update \
&& apt-get install --yes --no-install-recommends \


@ -72,7 +72,7 @@ RUN git clone https://github.com/tpoechtrager/apple-libtapi.git \
&& cd .. \
&& rm -rf apple-libtapi
# Build and install tools for cross-linking to Darwin
# Build and install tools for cross-linking to Darwin (x86-64)
RUN git clone https://github.com/tpoechtrager/cctools-port.git \
&& cd cctools-port/cctools \
&& ./configure --prefix=/cctools --with-libtapi=/cctools \
@ -81,8 +81,17 @@ RUN git clone https://github.com/tpoechtrager/cctools-port.git \
&& cd ../.. \
&& rm -rf cctools-port
# Download toolchain for Darwin
RUN wget -nv https://github.com/phracker/MacOSX-SDKs/releases/download/10.15/MacOSX10.15.sdk.tar.xz
# Build and install tools for cross-linking to Darwin (aarch64)
RUN git clone https://github.com/tpoechtrager/cctools-port.git \
&& cd cctools-port/cctools \
&& ./configure --prefix=/cctools --with-libtapi=/cctools \
--target=aarch64-apple-darwin \
&& make install \
&& cd ../.. \
&& rm -rf cctools-port
# Download toolchain and SDK for Darwin
RUN wget -nv https://github.com/phracker/MacOSX-SDKs/releases/download/11.3/MacOSX11.0.sdk.tar.xz
# Download toolchain for ARM
# It contains all required headers and libraries. Note that it's named as "gcc" but actually we are using clang for cross compiling.


@ -3,7 +3,9 @@
set -x -e
mkdir -p build/cmake/toolchain/darwin-x86_64
tar xJf MacOSX10.15.sdk.tar.xz -C build/cmake/toolchain/darwin-x86_64 --strip-components=1
tar xJf MacOSX11.0.sdk.tar.xz -C build/cmake/toolchain/darwin-x86_64 --strip-components=1
ln -sf darwin-x86_64 build/cmake/toolchain/darwin-aarch64
mkdir -p build/cmake/toolchain/linux-aarch64
tar xJf gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu.tar.xz -C build/cmake/toolchain/linux-aarch64 --strip-components=1


@ -58,6 +58,7 @@ def run_docker_image_with_env(image_name, output, env_variables, ch_root, ccache
def parse_env_variables(build_type, compiler, sanitizer, package_type, image_type, cache, distcc_hosts, unbundled, split_binary, clang_tidy, version, author, official, alien_pkgs, with_coverage, with_binaries):
CLANG_PREFIX = "clang"
DARWIN_SUFFIX = "-darwin"
DARWIN_ARM_SUFFIX = "-darwin-aarch64"
ARM_SUFFIX = "-aarch64"
FREEBSD_SUFFIX = "-freebsd"
@ -66,9 +67,10 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ
is_clang = compiler.startswith(CLANG_PREFIX)
is_cross_darwin = compiler.endswith(DARWIN_SUFFIX)
is_cross_darwin_arm = compiler.endswith(DARWIN_ARM_SUFFIX)
is_cross_arm = compiler.endswith(ARM_SUFFIX)
is_cross_freebsd = compiler.endswith(FREEBSD_SUFFIX)
is_cross_compile = is_cross_darwin or is_cross_arm or is_cross_freebsd
is_cross_compile = is_cross_darwin or is_cross_darwin_arm or is_cross_arm or is_cross_freebsd
# Explicitly use LLD with Clang by default.
# Don't force linker for cross-compilation.
@ -82,6 +84,13 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ
cmake_flags.append("-DCMAKE_RANLIB:FILEPATH=/cctools/bin/x86_64-apple-darwin-ranlib")
cmake_flags.append("-DLINKER_NAME=/cctools/bin/x86_64-apple-darwin-ld")
cmake_flags.append("-DCMAKE_TOOLCHAIN_FILE=/build/cmake/darwin/toolchain-x86_64.cmake")
elif is_cross_darwin_arm:
cc = compiler[:-len(DARWIN_ARM_SUFFIX)]
cmake_flags.append("-DCMAKE_AR:FILEPATH=/cctools/bin/aarch64-apple-darwin-ar")
cmake_flags.append("-DCMAKE_INSTALL_NAME_TOOL=/cctools/bin/aarch64-apple-darwin-install_name_tool")
cmake_flags.append("-DCMAKE_RANLIB:FILEPATH=/cctools/bin/aarch64-apple-darwin-ranlib")
cmake_flags.append("-DLINKER_NAME=/cctools/bin/aarch64-apple-darwin-ld")
cmake_flags.append("-DCMAKE_TOOLCHAIN_FILE=/build/cmake/darwin/toolchain-aarch64.cmake")
elif is_cross_arm:
cc = compiler[:-len(ARM_SUFFIX)]
cmake_flags.append("-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-aarch64.cmake")
@ -185,8 +194,8 @@ if __name__ == "__main__":
parser.add_argument("--clickhouse-repo-path", default=os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, os.pardir))
parser.add_argument("--output-dir", required=True)
parser.add_argument("--build-type", choices=("debug", ""), default="")
parser.add_argument("--compiler", choices=("clang-11", "clang-11-darwin", "clang-11-aarch64", "clang-11-freebsd",
"gcc-10"), default="clang-11")
parser.add_argument("--compiler", choices=("clang-11", "clang-11-darwin", "clang-11-darwin-aarch64", "clang-11-aarch64",
"clang-11-freebsd", "gcc-10"), default="clang-11")
parser.add_argument("--sanitizer", choices=("address", "thread", "memory", "undefined", ""), default="")
parser.add_argument("--unbundled", action="store_true")
parser.add_argument("--split-binary", action="store_true")


@ -1,7 +1,7 @@
FROM ubuntu:20.04
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=21.7.1.*
ARG version=21.8.1.*
ARG gosu_ver=1.10
# set non-empty deb_location_url url to create a docker image


@ -1,7 +1,7 @@
FROM ubuntu:18.04
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=21.7.1.*
ARG version=21.8.1.*
RUN apt-get update && \
apt-get install -y apt-transport-https dirmngr && \


@ -113,6 +113,7 @@ function start_server
echo "ClickHouse server pid '$server_pid' started and responded"
echo "
set follow-fork-mode child
handle all noprint
handle SIGSEGV stop print
handle SIGBUS stop print


@ -103,6 +103,7 @@ function fuzz
kill -0 $server_pid
echo "
set follow-fork-mode child
handle all noprint
handle SIGSEGV stop print
handle SIGBUS stop print


@ -561,7 +561,7 @@ if args.report == 'main':
# Don't show mildly unstable queries, only the very unstable ones we
# treat as errors.
if very_unstable_queries:
if very_unstable_queries > 3:
if very_unstable_queries > 5:
error_tests += very_unstable_queries
status = 'failure'
message_array.append(str(very_unstable_queries) + ' unstable')


@ -55,6 +55,7 @@ function start()
done
echo "
set follow-fork-mode child
handle all noprint
handle SIGSEGV stop print
handle SIGBUS stop print


@ -2,18 +2,16 @@
## TL; DR How to make ClickHouse compile and link faster?
Developer only! This command will likely fulfill most of your needs. Run before calling `ninja`.
Minimal ClickHouse build example:
```cmake
```bash
cmake .. \
-DCMAKE_C_COMPILER=/bin/clang-10 \
-DCMAKE_CXX_COMPILER=/bin/clang++-10 \
-DCMAKE_C_COMPILER=$(which clang-11) \
-DCMAKE_CXX_COMPILER=$(which clang++-11) \
-DCMAKE_BUILD_TYPE=Debug \
-DENABLE_CLICKHOUSE_ALL=OFF \
-DENABLE_CLICKHOUSE_SERVER=ON \
-DENABLE_CLICKHOUSE_CLIENT=ON \
-DUSE_STATIC_LIBRARIES=OFF \
-DSPLIT_SHARED_LIBRARIES=ON \
-DENABLE_LIBRARIES=OFF \
-DUSE_UNWIND=ON \
-DENABLE_UTILS=OFF \


@ -33,7 +33,7 @@ Reboot.
``` bash
brew update
brew install cmake ninja libtool gettext llvm gcc
brew install cmake ninja libtool gettext llvm gcc binutils
```
## Checkout ClickHouse Sources {#checkout-clickhouse-sources}


@ -0,0 +1,53 @@
---
toc_priority: 12
toc_title: ExternalDistributed
---
# ExternalDistributed {#externaldistributed}
The `ExternalDistributed` engine allows performing `SELECT` queries on data stored on remote MySQL or PostgreSQL servers. It accepts the [MySQL](../../../engines/table-engines/integrations/mysql.md) or [PostgreSQL](../../../engines/table-engines/integrations/postgresql.md) engine as an argument, so sharding is possible.
## Creating a Table {#creating-a-table}
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1] [TTL expr1],
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2] [TTL expr2],
...
) ENGINE = ExternalDistributed('engine', 'host:port', 'database', 'table', 'user', 'password');
```
See a detailed description of the [CREATE TABLE](../../../sql-reference/statements/create/table.md#create-table-query) query.
The table structure can differ from the original table structure:
- Column names should be the same as in the original table, but you can use just some of these columns and in any order.
- Column types may differ from those in the original table. ClickHouse tries to [cast](../../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) values to the ClickHouse data types.
**Engine Parameters**
- `engine` — The table engine `MySQL` or `PostgreSQL`.
- `host:port` — MySQL or PostgreSQL server address.
- `database` — Remote database name.
- `table` — Remote table name.
- `user` — User name.
- `password` — User password.
## Implementation Details {#implementation-details}
Supports multiple replicas, which must be separated by `|`, and multiple shards, which must be separated by `,`. For example:
```sql
CREATE TABLE test_shards (id UInt32, name String, age UInt32, money UInt32) ENGINE = ExternalDistributed('MySQL', `mysql{1|2}:3306,mysql{3|4}:3306`, 'clickhouse', 'test_replicas', 'root', 'clickhouse');
```
When specifying replicas, one of the available replicas is selected for each of the shards when reading. If the connection fails, the next replica is selected, and so on for all the replicas. If the connection attempt fails for all the replicas, the attempt is repeated the same way several times.
You can specify any number of shards and any number of replicas for each shard.
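Reading from such a table works like any other `SELECT`. A minimal sketch against the `test_shards` table defined above (the filter is purely illustrative):
```sql
SELECT id, name
FROM test_shards
WHERE age >= 30;
```
ClickHouse fans the query out to the listed shards and, for each shard, picks one of the available replicas as described above.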
**See Also**
- [MySQL table engine](../../../engines/table-engines/integrations/mysql.md)
- [PostgreSQL table engine](../../../engines/table-engines/integrations/postgresql.md)
- [Distributed table engine](../../../engines/table-engines/special/distributed.md)


@ -28,8 +28,8 @@ See a detailed description of the [CREATE TABLE](../../../sql-reference/statemen
The table structure can differ from the original MySQL table structure:
- Column names should be the same as in the original MySQL table, but you can use just some of these columns and in any order.
- Column types may differ from those in the original MySQL table. ClickHouse tries to [cast](../../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) values to the ClickHouse data types.
- Setting `external_table_functions_use_nulls` defines how to handle Nullable columns. Default is true, if false - table function will not make nullable columns and will insert default values instead of nulls. This is also applicable for null values inside array data types.
- Column types may differ from those in the original MySQL table. ClickHouse tries to [cast](../../../engines/database-engines/mysql.md#data_types-support) values to the ClickHouse data types.
- The [external_table_functions_use_nulls](../../../operations/settings/settings.md#external-table-functions-use-nulls) setting defines how to handle Nullable columns. Default value: 1. If 0, the table function does not make Nullable columns and inserts default values instead of nulls. This is also applicable for NULL values inside arrays.
**Engine Parameters**
@ -55,6 +55,12 @@ Simple `WHERE` clauses such as `=, !=, >, >=, <, <=` are executed on the MySQL s
The rest of the conditions and the `LIMIT` sampling constraint are executed in ClickHouse only after the query to MySQL finishes.
Supports multiple replicas that must be listed by `|`. For example:
```sql
CREATE TABLE test_replicas (id UInt32, name String, age UInt32, money UInt32) ENGINE = MySQL(`mysql{2|3|4}:3306`, 'clickhouse', 'test_replicas', 'root', 'clickhouse');
```
## Usage Example {#usage-example}
Table in MySQL:


@ -29,7 +29,7 @@ The table structure can differ from the source table structure:
- Column names should be the same as in the source table, but you can use just some of these columns and in any order.
- Column types may differ from those in the source table. ClickHouse tries to [cast](../../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) values to the ClickHouse data types.
- Setting `external_table_functions_use_nulls` defines how to handle Nullable columns. Default is true, if false - table function will not make nullable columns and will insert default values instead of nulls. This is also applicable for null values inside array data types.
- The [external_table_functions_use_nulls](../../../operations/settings/settings.md#external-table-functions-use-nulls) setting defines how to handle Nullable columns. Default value: 1. If 0, the table function does not make Nullable columns and inserts default values instead of nulls. This is also applicable for NULL values inside arrays.
**Engine Parameters**


@ -23,8 +23,8 @@ See a detailed description of the [CREATE TABLE](../../../sql-reference/statemen
The table structure can differ from the original PostgreSQL table structure:
- Column names should be the same as in the original PostgreSQL table, but you can use just some of these columns and in any order.
- Column types may differ from those in the original PostgreSQL table. ClickHouse tries to [cast](../../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) values to the ClickHouse data types.
- Setting `external_table_functions_use_nulls` defines how to handle Nullable columns. Default is 1, if 0 - table function will not make nullable columns and will insert default values instead of nulls. This is also applicable for null values inside array data types.
- Column types may differ from those in the original PostgreSQL table. ClickHouse tries to [cast](../../../engines/database-engines/postgresql.md#data_types-support) values to the ClickHouse data types.
- The [external_table_functions_use_nulls](../../../operations/settings/settings.md#external-table-functions-use-nulls) setting defines how to handle Nullable columns. Default value: 1. If 0, the table function does not make Nullable columns and inserts default values instead of nulls. This is also applicable for NULL values inside arrays.
**Engine Parameters**
@ -49,6 +49,12 @@ PostgreSQL `Array` types are converted into ClickHouse arrays.
!!! info "Note"
Be careful - in PostgreSQL an array data, created like a `type_name[]`, may contain multi-dimensional arrays of different dimensions in different table rows in same column. But in ClickHouse it is only allowed to have multidimensional arrays of the same count of dimensions in all table rows in same column.
Supports multiple replicas that must be listed by `|`. For example:
```sql
CREATE TABLE test_replicas (id UInt32, name String) ENGINE = PostgreSQL(`postgres{2|3|4}:5432`, 'clickhouse', 'test_replicas', 'postgres', 'mysecretpassword');
```
Replicas priority for PostgreSQL dictionary source is supported. The bigger the number in map, the less the priority. The highest priority is `0`.


@ -65,7 +65,7 @@ By checking the row count:
Query:
``` sq;
``` sql
SELECT count() FROM recipes;
```


@ -1302,6 +1302,7 @@ The table below shows supported data types and how they match ClickHouse [data t
| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `UTF8` |
| `STRING`, `BINARY` | [FixedString](../sql-reference/data-types/fixedstring.md) | `UTF8` |
| `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | `DECIMAL` |
| `DECIMAL256` | [Decimal256](../sql-reference/data-types/decimal.md)| `DECIMAL256` |
| `LIST` | [Array](../sql-reference/data-types/array.md) | `LIST` |
Arrays can be nested and can have a value of the `Nullable` type as an argument.


@ -379,7 +379,7 @@ Default value: `1`.
## insert_null_as_default {#insert_null_as_default}
Enables or disables the insertion of [default values](../../sql-reference/statements/create/table.md#create-default-values) instead of [NULL](../../sql-reference/syntax.md#null-literal) into columns with not [nullable](../../sql-reference/data-types/nullable.md#data_type-nullable) data type.
Enables or disables the insertion of [default values](../../sql-reference/statements/create/table.md#create-default-values) instead of [NULL](../../sql-reference/syntax.md#null-literal) into columns with not [nullable](../../sql-reference/data-types/nullable.md#data_type-nullable) data type.
If column type is not nullable and this setting is disabled, then inserting `NULL` causes an exception. If column type is nullable, then `NULL` values are inserted as is, regardless of this setting.
This setting is applicable to [INSERT ... SELECT](../../sql-reference/statements/insert-into.md#insert_query_insert-select) queries. Note that `SELECT` subqueries may be concatenated with `UNION ALL` clause.
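A minimal sketch of the behavior, using hypothetical table and column names:
```sql
CREATE TABLE dst (id UInt32, name String DEFAULT 'unknown') ENGINE = MergeTree ORDER BY id;
SET insert_null_as_default = 1;
-- NULL values produced by the SELECT are replaced with the column default ('unknown').
INSERT INTO dst SELECT number, nullIf(toString(number), '1') FROM numbers(3);
```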
@ -1182,7 +1182,7 @@ Possible values:
Default value: `1`.
**Additional Info**
**Additional Info**
This setting is useful for replicated tables with a sampling key. A query may be processed faster if it is executed on several servers in parallel. But the query performance may degrade in the following cases:
@ -1194,21 +1194,22 @@ This setting is useful for replicated tables with a sampling key. A query may be
!!! warning "Warning"
This setting will produce incorrect results when joins or subqueries are involved, and all tables don't meet certain requirements. See [Distributed Subqueries and max_parallel_replicas](../../sql-reference/operators/in.md#max_parallel_replica-subqueries) for more details.
## compile {#compile}
## compile_expressions {#compile-expressions}
Enable compilation of queries. By default, 0 (disabled).
Enables or disables compilation of frequently used simple functions and operators to native code with LLVM at runtime.
The compilation is only used for part of the query-processing pipeline: for the first stage of aggregation (GROUP BY).
If this portion of the pipeline was compiled, the query may run faster due to the deployment of short cycles and inlining aggregate function calls. The maximum performance improvement (up to four times faster in rare cases) is seen for queries with multiple simple aggregate functions. Typically, the performance gain is insignificant. In very rare cases, it may slow down query execution.
Possible values:
## min_count_to_compile {#min-count-to-compile}
- 0 — Disabled.
- 1 — Enabled.
How many times to potentially use a compiled chunk of code before running compilation. By default, 3.
For testing, the value can be set to 0: compilation runs synchronously and the query waits for the end of the compilation process before continuing execution. For all other cases, use values starting with 1. Compilation normally takes about 5-10 seconds.
If the value is 1 or more, compilation occurs asynchronously in a separate thread. The result will be used as soon as it is ready, including queries that are currently running.
Default value: `1`.
Compiled code is required for each different combination of aggregate functions used in the query and the type of keys in the GROUP BY clause.
The results of the compilation are saved in the build directory in the form of .so files. There is no restriction on the number of compilation results since they do not use very much space. Old results will be used after server restarts, except in the case of a server upgrade in this case, the old results are deleted.
## min_count_to_compile_expression {#min-count-to-compile-expression}
Minimum number of times the same expression must be executed before it is compiled.
Default value: `3`.
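A hedged sketch of how the two settings above are typically used together in a session (the query itself is arbitrary):
```sql
SET compile_expressions = 1;
SET min_count_to_compile_expression = 3;
-- After the same expression has been executed the configured number of times,
-- later executions may run its JIT-compiled version.
SELECT count() FROM numbers(1000000) WHERE number * 2 + 1 > 100;
```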
## output_format_json_quote_64bit_integers {#session_settings-output_format_json_quote_64bit_integers}
@ -1558,7 +1559,7 @@ Possible values:
- 0 — Disabled (final query processing is done on the initiator node).
- 1 - Do not merge aggregation states from different servers for distributed query processing (query completelly processed on the shard, initiator only proxy the data), can be used in case it is for certain that there are different keys on different shards.
- 2 - Same as `1` but applies `ORDER BY` and `LIMIT` (it is not possilbe when the query processed completelly on the remote node, like for `distributed_group_by_no_merge=1`) on the initiator (can be used for queries with `ORDER BY` and/or `LIMIT`).
- 2 - Same as `1` but applies `ORDER BY` and `LIMIT` (it is not possible when the query processed completelly on the remote node, like for `distributed_group_by_no_merge=1`) on the initiator (can be used for queries with `ORDER BY` and/or `LIMIT`).
**Example**
@ -1622,7 +1623,7 @@ Possible values:
Default value: 0
## optimize_skip_unused_shards_rewrite_in {#optimize-skip-unused-shardslrewrite-in}
## optimize_skip_unused_shards_rewrite_in {#optimize-skip-unused-shards-rewrite-in}
Rewrite IN in query for remote shards to exclude values that does not belong to the shard (requires optimize_skip_unused_shards).
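For illustration, a hedged sketch with a hypothetical distributed table sharded by `user_id`:
```sql
SET optimize_skip_unused_shards = 1;
SET optimize_skip_unused_shards_rewrite_in = 1;
-- Only shards that can contain user_id 1, 2 or 3 are queried, and the IN list sent
-- to each shard is rewritten to the values that belong to that shard.
SELECT * FROM distributed_events WHERE user_id IN (1, 2, 3);
```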
@ -1802,6 +1803,27 @@ Possible values:
Default value: 0.
## distributed_directory_monitor_split_batch_on_failure {#distributed_directory_monitor_split_batch_on_failure}
Enables/disables splitting batches on failures.
Sometimes sending a particular batch to the remote shard may fail because of a complex pipeline after insertion (e.g. a `MATERIALIZED VIEW` with `GROUP BY`) hitting `Memory limit exceeded` or similar errors. In this case, retrying will not help (and it will get distributed sends for the table stuck), but sending the files from that batch one by one may allow the INSERT to succeed.
Setting this to `1` disables batching for such batches (i.e. it temporarily disables `distributed_directory_monitor_batch_inserts` for failed batches).
Possible values:
- 1 — Enabled.
- 0 — Disabled.
Default value: 0.
!!! note "Note"
This setting also affects broken batches (which may appear after abnormal server (machine) termination when `fsync_after_insert`/`fsync_directories` are not enabled for the [Distributed](../../engines/table-engines/special/distributed.md) table engine).
!!! warning "Warning"
You should not rely on automatic batch splitting, since this may hurt performance.
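A minimal sketch of enabling the setting, assuming it is applied like other user-level settings:
```sql
-- Failed batches of pending inserts for Distributed tables are then retried file by file.
SET distributed_directory_monitor_split_batch_on_failure = 1;
```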
## os_thread_priority {#setting-os-thread-priority}
Sets the priority ([nice](https://en.wikipedia.org/wiki/Nice_(Unix))) for threads that execute queries. The OS scheduler considers this priority when choosing the next thread to run on each available CPU core.
@ -2085,7 +2107,7 @@ Default value: 128.
## background_fetches_pool_size {#background_fetches_pool_size}
Sets the number of threads performing background fetches for [replicated](../../engines/table-engines/mergetree-family/replication.md) tables. This setting is applied at the ClickHouse server start and cant be changed in a user session. For production usage with frequent small insertions or slow ZooKeeper cluster is recomended to use default value.
Sets the number of threads performing background fetches for [replicated](../../engines/table-engines/mergetree-family/replication.md) tables. This setting is applied at the ClickHouse server start and cant be changed in a user session. For production usage with frequent small insertions or slow ZooKeeper cluster is recommended to use default value.
Possible values:
@ -2672,7 +2694,7 @@ Default value: `0`.
## aggregate_functions_null_for_empty {#aggregate_functions_null_for_empty}
Enables or disables rewriting all aggregate functions in a query, adding [-OrNull](../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-ornull) suffix to them. Enable it for SQL standard compatibility.
It is implemented via query rewrite (similar to [count_distinct_implementation](#settings-count_distinct_implementation) setting) to get consistent results for distributed queries.
It is implemented via query rewrite (similar to [count_distinct_implementation](#settings-count_distinct_implementation) setting) to get consistent results for distributed queries.
Possible values:
@ -2856,7 +2878,7 @@ Default value: `0`.
## database_atomic_wait_for_drop_and_detach_synchronously {#database_atomic_wait_for_drop_and_detach_synchronously}
Adds a modifier `SYNC` to all `DROP` and `DETACH` queries.
Adds a modifier `SYNC` to all `DROP` and `DETACH` queries.
Possible values:
@ -2962,7 +2984,7 @@ Enables or disables using the original column names instead of aliases in query
Possible values:
- 0 — The column name is substituted with the alias.
- 1 — The column name is not substituted with the alias.
- 1 — The column name is not substituted with the alias.
Default value: `0`.
@ -3075,7 +3097,7 @@ SELECT
sum(a),
sumCount(b).1,
sumCount(b).2,
(sumCount(b).1) / (sumCount(b).2)
(sumCount(b).1) / (sumCount(b).2)
FROM fuse_tbl
```
@ -3144,4 +3166,17 @@ SETTINGS index_granularity = 8192 │
└────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```
[Original article](https://clickhouse.tech/docs/en/operations/settings/settings/) <!-- hide -->
## external_table_functions_use_nulls {#external-table-functions-use-nulls}
Defines how the [mysql](../../sql-reference/table-functions/mysql.md), [postgresql](../../sql-reference/table-functions/postgresql.md) and [odbc](../../sql-reference/table-functions/odbc.md) table functions use Nullable columns.
Possible values:
- 0 — The table function explicitly uses Nullable columns.
- 1 — The table function implicitly uses Nullable columns.
Default value: `1`.
**Usage**
If the setting is set to `0`, the table function does not make Nullable columns and inserts default values instead of NULL. This is also applicable for NULL values inside arrays.
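A hedged sketch with hypothetical MySQL connection parameters:
```sql
SET external_table_functions_use_nulls = 0;
-- Columns that are NULL on the MySQL side come back as non-Nullable columns
-- filled with default values.
SELECT * FROM mysql('mysql-host:3306', 'shop', 'orders', 'user', 'password');
```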


@ -0,0 +1,39 @@
# system.data_skipping_indices {#system-data-skipping-indices}
Contains information about existing data skipping indices in all the tables.
Columns:
- `database` ([String](../../sql-reference/data-types/string.md)) — Database name.
- `table` ([String](../../sql-reference/data-types/string.md)) — Table name.
- `name` ([String](../../sql-reference/data-types/string.md)) — Index name.
- `type` ([String](../../sql-reference/data-types/string.md)) — Index type.
- `expr` ([String](../../sql-reference/data-types/string.md)) — Expression used to calculate the index.
- `granularity` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of granules in the block.
**Example**
```sql
SELECT * FROM system.data_skipping_indices LIMIT 2 FORMAT Vertical;
```
```text
Row 1:
──────
database: default
table: user_actions
name: clicks_idx
type: minmax
expr: clicks
granularity: 1
Row 2:
──────
database: default
table: users
name: contacts_null_idx
type: minmax
expr: assumeNotNull(contacts_null)
granularity: 1
```
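For reference, a hypothetical table definition that would produce a row like Row 1 above:
```sql
CREATE TABLE default.user_actions
(
    user_id UInt64,
    clicks UInt32,
    INDEX clicks_idx clicks TYPE minmax GRANULARITY 1
)
ENGINE = MergeTree
ORDER BY user_id;
```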


@ -1,37 +0,0 @@
---
toc_priority: 150
---
## initializeAggregation {#initializeaggregation}
Initializes aggregation for your input rows. It is intended for the functions with the suffix `State`.
Use it for tests or to process columns of types `AggregateFunction` and `AggregationgMergeTree`.
**Syntax**
``` sql
initializeAggregation (aggregate_function, column_1, column_2)
```
**Arguments**
- `aggregate_function` — Name of the aggregation function. The state of this function — the creating one. [String](../../../sql-reference/data-types/string.md#string).
- `column_n` — The column to translate it into the function as it's argument. [String](../../../sql-reference/data-types/string.md#string).
**Returned value(s)**
Returns the result of the aggregation for your input rows. The return type will be the same as the return type of function, that `initializeAgregation` takes as first argument.
For example for functions with the suffix `State` the return type will be `AggregateFunction`.
**Example**
Query:
```sql
SELECT uniqMerge(state) FROM (SELECT initializeAggregation('uniqState', number % 3) AS state FROM system.numbers LIMIT 10000);
```
Result:
┌─uniqMerge(state)─┐
│ 3 │
└──────────────────┘


@ -486,6 +486,7 @@ Example of settings:
<table>table_name</table>
<where>id=10</where>
<invalidate_query>SQL_QUERY</invalidate_query>
<fail_on_connection_loss>true</fail_on_connection_loss>
</mysql>
</source>
```
@ -503,6 +504,7 @@ SOURCE(MYSQL(
table 'table_name'
where 'id=10'
invalidate_query 'SQL_QUERY'
fail_on_connection_loss 'true'
))
```
@ -527,6 +529,8 @@ Setting fields:
- `invalidate_query` Query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md).
- `fail_on_connection_loss` The configuration parameter that controls behavior of the server on connection loss. If `true`, an exception is thrown immediately if the connection between client and server was lost. If `false`, the ClickHouse server retries to execute the query three times before throwing an exception. Note that retrying leads to increased response times. Default value: `false`.
MySQL can be connected on a local host via sockets. To do this, set `host` and `socket`.
Example of settings:
@ -542,6 +546,7 @@ Example of settings:
<table>table_name</table>
<where>id=10</where>
<invalidate_query>SQL_QUERY</invalidate_query>
<fail_on_connection_loss>true</fail_on_connection_loss>
</mysql>
</source>
```
@ -558,6 +563,7 @@ SOURCE(MYSQL(
table 'table_name'
where 'id=10'
invalidate_query 'SQL_QUERY'
fail_on_connection_loss 'true'
))
```

View File

@ -831,7 +831,7 @@ Returns 0 for the first row and the difference from the previous row for each su
!!! warning "Warning"
It can reach the previous row only inside the currently processed data block.
The result of the function depends on the affected data blocks and the order of data in the block.
The order of rows used during the calculation of `runningDifference` can differ from the order of rows returned to the user.
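As a minimal illustration (the input below is invented, not from the original document), the function returns 0 for the first row of a block and the delta for the rest:
```sql
SELECT number, runningDifference(number) AS diff
FROM numbers(5);

-- ┌─number─┬─diff─┐
-- │      0 │    0 │
-- │      1 │    1 │
-- │      2 │    1 │
-- │      3 │    1 │
-- │      4 │    1 │
-- └────────┴──────┘
```
Because of the block-dependent behavior described above, wrap the input in a subquery with `ORDER BY` when a deterministic row order is required.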
@ -908,7 +908,7 @@ Same as for [runningDifference](./other-functions.md#other_functions-runningdiff
## runningConcurrency {#runningconcurrency}
Calculates the number of concurrent events.
Each event has a start time and an end time. The start time is included in the event, while the end time is excluded. Columns with a start time and an end time must be of the same data type.
The function calculates the total number of active (concurrent) events for each event start time.
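A short sketch (the `events` table, its engine, and the dates below are invented for illustration; rows are inserted already ordered by start time, as the function requires):
```sql
CREATE TABLE events (begin Date, end Date) ENGINE = Memory;
INSERT INTO events VALUES ('2021-03-03', '2021-03-11'), ('2021-03-06', '2021-03-12'), ('2021-03-07', '2021-03-08');

SELECT begin, runningConcurrency(begin, end) AS concurrent FROM events;

-- 2021-03-03: 1 active event
-- 2021-03-06: 2 active events
-- 2021-03-07: 3 active events
```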
@ -1424,11 +1424,83 @@ Result:
└───────────┴────────┘
```
## initializeAggregation {#initializeaggregation}
Calculates the result of an aggregate function based on a single value. It is intended for initializing aggregate functions with the [-State](../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-state) combinator. You can create states of aggregate functions and insert them into columns of type [AggregateFunction](../../sql-reference/data-types/aggregatefunction.md#data-type-aggregatefunction) or use initialized aggregates as default values.
**Syntax**
``` sql
initializeAggregation (aggregate_function, arg1, arg2, ..., argN)
```
**Arguments**
- `aggregate_function` — Name of the aggregation function to initialize. [String](../../sql-reference/data-types/string.md).
- `arg` — Arguments of aggregate function.
**Returned value(s)**
- Result of aggregation for every row passed to the function.
The return type is the same as the return type of the function that `initializeAggregation` takes as its first argument.
**Example**
Query:
```sql
SELECT uniqMerge(state) FROM (SELECT initializeAggregation('uniqState', number % 3) AS state FROM numbers(10000));
```
Result:
```text
┌─uniqMerge(state)─┐
│ 3 │
└──────────────────┘
```
Query:
```sql
SELECT finalizeAggregation(state), toTypeName(state) FROM (SELECT initializeAggregation('sumState', number % 3) AS state FROM numbers(5));
```
Result:
```text
┌─finalizeAggregation(state)─┬─toTypeName(state)─────────────┐
│ 0 │ AggregateFunction(sum, UInt8) │
│ 1 │ AggregateFunction(sum, UInt8) │
│ 2 │ AggregateFunction(sum, UInt8) │
│ 0 │ AggregateFunction(sum, UInt8) │
│ 1 │ AggregateFunction(sum, UInt8) │
└────────────────────────────┴───────────────────────────────┘
```
Example with `AggregatingMergeTree` table engine and `AggregateFunction` column:
```sql
CREATE TABLE metrics
(
key UInt64,
value AggregateFunction(sum, UInt64) DEFAULT initializeAggregation('sumState', toUInt64(0))
)
ENGINE = AggregatingMergeTree
ORDER BY key
```
```sql
INSERT INTO metrics VALUES (0, initializeAggregation('sumState', toUInt64(42)))
```
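To read the accumulated states back, the matching `-Merge` combinator is used; a usage sketch for the table above:
```sql
SELECT key, sumMerge(value) FROM metrics GROUP BY key;
```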
**See Also**
- [arrayReduce](../../sql-reference/functions/array-functions.md#arrayreduce)
## finalizeAggregation {#function-finalizeaggregation}
Takes a state of an aggregate function. Returns the result of aggregation (or the finalized state when using the [-State](../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-state) combinator).
**Syntax**
``` sql
finalizeAggregation(state)
@ -1442,7 +1514,7 @@ finalizeAggregation(state)
- Value/values that were aggregated.
Type: Value of any type that was aggregated.
**Examples**
@ -1474,7 +1546,7 @@ Result:
└──────────────────────────────────┘
```
Note that `NULL` values are ignored.
Query:
@ -1520,10 +1592,9 @@ Result:
└────────┴─────────────┴────────────────┘
```
**See Also**
- [arrayReduce](../../sql-reference/functions/array-functions.md#arrayreduce)
- [initializeAggregation](../../sql-reference/aggregate-functions/reference/initializeAggregation.md)
- [initializeAggregation](#initializeaggregation)
## runningAccumulate {#runningaccumulate}

View File

@ -119,7 +119,7 @@ For manage uncompressed data cache parameters use following server level setting
## DROP COMPILED EXPRESSION CACHE {#query_language-system-drop-compiled-expression-cache}
Reset the compiled expression cache. Used in development of ClickHouse and performance tests.
Complied expression cache used when query/user/profile enable option [compile](../../operations/settings/settings.md#compile)
The compiled expression cache is used when the [compile-expressions](../../operations/settings/settings.md#compile-expressions) setting is enabled for a query/user/profile.
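A minimal sketch of the workflow this section refers to (setting names as documented; values are illustrative):
```sql
SET compile_expressions = 1;              -- enable runtime compilation of expressions
SET min_count_to_compile_expression = 3;  -- compile after the same expression is seen this many times
-- ... run the workload under test ...
SYSTEM DROP COMPILED EXPRESSION CACHE;    -- reset the cache between performance test runs
```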
## FLUSH LOGS {#query_language-system-flush_logs}

View File

@ -39,6 +39,18 @@ Simple `WHERE` clauses such as `=, !=, >, >=, <, <=` are currently executed on the MySQL server.
The rest of the conditions and the `LIMIT` sampling constraint are executed in ClickHouse only after the query to MySQL finishes.
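For instance (host, table, and column names below are placeholders), the comparison in `WHERE` is sent to MySQL, while `LIMIT` is applied by ClickHouse afterwards:
```sql
SELECT name
FROM mysql('mysql-host:3306', 'mysql_database', 'mysql_table', 'user', 'password')
WHERE id > 100  -- simple condition, executed on the MySQL server
LIMIT 10;       -- applied in ClickHouse after the MySQL query finishes
```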
Supports multiple replicas that must be listed by `|`. For example:
```sql
SELECT name FROM mysql(`mysql{1|2|3}:3306`, 'mysql_database', 'mysql_table', 'user', 'password');
```
or
```sql
SELECT name FROM mysql(`mysql1:3306|mysql2:3306|mysql3:3306`, 'mysql_database', 'mysql_table', 'user', 'password');
```
**Returned Value**
A table object with the same columns as the original MySQL table.

View File

@ -43,8 +43,20 @@ PostgreSQL Array types converts into ClickHouse arrays.
!!! info "Note"
Be careful, in PostgreSQL an array data type column like Integer[] may contain arrays of different dimensions in different rows, but in ClickHouse it is only allowed to have multidimensional arrays of the same dimension in all rows.
Supports multiple replicas that must be listed by `|`. For example:
Supports replica priority for the PostgreSQL dictionary source. The bigger the number in the map, the lower the priority. The highest priority is `0`.
```sql
SELECT name FROM postgresql(`postgres{1|2|3}:5432`, 'postgres_database', 'postgres_table', 'user', 'password');
```
or
```sql
SELECT name FROM postgresql(`postgres1:5431|postgres2:5432`, 'postgres_database', 'postgres_table', 'user', 'password');
```
Supports replica priority for the PostgreSQL dictionary source. The bigger the number in the map, the lower the priority. The highest priority is `0`.
**Examples**

View File

@ -817,22 +817,6 @@ load_balancing = first_or_random
のための一貫性を異なる部分に同じデータを分割)、このオプションにしているときだけサンプリングキーを設定します。
レプリカラグは制御されません。
## コンパイル {#compile}
を編集ます。 既定では、0(無効)です。
コンパイルは、クエリ処理パイプラインの一部にのみ使用されます。
この部分のパイプラインのためのクエリを実行するアによる展開の短サイクルinlining集計機能。 複数の単純な集計関数を使用するクエリでは、最大のパフォーマンスの向上が見られます。 通常、性能は軽微であります。 非常に珍しい例で遅くなクエリを実行します。
## min_count_to_compile {#min-count-to-compile}
り方を潜在的に利用コチャンクのコードの実行前に作成する。 デフォルトでは3.
For testing, the value can be set to 0: compilation runs synchronously and the query waits for the end of the compilation process before continuing execution. For all other cases, use values starting with 1. Compilation normally takes about 5-10 seconds.
値が1以上の場合、コンパイルは別のスレッドで非同期に実行されます。 結果は、現在実行中のクエリを含め、準備が整うとすぐに使用されます。
コンパイルされたコードは、クエリで使用される集計関数とGROUP BY句内のキーの種類のそれぞれの異なる組み合わせに必要です。
The results of the compilation are saved in the build directory in the form of .so files. There is no restriction on the number of compilation results since they don't use very much space. Old results will be used after server restarts, except in the case of a server upgrade; in this case, the old results are deleted.
## output_format_json_quote_64bit_integers {#session_settings-output_format_json_quote_64bit_integers}
値がtrueの場合、json\*Int64およびUInt64形式ほとんどのJavaScript実装との互換性のためを使用するときに整数が引用符で表示されます。

View File

@ -0,0 +1,53 @@
---
toc_priority: 12
toc_title: ExternalDistributed
---
# ExternalDistributed {#externaldistributed}
Движок `ExternalDistributed` позволяет выполнять запросы `SELECT` для таблиц на удаленном сервере MySQL или PostgreSQL. Принимает в качестве аргумента табличные движки [MySQL](../../../engines/table-engines/integrations/mysql.md) или [PostgreSQL](../../../engines/table-engines/integrations/postgresql.md), поэтому возможно шардирование.
## Создание таблицы {#creating-a-table}
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1] [TTL expr1],
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2] [TTL expr2],
...
) ENGINE = ExternalDistributed('engine', 'host:port', 'database', 'table', 'user', 'password');
```
Смотрите подробное описание запроса [CREATE TABLE](../../../sql-reference/statements/create/table.md#create-table-query).
Структура таблицы может отличаться от структуры исходной таблицы:
- Имена столбцов должны быть такими же, как в исходной таблице, но можно использовать только некоторые из этих столбцов и в любом порядке.
- Типы столбцов могут отличаться от типов в исходной таблице. ClickHouse пытается [привести](../../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) значения к типам данных ClickHouse.
**Параметры движка**
- `engine` — табличный движок `MySQL` или `PostgreSQL`.
- `host:port` — адрес сервера MySQL или PostgreSQL.
- `database` — имя базы данных на сервере.
- `table` — имя таблицы.
- `user` — имя пользователя.
- `password` — пароль пользователя.
## Особенности реализации {#implementation-details}
Поддерживает несколько реплик, которые должны быть перечислены через `|`, а шарды — через `,`. Например:
```sql
CREATE TABLE test_shards (id UInt32, name String, age UInt32, money UInt32) ENGINE = ExternalDistributed('MySQL', `mysql{1|2}:3306,mysql{3|4}:3306`, 'clickhouse', 'test_replicas', 'root', 'clickhouse');
```
При указании реплик для каждого из шардов при чтении выбирается одна из доступных реплик. Если соединиться не удалось, то выбирается следующая реплика, и так для всех реплик. Если попытка соединения не удалась для всех реплик, то сервер ClickHouse снова пытается соединиться с одной из реплик, перебирая их по кругу, и так несколько раз.
Вы можете указать любое количество шардов и любое количество реплик для каждого шарда.
**Смотрите также**
- [Табличный движок MySQL](../../../engines/table-engines/integrations/mysql.md)
- [Табличный движок PostgreSQL](../../../engines/table-engines/integrations/postgresql.md)
- [Табличный движок Distributed](../../../engines/table-engines/special/distributed.md)

View File

@ -20,11 +20,11 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
Смотрите подробное описание запроса [CREATE TABLE](../../../sql-reference/statements/create/table.md#create-table-query).
Структура таблицы может отличаться от исходной структуры таблицы MySQL:
Структура таблицы может отличаться от структуры исходной таблицы MySQL:
- Имена столбцов должны быть такими же, как в исходной таблице MySQL, но вы можете использовать только некоторые из этих столбцов и в любом порядке.
- Типы столбцов могут отличаться от типов в исходной таблице MySQL. ClickHouse пытается [приводить](../../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) значения к типам данных ClickHouse.
- Настройка `external_table_functions_use_nulls` определяет как обрабатывать Nullable столбцы. По умолчанию 1, если 0 - табличная функция не будет делать nullable столбцы и будет вместо null выставлять значения по умолчанию для скалярного типа. Это также применимо для null значений внутри массивов.
- Имена столбцов должны быть такими же, как в исходной таблице MySQL, но можно использовать только некоторые из этих столбцов и в любом порядке.
- Типы столбцов могут отличаться от типов в исходной таблице MySQL. ClickHouse пытается [привести](../../../engines/database-engines/mysql.md#data_types-support) значения к типам данных ClickHouse.
- Настройка [external_table_functions_use_nulls](../../../operations/settings/settings.md#external-table-functions-use-nulls) определяет как обрабатывать Nullable столбцы. Значение по умолчанию: 1. Если значение 0, то табличная функция не делает Nullable столбцы, а вместо NULL выставляет значения по умолчанию для скалярного типа. Это также применимо для значений NULL внутри массивов.
**Параметры движка**
@ -50,6 +50,12 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
Остальные условия и ограничение выборки `LIMIT` будут выполнены в ClickHouse только после выполнения запроса к MySQL.
Поддерживает несколько реплик, которые должны быть перечислены через `|`. Например:
```sql
CREATE TABLE test_replicas (id UInt32, name String, age UInt32, money UInt32) ENGINE = MySQL(`mysql{2|3|4}:3306`, 'clickhouse', 'test_replicas', 'root', 'clickhouse');
```
## Пример использования {#primer-ispolzovaniia}
Таблица в MySQL:

View File

@ -29,7 +29,7 @@ ENGINE = ODBC(connection_settings, external_database, external_table)
- Имена столбцов должны быть такими же, как в исходной таблице, но вы можете использовать только некоторые из этих столбцов и в любом порядке.
- Типы столбцов могут отличаться от типов аналогичных столбцов в исходной таблице. ClickHouse пытается [приводить](../../../engines/table-engines/integrations/odbc.md#type_conversion_function-cast) значения к типам данных ClickHouse.
- Настройка `external_table_functions_use_nulls` определяет как обрабатывать Nullable столбцы. По умолчанию 1, если 0 - табличная функция не будет делать nullable столбцы и будет вместо null выставлять значения по умолчанию для скалярного типа. Это также применимо для null значений внутри массивов.
- Настройка [external_table_functions_use_nulls](../../../operations/settings/settings.md#external-table-functions-use-nulls) определяет как обрабатывать Nullable столбцы. Значение по умолчанию: 1. Если значение 0, то табличная функция не делает Nullable столбцы, а вместо NULL выставляет значения по умолчанию для скалярного типа. Это также применимо для значений NULL внутри массивов.
**Параметры движка**

View File

@ -20,19 +20,19 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
Смотрите подробное описание запроса [CREATE TABLE](../../../sql-reference/statements/create/table.md#create-table-query).
Структура таблицы может отличаться от исходной структуры таблицы PostgreSQL:
Структура таблицы может отличаться от структуры исходной таблицы PostgreSQL:
- Имена столбцов должны быть такими же, как в исходной таблице PostgreSQL, но вы можете использовать только некоторые из этих столбцов и в любом порядке.
- Типы столбцов могут отличаться от типов в исходной таблице PostgreSQL. ClickHouse пытается [приводить](../../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) values to the ClickHouse data types.
- Настройка `external_table_functions_use_nulls` определяет как обрабатывать Nullable столбцы. По умолчанию 1, если 0 - табличная функция не будет делать nullable столбцы и будет вместо null выставлять значения по умолчанию для скалярного типа. Это также применимо для null значений внутри массивов.
- Имена столбцов должны быть такими же, как в исходной таблице PostgreSQL, но можно использовать только некоторые из этих столбцов и в любом порядке.
- Типы столбцов могут отличаться от типов в исходной таблице PostgreSQL. ClickHouse пытается [привести](../../../engines/database-engines/postgresql.md#data_types-support) значения к типам данных ClickHouse.
- Настройка [external_table_functions_use_nulls](../../../operations/settings/settings.md#external-table-functions-use-nulls) определяет как обрабатывать Nullable столбцы. Значение по умолчанию: 1. Если значение 0, то табличная функция не делает Nullable столбцы, а вместо NULL выставляет значения по умолчанию для скалярного типа. Это также применимо для значений NULL внутри массивов.
**Параметры движка**
- `host:port` — адрес сервера PostgreSQL.
- `database`Имя базы данных на сервере PostgreSQL.
- `table`Имя таблицы.
- `user`Имя пользователя PostgreSQL.
- `password`Пароль пользователя PostgreSQL.
- `database`имя базы данных на сервере PostgreSQL.
- `table`имя таблицы.
- `user`имя пользователя PostgreSQL.
- `password`пароль пользователя PostgreSQL.
- `schema` — имя схемы, если не используется схема по умолчанию. Необязательный аргумент.
## Особенности реализации {#implementation-details}
@ -49,6 +49,12 @@ PostgreSQL массивы конвертируются в массивы ClickHo
!!! info "Внимание"
Будьте внимательны, в PostgreSQL массивы, созданные как `type_name[]`, являются многомерными и могут содержать в себе разное количество измерений в разных строках одной таблицы. Внутри ClickHouse допустимы только многомерные массивы с одинаковым кол-вом измерений во всех строках таблицы.
Поддерживает несколько реплик, которые должны быть перечислены через `|`. Например:
```sql
CREATE TABLE test_replicas (id UInt32, name String) ENGINE = PostgreSQL(`postgres{2|3|4}:5432`, 'clickhouse', 'test_replicas', 'postgres', 'mysecretpassword');
```
При использовании словаря PostgreSQL поддерживается приоритет реплик. Чем больше номер реплики, тем ниже ее приоритет. Наивысший приоритет у реплики с номером `0`.

View File

@ -348,7 +348,7 @@ INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 102 2;
## input_format_null_as_default {#settings-input-format-null-as-default}
Включает или отключает инициализацию [значениями по умолчанию](../../sql-reference/statements/create/table.md#create-default-values) ячеек с [NULL](../../sql-reference/syntax.md#null-literal), если тип данных столбца не позволяет [хранить NULL](../../sql-reference/data-types/nullable.md#data_type-nullable).
Если столбец не позволяет хранить `NULL` и эта настройка отключена, то вставка `NULL` приведет к возникновению исключения. Если столбец позволяет хранить `NULL`, то значения `NULL` вставляются независимо от этой настройки.
Эта настройка используется для запросов [INSERT ... VALUES](../../sql-reference/statements/insert-into.md) для текстовых входных форматов.
@ -361,7 +361,7 @@ INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 102 2;
## insert_null_as_default {#insert_null_as_default}
Включает или отключает вставку [значений по умолчанию](../../sql-reference/statements/create/table.md#create-default-values) вместо [NULL](../../sql-reference/syntax.md#null-literal) в столбцы, которые не позволяют [хранить NULL](../../sql-reference/data-types/nullable.md#data_type-nullable).
Если столбец не позволяет хранить `NULL` и эта настройка отключена, то вставка `NULL` приведет к возникновению исключения. Если столбец позволяет хранить `NULL`, то значения `NULL` вставляются независимо от этой настройки.
Эта настройка используется для запросов [INSERT ... SELECT](../../sql-reference/statements/insert-into.md#insert_query_insert-select). При этом подзапросы `SELECT` могут объединяться с помощью `UNION ALL`.
@ -1181,22 +1181,22 @@ load_balancing = round_robin
!!! warning "Предупреждение"
Параллельное выполнение запроса может привести к неверному результату, если в запросе есть объединение или подзапросы и при этом таблицы не удовлетворяют определенным требованиям. Подробности смотрите в разделе [Распределенные подзапросы и max_parallel_replicas](../../sql-reference/operators/in.md#max_parallel_replica-subqueries).
## compile_expressions {#compile-expressions}
## compile {#compile}
Включает или выключает компиляцию часто используемых функций и операторов. Компиляция производится в нативный код платформы с помощью LLVM во время выполнения.
Включить компиляцию запросов. По умолчанию - 0 (выключено).
Возможные значения:
Компиляция предусмотрена только для части конвейера обработки запроса - для первой стадии агрегации (GROUP BY).
В случае, если эта часть конвейера была скомпилирована, запрос может работать быстрее, за счёт разворачивания коротких циклов и инлайнинга вызовов агрегатных функций. Максимальный прирост производительности (до четырёх раз в редких случаях) достигается на запросах с несколькими простыми агрегатными функциями. Как правило, прирост производительности незначителен. В очень редких случаях возможно замедление выполнения запроса.
- 0 — компиляция выключена.
- 1 — компиляция включена.
## min_count_to_compile {#min-count-to-compile}
Значение по умолчанию: `1`.
После скольких раз, когда скомпилированный кусок кода мог пригодиться, выполнить его компиляцию. По умолчанию - 3.
Для тестирования можно установить значение 0: компиляция выполняется синхронно, и запрос ожидает окончания процесса компиляции перед продолжением выполнения. Во всех остальных случаях используйте значения, начинающиеся с 1. Как правило, компиляция занимает по времени около 5-10 секунд.
В случае, если значение равно 1 или больше, компиляция выполняется асинхронно, в отдельном потоке. При готовности результата, он сразу же будет использован, в том числе, уже выполняющимися в данный момент запросами.
## min_count_to_compile_expression {#min-count-to-compile-expression}
Скомпилированный код требуется для каждого разного сочетания используемых в запросе агрегатных функций и вида ключей в GROUP BY.
Результаты компиляции сохраняются в директории build в виде .so файлов. Количество результатов компиляции не ограничено, так как они не занимают много места. При перезапуске сервера, старые результаты будут использованы, за исключением случая обновления сервера - тогда старые результаты удаляются.
Минимальное количество выполнений одного и того же выражения до его компиляции.
Значение по умолчанию: `3`.
## input_format_skip_unknown_fields {#input-format-skip-unknown-fields}
@ -2721,7 +2721,7 @@ SELECT * FROM test2;
- 0 — запрос `INSERT` добавляет данные в конец файла после существующих.
- 1 — `INSERT` удаляет имеющиеся в файле данные и замещает их новыми.
Значение по умолчанию: `0`.
## allow_experimental_geo_types {#allow-experimental-geo-types}
@ -2735,7 +2735,7 @@ SELECT * FROM test2;
## database_atomic_wait_for_drop_and_detach_synchronously {#database_atomic_wait_for_drop_and_detach_synchronously}
Добавляет модификатор `SYNC` ко всем запросам `DROP` и `DETACH`.
Возможные значения:
@ -2813,7 +2813,7 @@ SELECT * FROM test2;
**Пример**
Какие изменения привносит включение и выключение настройки:
Запрос:
@ -3023,4 +3023,17 @@ SETTINGS index_granularity = 8192 │
└────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```
[Оригинальная статья](https://clickhouse.tech/docs/ru/operations/settings/settings/) <!--hide-->
## external_table_functions_use_nulls {#external-table-functions-use-nulls}
Определяет, как табличные функции [mysql](../../sql-reference/table-functions/mysql.md), [postgresql](../../sql-reference/table-functions/postgresql.md) и [odbc](../../sql-reference/table-functions/odbc.md)] используют Nullable столбцы.
Возможные значения:
- 0 — табличная функция явно использует Nullable столбцы.
- 1 — табличная функция неявно использует Nullable столбцы.
Значение по умолчанию: `1`.
**Использование**
Если установлено значение `0`, то табличная функция не делает Nullable столбцы, а вместо NULL выставляет значения по умолчанию для скалярного типа. Это также применимо для значений NULL внутри массивов.

View File

@ -1,40 +0,0 @@
---
toc_priority: 150
---
## initializeAggregation {#initializeaggregation}
Инициализирует агрегацию для введеных строчек. Предназначена для функций с суффиксом `State`.
Поможет вам проводить тесты или работать со столбцами типов: `AggregateFunction` и `AggregationgMergeTree`.
**Синтаксис**
``` sql
initializeAggregation (aggregate_function, column_1, column_2)
```
**Аргументы**
- `aggregate_function` — название функции агрегации, состояние которой нужно создать. [String](../../../sql-reference/data-types/string.md#string).
- `column_n` — столбец, который передается в функцию агрегации как аргумент. [String](../../../sql-reference/data-types/string.md#string).
**Возвращаемое значение**
Возвращает результат агрегации введенной информации. Тип возвращаемого значения такой же, как и для функции, которая становится первым аргументом для `initializeAgregation`.
Пример:
Возвращаемый тип функций с суффиксом `State``AggregateFunction`.
**Пример**
Запрос:
```sql
SELECT uniqMerge(state) FROM (SELECT initializeAggregation('uniqState', number % 3) AS state FROM system.numbers LIMIT 10000);
```
Результат:
┌─uniqMerge(state)─┐
│ 3 │
└──────────────────┘

View File

@ -486,6 +486,7 @@ LIFETIME(MIN 300 MAX 360)
<table>table_name</table>
<where>id=10</where>
<invalidate_query>SQL_QUERY</invalidate_query>
<fail_on_connection_loss>true</fail_on_connection_loss>
</mysql>
</source>
```
@ -503,6 +504,7 @@ SOURCE(MYSQL(
table 'table_name'
where 'id=10'
invalidate_query 'SQL_QUERY'
fail_on_connection_loss 'true'
))
```
@ -527,6 +529,8 @@ SOURCE(MYSQL(
- `invalidate_query` — запрос для проверки статуса словаря. Необязательный параметр. Читайте подробнее в разделе [Обновление словарей](external-dicts-dict-lifetime.md).
- `fail_on_connection_loss` параметр конфигурации, контролирующий поведение сервера при потере соединения. Если значение `true`, то исключение генерируется сразу же, если соединение между клиентом и сервером было потеряно. Если значение `false`, то сервер повторно попытается выполнить запрос три раза прежде чем сгенерировать исключение. Имейте в виду, что повторные попытки могут увеличить время выполнения запроса. Значение по умолчанию: `false`.
MySQL можно подключить на локальном хосте через сокеты, для этого необходимо задать `host` и `socket`.
Пример настройки:
@ -542,6 +546,7 @@ MySQL можно подключить на локальном хосте чер
<table>table_name</table>
<where>id=10</where>
<invalidate_query>SQL_QUERY</invalidate_query>
<fail_on_connection_loss>true</fail_on_connection_loss>
</mysql>
</source>
```
@ -558,6 +563,7 @@ SOURCE(MYSQL(
table 'table_name'
where 'id=10'
invalidate_query 'SQL_QUERY'
fail_on_connection_loss 'true'
))
```

View File

@ -13,7 +13,7 @@ toc_title: "Прочие функции"
Возвращает именованное значение из секции [macros](../../operations/server-configuration-parameters/settings.md#macros) конфигурации сервера.
**Синтаксис**
```sql
getMacro(name)
@ -854,8 +854,8 @@ WHERE diff != 1
## runningConcurrency {#runningconcurrency}
Подсчитывает количество одновременно идущих событий.
У каждого события есть время начала и время окончания. Считается, что время начала включено в событие, а время окончания исключено из него. Столбцы со временем начала и окончания событий должны иметь одинаковый тип данных.
Функция подсчитывает количество событий, происходящих одновременно на момент начала каждого из событий в выборке.
!!! warning "Предупреждение"
События должны быть отсортированы по возрастанию времени начала. Если это требование нарушено, то функция вызывает исключение.
@ -1371,11 +1371,84 @@ SELECT formatReadableSize(filesystemCapacity()) AS "Capacity", toTypeName(filesy
└───────────┴────────┘
```
## initializeAggregation {#initializeaggregation}
Вычисляет результат агрегатной функции для каждой строки. Предназначена для инициализации агрегатных функций с комбинатором [-State](../../sql-reference/aggregate-functions/combinators.md#state). Может быть полезна для создания состояний агрегатных функций для последующей их вставки в столбцы типа [AggregateFunction](../../sql-reference/data-types/aggregatefunction.md#data-type-aggregatefunction) или использования в качестве значений по-умолчанию.
**Синтаксис**
``` sql
initializeAggregation (aggregate_function, arg1, arg2, ..., argN)
```
**Аргументы**
- `aggregate_function` — название агрегатной функции, состояние которой нужно создать. [String](../../sql-reference/data-types/string.md#string).
- `arg` — аргументы, которые передаются в агрегатную функцию.
**Возвращаемое значение**
- В каждой строке результат агрегатной функции, примененной к аргументам из этой строки.
Тип возвращаемого значения такой же, как и у функции, переданной первым аргументом.
**Пример**
Запрос:
```sql
SELECT uniqMerge(state) FROM (SELECT initializeAggregation('uniqState', number % 3) AS state FROM numbers(10000));
```
Результат:
```text
┌─uniqMerge(state)─┐
│ 3 │
└──────────────────┘
```
Запрос:
```sql
SELECT finalizeAggregation(state), toTypeName(state) FROM (SELECT initializeAggregation('sumState', number % 3) AS state FROM numbers(5));
```
Результат:
```text
┌─finalizeAggregation(state)─┬─toTypeName(state)─────────────┐
│ 0 │ AggregateFunction(sum, UInt8) │
│ 1 │ AggregateFunction(sum, UInt8) │
│ 2 │ AggregateFunction(sum, UInt8) │
│ 0 │ AggregateFunction(sum, UInt8) │
│ 1 │ AggregateFunction(sum, UInt8) │
└────────────────────────────┴───────────────────────────────┘
```
Пример с движком таблиц `AggregatingMergeTree` и столбцом типа `AggregateFunction`:
```sql
CREATE TABLE metrics
(
key UInt64,
value AggregateFunction(sum, UInt64) DEFAULT initializeAggregation('sumState', toUInt64(0))
)
ENGINE = AggregatingMergeTree
ORDER BY key
```
```sql
INSERT INTO metrics VALUES (0, initializeAggregation('sumState', toUInt64(42)))
```
**Смотрите также**
- [arrayReduce](../../sql-reference/functions/array-functions.md#arrayreduce)
## finalizeAggregation {#function-finalizeaggregation}
Принимает состояние агрегатной функции. Возвращает результат агрегирования (или конечное состояние при использовании комбинатора [-State](../../sql-reference/aggregate-functions/combinators.md#state)).
**Синтаксис**
``` sql
finalizeAggregation(state)
@ -1421,7 +1494,7 @@ SELECT finalizeAggregation(( SELECT sumState(number) FROM numbers(10)));
└──────────────────────────────────┘
```
Обратите внимание, что значения `NULL` игнорируются.
Запрос:
@ -1470,7 +1543,7 @@ FROM numbers(10);
**Смотрите также**
- [arrayReduce](../../sql-reference/functions/array-functions.md#arrayreduce)
- [initializeAggregation](../../sql-reference/aggregate-functions/reference/initializeAggregation.md)
- [initializeAggregation](#initializeaggregation)
## runningAccumulate {#runningaccumulate}
@ -1537,13 +1610,13 @@ SELECT k, runningAccumulate(sum_k) AS res FROM (SELECT number as k, sumState(k)
Запрос:
```sql
SELECT
grouping,
item,
runningAccumulate(state, grouping) AS res
FROM
(
SELECT
toInt8(number / 4) AS grouping,
number AS item,
sumState(number) AS state
@ -1732,7 +1805,7 @@ SELECT number, randomPrintableASCII(30) as str, length(str) FROM system.numbers
randomString(length)
```
**Аргументы**
- `length` — длина строки. Положительное целое число.
@ -1831,13 +1904,13 @@ randomStringUTF8(length)
Запрос:
```sql
SELECT randomStringUTF8(13)
```
Результат:
```text
┌─randomStringUTF8(13)─┐
│ 𘤗𙉝д兠庇󡅴󱱎󦐪􂕌𔊹𓰛 │
└──────────────────────┘
@ -1848,13 +1921,13 @@ SELECT randomStringUTF8(13)
Возвращает текущее значение [пользовательской настройки](../../operations/settings/index.md#custom_settings).
**Синтаксис**
```sql
getSetting('custom_setting')
```
**Параметр**
- `custom_setting` — название настройки. [String](../../sql-reference/data-types/string.md).
@ -1866,7 +1939,7 @@ getSetting('custom_setting')
```sql
SET custom_a = 123;
SELECT getSetting('custom_a');
```
**Результат**
@ -1875,7 +1948,7 @@ SELECT getSetting('custom_a');
123
```
**См. также**
- [Пользовательские настройки](../../operations/settings/index.md#custom_settings)
@ -1889,10 +1962,10 @@ SELECT getSetting('custom_a');
isDecimalOverflow(d, [p])
```
**Аргументы**
- `d` — число. [Decimal](../../sql-reference/data-types/decimal.md).
- `p` — точность. Необязательный параметр. Если опущен, используется исходная точность первого аргумента. Использование этого параметра может быть полезно для извлечения данных в другую СУБД или файл. [UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges).
**Возвращаемое значение**
@ -1926,7 +1999,7 @@ SELECT isDecimalOverflow(toDecimal32(1000000000, 0), 9),
countDigits(x)
```
**Аргументы**
- `x` — [целое](../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64) или [дробное](../../sql-reference/data-types/decimal.md) число.

View File

@ -5,14 +5,14 @@ toc_title: SYSTEM
# Запросы SYSTEM {#query-language-system}
- [RELOAD EMBEDDED DICTIONARIES](#query_language-system-reload-emdedded-dictionaries)
- [RELOAD DICTIONARIES](#query_language-system-reload-dictionaries)
- [RELOAD DICTIONARY](#query_language-system-reload-dictionary)
- [RELOAD MODELS](#query_language-system-reload-models)
- [RELOAD MODEL](#query_language-system-reload-model)
- [DROP DNS CACHE](#query_language-system-drop-dns-cache)
- [DROP MARK CACHE](#query_language-system-drop-mark-cache)
- [DROP UNCOMPRESSED CACHE](#query_language-system-drop-uncompressed-cache)
- [DROP COMPILED EXPRESSION CACHE](#query_language-system-drop-compiled-expression-cache)
- [DROP REPLICA](#query_language-system-drop-replica)
- [FLUSH LOGS](#query_language-system-flush_logs)
@ -24,10 +24,10 @@ toc_title: SYSTEM
- [START DISTRIBUTED SENDS](#query_language-system-start-distributed-sends)
- [STOP MERGES](#query_language-system-stop-merges)
- [START MERGES](#query_language-system-start-merges)
- [STOP TTL MERGES](#query_language-stop-ttl-merges)
- [START TTL MERGES](#query_language-start-ttl-merges)
- [STOP MOVES](#query_language-stop-moves)
- [START MOVES](#query_language-start-moves)
- [STOP FETCHES](#query_language-system-stop-fetches)
- [START FETCHES](#query_language-system-start-fetches)
- [STOP REPLICATED SENDS](#query_language-system-start-replicated-sends)
@ -36,13 +36,13 @@ toc_title: SYSTEM
- [START REPLICATION QUEUES](#query_language-system-start-replication-queues)
- [SYNC REPLICA](#query_language-system-sync-replica)
- [RESTART REPLICA](#query_language-system-restart-replica)
- [RESTART REPLICAS](#query_language-system-restart-replicas)
## RELOAD EMBEDDED DICTIONARIES {#query_language-system-reload-emdedded-dictionaries}
Перегружает все [Встроенные словари](../dictionaries/internal-dicts.md).
По умолчанию встроенные словари выключены.
Всегда возвращает `Ok.`, вне зависимости от результата обновления встроенных словарей.
## RELOAD DICTIONARIES {#query_language-system-reload-dictionaries}
Перегружает все словари, которые были успешно загружены до этого.
@ -115,7 +115,7 @@ SYSTEM DROP REPLICA 'replica_name' FROM ZKPATH '/path/to/table/in/zk';
## DROP COMPILED EXPRESSION CACHE {#query_language-system-drop-compiled-expression-cache}
Сбрасывает кеш скомпилированных выражений. Используется при разработке ClickHouse и тестах производительности.
Компилированные выражения используются когда включена настройка уровня запрос/пользователь/профиль [compile](../../operations/settings/settings.md#compile)
Компилированные выражения используются когда включена настройка уровня запрос/пользователь/профиль [compile-expressions](../../operations/settings/settings.md#compile-expressions)
## FLUSH LOGS {#query_language-system-flush_logs}
@ -194,7 +194,7 @@ SYSTEM START MERGES [ON VOLUME <volume_name> | [db.]merge_tree_family_table_name
SYSTEM STOP TTL MERGES [[db.]merge_tree_family_table_name]
```
### START TTL MERGES {#query_language-start-ttl-merges}
Запускает фоновые процессы удаления старых данных основанные на [выражениях TTL](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl) для таблиц семейства MergeTree:
Возвращает `Ok.` даже если указана несуществующая таблица или таблица имеет тип отличный от MergeTree. Возвращает ошибку если указана не существующая база данных:
@ -203,7 +203,7 @@ SYSTEM STOP TTL MERGES [[db.]merge_tree_family_table_name]
SYSTEM START TTL MERGES [[db.]merge_tree_family_table_name]
```
### STOP MOVES {#query_language-stop-moves}
Позволяет остановить фоновые процессы переноса данных основанные [табличных выражениях TTL с использованием TO VOLUME или TO DISK](../../engines/table-engines/mergetree-family/mergetree.md#mergetree-table-ttl) for tables in the MergeTree family:
Возвращает `Ok.` даже если указана несуществующая таблица или таблица имеет тип отличный от MergeTree. Возвращает ошибку если указана не существующая база данных:
@ -212,7 +212,7 @@ SYSTEM START TTL MERGES [[db.]merge_tree_family_table_name]
SYSTEM STOP MOVES [[db.]merge_tree_family_table_name]
```
### START MOVES {#query_language-start-moves}
Запускает фоновые процессы переноса данных основанные [табличных выражениях TTL с использованием TO VOLUME или TO DISK](../../engines/table-engines/mergetree-family/mergetree.md#mergetree-table-ttl) for tables in the MergeTree family:
Возвращает `Ok.` даже если указана несуществующая таблица или таблица имеет тип отличный от MergeTree. Возвращает ошибку если указана не существующая база данных:
@ -261,7 +261,7 @@ SYSTEM START REPLICATED SENDS [[db.]replicated_merge_tree_family_table_name]
### STOP REPLICATION QUEUES {#query_language-system-stop-replication-queues}
Останавливает фоновые процессы разбора заданий из очереди репликации которая хранится в Zookeeper для таблиц семейства `ReplicatedMergeTree`. Возможные типы заданий - merges, fetches, mutation, DDL запросы с ON CLUSTER:
``` sql
SYSTEM STOP REPLICATION QUEUES [[db.]replicated_merge_tree_family_table_name]
@ -269,7 +269,7 @@ SYSTEM STOP REPLICATION QUEUES [[db.]replicated_merge_tree_family_table_name]
### START REPLICATION QUEUES {#query_language-system-start-replication-queues}
Запускает фоновые процессы разбора заданий из очереди репликации которая хранится в Zookeeper для таблиц семейства `ReplicatedMergeTree`. Возможные типы заданий - merges, fetches, mutation, DDL запросы с ON CLUSTER:
``` sql
SYSTEM START REPLICATION QUEUES [[db.]replicated_merge_tree_family_table_name]
@ -277,7 +277,7 @@ SYSTEM START REPLICATION QUEUES [[db.]replicated_merge_tree_family_table_name]
### SYNC REPLICA {#query_language-system-sync-replica}
Ждет когда таблица семейства `ReplicatedMergeTree` будет синхронизирована с другими репликами в кластере, будет работать до достижения `receive_timeout`, если синхронизация для таблицы отключена в настоящий момент времени:
``` sql
SYSTEM SYNC REPLICA [db.]replicated_merge_tree_family_table_name

View File

@ -38,6 +38,18 @@ mysql('host:port', 'database', 'table', 'user', 'password'[, replace_query, 'on_
Остальные условия и ограничение выборки `LIMIT` будут выполнены в ClickHouse только после выполнения запроса к MySQL.
Поддерживает несколько реплик, которые должны быть перечислены через `|`. Например:
```sql
SELECT name FROM mysql(`mysql{1|2|3}:3306`, 'mysql_database', 'mysql_table', 'user', 'password');
```
или
```sql
SELECT name FROM mysql(`mysql1:3306|mysql2:3306|mysql3:3306`, 'mysql_database', 'mysql_table', 'user', 'password');
```
**Возвращаемое значение**
Объект таблицы с теми же столбцами, что и в исходной таблице MySQL.

View File

@ -43,6 +43,18 @@ PostgreSQL массивы конвертируются в массивы ClickHo
!!! info "Примечание"
Будьте внимательны, в PostgreSQL массивы, созданные как `type_name[]`, являются многомерными и могут содержать в себе разное количество измерений в разных строках одной таблицы. Внутри ClickHouse допустимы только многомерные массивы с одинаковым кол-вом измерений во всех строках таблицы.
Поддерживает несколько реплик, которые должны быть перечислены через `|`. Например:
```sql
SELECT name FROM postgresql(`postgres{1|2|3}:5432`, 'postgres_database', 'postgres_table', 'user', 'password');
```
или
```sql
SELECT name FROM postgresql(`postgres1:5431|postgres2:5432`, 'postgres_database', 'postgres_table', 'user', 'password');
```
При использовании словаря PostgreSQL поддерживается приоритет реплик. Чем больше номер реплики, тем ниже ее приоритет. Наивысший приоритет у реплики с номером `0`.

View File

@ -47,6 +47,13 @@ When all prerequisites are installed, running `build.py` without args (there are
The easiest way to see the result is to use the `--livereload=8888` argument of build.py. Alternatively, you can manually launch an HTTP server to serve the docs, for example by running `cd ClickHouse/docs/build && python3 -m http.server 8888`. Then go to http://localhost:8888 in a browser. Feel free to use any other port instead of 8888.
## How to change code highlighting? {#how-to-change-code-hl}
ClickHouse does not use mkdocs `highlightjs` feature. It uses modified pygments styles instead.
If you want to change code highlighting, edit the `website/css/highlight.css` file.
Currently, an [eighties](https://github.com/idleberg/base16-pygments/blob/master/css/base16-eighties.dark.css) theme
is used.
## How to subscribe on documentation changes? {#how-to-subscribe-on-documentation-changes}
At the moment there's no easy way to do just that, but you can consider:

View File

@ -87,6 +87,7 @@ def build_for_lang(lang, args):
website_url = 'https://clickhouse.tech'
site_name = site_names.get(lang, site_names['en']) % ''
site_name = site_name.replace(' ', ' ')
raw_config = dict(
site_name=site_name,
site_url=f'{website_url}/docs/{lang}/',

View File

@ -1,4 +1,4 @@
# 设置 {#set}
# 集合 {#set}
始终存在于 RAM 中的数据集。它适用于IN运算符的右侧请参见 «IN运算符» 部分)。

View File

@ -817,21 +817,22 @@ load_balancing = first_or_random
为了保持一致性(以获取相同数据拆分的不同部分),此选项仅在设置了采样键时有效。
副本滞后不受控制。
## 编译 {#compile}
## compile_expressions {#compile-expressions}
启用查询的编译。 默认情况下0禁用
啟用或禁用在運行時使用 LLVM 將常用的簡單函數和運算符編譯為本機代碼
编译仅用于查询处理管道的一部分用于聚合的第一阶段GROUP BY
如果编译了管道的这一部分,则由于部署周期较短和内联聚合函数调用,查询可能运行得更快。 对于具有多个简单聚合函数的查询,可以看到最大的性能改进(在极少数情况下可快四倍)。 通常,性能增益是微不足道的。 在极少数情况下,它可能会减慢查询执行速度。
可能的值:
## min_count_to_compile {#min-count-to-compile}
- 0 — 禁用。
- 1 — 啟用。
在运行编译之前可能使用已编译代码块的次数。 默认情况下3。
For testing, the value can be set to 0: compilation runs synchronously and the query waits for the end of the compilation process before continuing execution. For all other cases, use values starting with 1. Compilation normally takes about 5-10 seconds.
如果该值为1或更大则编译在单独的线程中异步进行。 结果将在准备就绪后立即使用,包括当前正在运行的查询。
默認值:`1`。
对于查询中使用的聚合函数的每个不同组合以及GROUP BY子句中的键类型都需要编译代码。
The results of the compilation are saved in the build directory in the form of .so files. There is no restriction on the number of compilation results since they don't use very much space. Old results will be used after server restarts, except in the case of a server upgrade; in this case, the old results are deleted.
## min_count_to_compile_expression {#min-count-to-compile-expression}
在編譯之前執行相同表達式的最小計數。
默認值:`3`。
## output_format_json_quote_64bit_integers {#session_settings-output_format_json_quote_64bit_integers}

View File

@ -95,7 +95,7 @@ SYSTEM DROP REPLICA 'replica_name' FROM ZKPATH '/path/to/table/in/zk';
## DROP COMPILED EXPRESSION CACHE {#query_language-system-drop-compiled-expression-cache}
重置已编译的表达式缓存。用于ClickHouse开发和性能测试。
`query/user/profile` 启用配置项 [compile](../../operations/settings/settings.md#compile)时,编译的表达式缓存开启。
`query/user/profile` 启用配置项 [compile-expressions](../../operations/settings/settings.md#compile-expressions)时,编译的表达式缓存开启。
## FLUSH LOGS {#query_language-system-flush_logs}
@ -209,7 +209,7 @@ SYSTEM STOP MOVES [[db.]merge_tree_family_table_name]
### STOP FETCHES {#query_language-system-stop-fetches}
停止后台获取 `ReplicatedMergeTree`系列引擎表中插入的数据块。
不管表引擎类型如何或表/数据库是否存,都返回 `OK.`
``` sql
@ -218,7 +218,7 @@ SYSTEM STOP FETCHES [[db.]replicated_merge_tree_family_table_name]
### START FETCHES {#query_language-system-start-fetches}
启动后台获取 `ReplicatedMergeTree`系列引擎表中插入的数据块。
不管表引擎类型如何或表/数据库是否存,都返回 `OK.`
``` sql
@ -227,7 +227,7 @@ SYSTEM START FETCHES [[db.]replicated_merge_tree_family_table_name]
### STOP REPLICATED SENDS {#query_language-system-start-replicated-sends}
停止通过后台分发 `ReplicatedMergeTree`系列引擎表中新插入的数据块到集群的其它副本节点。
``` sql
SYSTEM STOP REPLICATED SENDS [[db.]replicated_merge_tree_family_table_name]
@ -235,7 +235,7 @@ SYSTEM STOP REPLICATED SENDS [[db.]replicated_merge_tree_family_table_name]
### START REPLICATED SENDS {#query_language-system-start-replicated-sends}
启动通过后台分发 `ReplicatedMergeTree`系列引擎表中新插入的数据块到集群的其它副本节点。
``` sql
SYSTEM START REPLICATED SENDS [[db.]replicated_merge_tree_family_table_name]

View File

@ -0,0 +1,120 @@
---
toc_priority: 42
toc_title: postgresql
---
# postgresql {#postgresql}
允许对存储在远程 PostgreSQL 服务器上的数据进行 `SELECT``INSERT` 查询.
**语法**
``` sql
postgresql('host:port', 'database', 'table', 'user', 'password'[, `schema`])
```
**参数**
- `host:port` — PostgreSQL 服务器地址.
- `database` — 远程数据库名称.
- `table` — 远程表名称.
- `user` — PostgreSQL 用户.
- `password` — 用户密码.
- `schema` — 非默认的表结构. 可选.
**返回值**
一个表对象,其列数与原 PostgreSQL 表的列数相同。
!!! info "Note"
在`INSERT`查询中,为了区分表函数`postgresql(..)`和表名以及表的列名列表,你必须使用关键字`FUNCTION`或`TABLE FUNCTION`。请看下面的例子。
## 实施细节 {#implementation-details}
`SELECT`查询在 PostgreSQL 上以 `COPY (SELECT ...) TO STDOUT` 的方式在只读的 PostgreSQL 事务中运行,每次在`SELECT`查询后提交。
简单的`WHERE`子句,如`=`、`=`、`>`、`>=`、`<`、`<=`和`IN`在PostgreSQL服务器上执行。
所有的连接、聚合、排序,`IN [ 数组 ]`条件和`LIMIT`采样约束只有在对PostgreSQL的查询结束后才会在ClickHouse中执行。
PostgreSQL 上的`INSERT`查询以`COPY "table_name" (field1, field2, ... fieldN) FROM STDIN`的方式在 PostgreSQL 事务中运行,每次`INSERT`语句后自动提交。
PostgreSQL 数组类型将转换为 ClickHouse 数组。
!!! info "Note"
要小心,在 PostgreSQL 中,像 Integer[] 这样的数组数据类型列可以在不同的行中包含不同维度的数组,但在 ClickHouse 中,只允许在所有的行中有相同维度的多维数组。
支持设置 PostgreSQL 字典源中 Replicas 的优先级。地图中的数字越大,优先级就越低。`0`代表最高的优先级。
**示例**
PostgreSQL 中的表:
``` text
postgres=# CREATE TABLE "public"."test" (
"int_id" SERIAL,
"int_nullable" INT NULL DEFAULT NULL,
"float" FLOAT NOT NULL,
"str" VARCHAR(100) NOT NULL DEFAULT '',
"float_nullable" FLOAT NULL DEFAULT NULL,
PRIMARY KEY (int_id));
CREATE TABLE
postgres=# INSERT INTO test (int_id, str, "float") VALUES (1,'test',2);
INSERT 0 1
postgresql> SELECT * FROM test;
int_id | int_nullable | float | str | float_nullable
--------+--------------+-------+------+----------------
1 | | 2 | test |
(1 row)
```
从 ClickHouse 检索数据:
```sql
SELECT * FROM postgresql('localhost:5432', 'test', 'test', 'postgresql_user', 'password') WHERE str IN ('test');
```
``` text
┌─int_id─┬─int_nullable─┬─float─┬─str──┬─float_nullable─┐
│ 1 │ ᴺᵁᴸᴸ │ 2 │ test │ ᴺᵁᴸᴸ │
└────────┴──────────────┴───────┴──────┴────────────────┘
```
插入数据:
```sql
INSERT INTO TABLE FUNCTION postgresql('localhost:5432', 'test', 'test', 'postgrsql_user', 'password') (int_id, float) VALUES (2, 3);
SELECT * FROM postgresql('localhost:5432', 'test', 'test', 'postgresql_user', 'password');
```
``` text
┌─int_id─┬─int_nullable─┬─float─┬─str──┬─float_nullable─┐
│ 1 │ ᴺᵁᴸᴸ │ 2 │ test │ ᴺᵁᴸᴸ │
│ 2 │ ᴺᵁᴸᴸ │ 3 │ │ ᴺᵁᴸᴸ │
└────────┴──────────────┴───────┴──────┴────────────────┘
```
使用非默认的表结构:
```text
postgres=# CREATE SCHEMA "nice.schema";
postgres=# CREATE TABLE "nice.schema"."nice.table" (a integer);
postgres=# INSERT INTO "nice.schema"."nice.table" SELECT i FROM generate_series(0, 99) as t(i)
```
```sql
CREATE TABLE pg_table_schema_with_dots (a UInt32)
ENGINE PostgreSQL('localhost:5432', 'clickhouse', 'nice.table', 'postgrsql_user', 'password', 'nice.schema');
```
**另请参阅**
- [PostgreSQL 表引擎](../../engines/table-engines/integrations/postgresql.md)
- [使用 PostgreSQL 作为外部字典的来源](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md#dicts-external_dicts_dict_sources-postgresql)
[原始文章](https://clickhouse.tech/docs/en/sql-reference/table-functions/postgresql/) <!--hide-->

View File

@ -577,7 +577,18 @@ private:
}
if (!history_file.empty() && !fs::exists(history_file))
FS::createFile(history_file);
{
/// Avoid TOCTOU issue.
try
{
FS::createFile(history_file);
}
catch (const ErrnoException & e)
{
if (e.getErrno() != EEXIST)
throw;
}
}
LineReader::Patterns query_extenders = {"\\"};
LineReader::Patterns query_delimiters = {";", "\\G"};
@ -2435,6 +2446,8 @@ public:
{
/// param_name value
++arg_num;
if (arg_num >= argc)
throw Exception("Parameter requires value", ErrorCodes::BAD_ARGUMENTS);
arg = argv[arg_num];
query_parameters.emplace(String(param_continuation), String(arg));
}

View File

@ -324,6 +324,13 @@ Poco::Net::SocketAddress Server::socketBindListen(Poco::Net::ServerSocket & sock
socket.bind(address, /* reuseAddress = */ true, /* reusePort = */ config().getBool("listen_reuse_port", false));
#endif
/// If caller requests any available port from the OS, discover it after binding.
if (port == 0)
{
address = socket.address();
LOG_DEBUG(&logger(), "Requested any available port (port == 0), actual port is {:d}", address.port());
}
socket.listen(/* backlog = */ config().getUInt("listen_backlog", 64));
return address;

View File

@ -30,16 +30,16 @@ static IAggregateFunction * createWithNumericOrTimeType(const IDataType & argume
template <typename Trait, typename ... TArgs>
inline AggregateFunctionPtr createAggregateFunctionGroupArrayImpl(const DataTypePtr & argument_type, TArgs ... args)
inline AggregateFunctionPtr createAggregateFunctionGroupArrayImpl(const DataTypePtr & argument_type, const Array & parameters, TArgs ... args)
{
if (auto res = createWithNumericOrTimeType<GroupArrayNumericImpl, Trait>(*argument_type, argument_type, std::forward<TArgs>(args)...))
if (auto res = createWithNumericOrTimeType<GroupArrayNumericImpl, Trait>(*argument_type, argument_type, parameters, std::forward<TArgs>(args)...))
return AggregateFunctionPtr(res);
WhichDataType which(argument_type);
if (which.idx == TypeIndex::String)
return std::make_shared<GroupArrayGeneralImpl<GroupArrayNodeString, Trait>>(argument_type, std::forward<TArgs>(args)...);
return std::make_shared<GroupArrayGeneralImpl<GroupArrayNodeString, Trait>>(argument_type, parameters, std::forward<TArgs>(args)...);
return std::make_shared<GroupArrayGeneralImpl<GroupArrayNodeGeneral, Trait>>(argument_type, std::forward<TArgs>(args)...);
return std::make_shared<GroupArrayGeneralImpl<GroupArrayNodeGeneral, Trait>>(argument_type, parameters, std::forward<TArgs>(args)...);
// Link list implementation doesn't show noticeable performance improvement
// if (which.idx == TypeIndex::String)
@ -79,9 +79,9 @@ AggregateFunctionPtr createAggregateFunctionGroupArray(
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
if (!limit_size)
return createAggregateFunctionGroupArrayImpl<GroupArrayTrait<false, Sampler::NONE>>(argument_types[0]);
return createAggregateFunctionGroupArrayImpl<GroupArrayTrait<false, Sampler::NONE>>(argument_types[0], parameters);
else
return createAggregateFunctionGroupArrayImpl<GroupArrayTrait<true, Sampler::NONE>>(argument_types[0], max_elems);
return createAggregateFunctionGroupArrayImpl<GroupArrayTrait<true, Sampler::NONE>>(argument_types[0], parameters, max_elems);
}
AggregateFunctionPtr createAggregateFunctionGroupArraySample(
@ -114,7 +114,7 @@ AggregateFunctionPtr createAggregateFunctionGroupArraySample(
else
seed = thread_local_rng();
return createAggregateFunctionGroupArrayImpl<GroupArrayTrait<true, Sampler::RNG>>(argument_types[0], max_elems, seed);
return createAggregateFunctionGroupArrayImpl<GroupArrayTrait<true, Sampler::RNG>>(argument_types[0], parameters, max_elems, seed);
}
}

View File

@ -119,9 +119,9 @@ class GroupArrayNumericImpl final
public:
explicit GroupArrayNumericImpl(
const DataTypePtr & data_type_, UInt64 max_elems_ = std::numeric_limits<UInt64>::max(), UInt64 seed_ = 123456)
const DataTypePtr & data_type_, const Array & parameters_, UInt64 max_elems_ = std::numeric_limits<UInt64>::max(), UInt64 seed_ = 123456)
: IAggregateFunctionDataHelper<GroupArrayNumericData<T, Trait::sampler != Sampler::NONE>, GroupArrayNumericImpl<T, Trait>>(
{data_type_}, {})
{data_type_}, parameters_)
, max_elems(max_elems_)
, seed(seed_)
{
@ -421,9 +421,9 @@ class GroupArrayGeneralImpl final
UInt64 seed;
public:
GroupArrayGeneralImpl(const DataTypePtr & data_type_, UInt64 max_elems_ = std::numeric_limits<UInt64>::max(), UInt64 seed_ = 123456)
GroupArrayGeneralImpl(const DataTypePtr & data_type_, const Array & parameters_, UInt64 max_elems_ = std::numeric_limits<UInt64>::max(), UInt64 seed_ = 123456)
: IAggregateFunctionDataHelper<GroupArrayGeneralData<Node, Trait::sampler != Sampler::NONE>, GroupArrayGeneralImpl<Node, Trait>>(
{data_type_}, {})
{data_type_}, parameters_)
, data_type(this->argument_types[0])
, max_elems(max_elems_)
, seed(seed_)
@ -696,8 +696,8 @@ class GroupArrayGeneralListImpl final
UInt64 max_elems;
public:
GroupArrayGeneralListImpl(const DataTypePtr & data_type_, UInt64 max_elems_ = std::numeric_limits<UInt64>::max())
: IAggregateFunctionDataHelper<GroupArrayGeneralListData<Node>, GroupArrayGeneralListImpl<Node, Trait>>({data_type_}, {})
GroupArrayGeneralListImpl(const DataTypePtr & data_type_, const Array & parameters_, UInt64 max_elems_ = std::numeric_limits<UInt64>::max())
: IAggregateFunctionDataHelper<GroupArrayGeneralListData<Node>, GroupArrayGeneralListImpl<Node, Trait>>({data_type_}, parameters_)
, data_type(this->argument_types[0])
, max_elems(max_elems_)
{

View File

@ -1,6 +1,7 @@
#pragma once
#include <algorithm>
#include <memory>
#include <boost/noncopyable.hpp>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
@ -43,7 +44,7 @@ private:
void toLarge()
{
rb = std::make_shared<RoaringBitmap>();
rb = std::make_unique<RoaringBitmap>();
for (const auto & x : small)
rb->add(static_cast<Value>(x.getValue()));
small.clear();
@ -113,7 +114,7 @@ public:
readVarUInt(size, in);
std::unique_ptr<char[]> buf(new char[size]);
in.readStrict(buf.get(), size);
rb = std::make_shared<RoaringBitmap>(RoaringBitmap::read(buf.get()));
rb = std::make_unique<RoaringBitmap>(RoaringBitmap::read(buf.get()));
}
}
@ -140,7 +141,7 @@ public:
*/
std::shared_ptr<RoaringBitmap> getNewRoaringBitmapFromSmall() const
{
std::shared_ptr<RoaringBitmap> ret = std::make_shared<RoaringBitmap>();
std::shared_ptr<RoaringBitmap> ret = std::make_unique<RoaringBitmap>();
for (const auto & x : small)
ret->add(static_cast<Value>(x.getValue()));
return ret;

View File

@ -1,6 +1,7 @@
#pragma once
#include <string>
#include <optional>
#include <vector>
#include <boost/noncopyable.hpp>
#include <unordered_map>

View File

@ -87,18 +87,4 @@ FilterDescription::FilterDescription(const IColumn & column_)
ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER);
}
void checkColumnCanBeUsedAsFilter(const ColumnWithTypeAndName & column_elem)
{
ConstantFilterDescription const_filter;
if (column_elem.column)
const_filter = ConstantFilterDescription(*column_elem.column);
if (!const_filter.always_false && !const_filter.always_true)
{
auto column = column_elem.column ? column_elem.column : column_elem.type->createColumn();
FilterDescription filter(*column);
}
}
}

View File

@ -32,7 +32,4 @@ struct FilterDescription
struct ColumnWithTypeAndName;
/// Will throw an exception if column_elem cannot be used as a filter column.
void checkColumnCanBeUsedAsFilter(const ColumnWithTypeAndName & column_elem);
}

View File

@ -555,6 +555,8 @@
M(585, CANNOT_PARSE_YAML) \
M(586, CANNOT_CREATE_FILE) \
M(587, CONCURRENT_ACCESS_NOT_SUPPORTED) \
M(588, DISTRIBUTED_BROKEN_BATCH_INFO) \
M(589, DISTRIBUTED_BROKEN_BATCH_FILES) \
\
M(998, POSTGRESQL_CONNECTION_FAILURE) \
M(999, KEEPER_EXCEPTION) \

View File

@ -60,7 +60,8 @@ struct ThreadStack
void * getData() const { return data; }
private:
static constexpr size_t size = 16 << 10; /// 16 KiB - not too big but enough to handle error.
/// 16 KiB - not too big but enough to handle error.
static constexpr size_t size = std::max<size_t>(16 << 10, MINSIGSTKSZ);
void * data;
};
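The replacement keeps the stack buffer at 16 KiB but never lets it fall below MINSIGSTKSZ, the platform's minimum size for delivering a signal on an alternate stack. A standalone sketch, not the project's code, of how such a buffer is typically registered with sigaltstack:

#include <algorithm>
#include <cstddef>
#include <signal.h>
#include <vector>

void installAltSignalStack()
{
    // At least 16 KiB, but never below the platform minimum for signal delivery.
    static std::vector<char> buf(std::max<size_t>(16 << 10, MINSIGSTKSZ));

    stack_t ss{};
    ss.ss_sp = buf.data();
    ss.ss_size = buf.size();
    ss.ss_flags = 0;
    sigaltstack(&ss, nullptr); // handlers installed with SA_ONSTACK will run on this buffer
}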

View File

@ -44,7 +44,7 @@ size_t getStackSize(void ** out_address)
size = pthread_main_np() ? (8 * 1024 * 1024) : pthread_get_stacksize_np(thread);
// the stack address should point to the start of the stack, not the end, which is what pthread_get_stackaddr_np returns
address = reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(pthread_get_stackaddr_np(thread)) - max_stack_size);
address = reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(pthread_get_stackaddr_np(thread)) - size);
#else
pthread_attr_t attr;
# if defined(__FreeBSD__) || defined(OS_SUNOS)

View File

@ -2,6 +2,7 @@
#include <map>
#include <list>
#include <optional>
#include <string>
#include <set>
#include <initializer_list>

View File

@ -90,6 +90,7 @@ class IColumn;
M(Milliseconds, distributed_directory_monitor_max_sleep_time_ms, 30000, "Maximum sleep time for StorageDistributed DirectoryMonitors, it limits exponential growth too.", 0) \
\
M(Bool, distributed_directory_monitor_batch_inserts, false, "Should StorageDistributed DirectoryMonitors try to batch individual inserts into bigger ones.", 0) \
M(Bool, distributed_directory_monitor_split_batch_on_failure, false, "Should StorageDistributed DirectoryMonitors try to split a batch into smaller ones in case of failures.", 0) \
\
M(Bool, optimize_move_to_prewhere, true, "Allows disabling WHERE to PREWHERE optimization in SELECT queries from MergeTree.", 0) \
\
@ -469,6 +470,7 @@ class IColumn;
M(UnionMode, union_default_mode, UnionMode::Unspecified, "Set default Union Mode in SelectWithUnion query. Possible values: empty string, 'ALL', 'DISTINCT'. If empty, query without Union Mode will throw exception.", 0) \
M(Bool, optimize_aggregators_of_group_by_keys, true, "Eliminates min/max/any/anyLast aggregators of GROUP BY keys in SELECT section", 0) \
M(Bool, optimize_group_by_function_keys, true, "Eliminates functions of other keys in GROUP BY section", 0) \
M(Bool, legacy_column_name_of_tuple_literal, false, "List all element names of large tuple literals in their column names instead of a hash. This setting exists only for compatibility reasons. It makes sense to set it to 'true' while doing a rolling update of a cluster from a version lower than 21.7 to a higher one.", 0) \
\
M(Bool, query_plan_enable_optimizations, true, "Apply optimizations to query plan", 0) \
M(UInt64, query_plan_max_optimizations_to_apply, 10000, "Limit the total number of optimizations applied to query plan. If zero, ignored. If limit reached, throw exception", 0) \

View File

@ -25,6 +25,7 @@ namespace ErrorCodes
extern const int NOT_IMPLEMENTED;
extern const int FILE_ALREADY_EXISTS;
extern const int INCORRECT_QUERY;
extern const int ABORTED;
}
class AtomicDatabaseTablesSnapshotIterator final : public DatabaseTablesSnapshotIterator
@ -210,7 +211,7 @@ void DatabaseAtomic::renameTable(ContextPtr local_context, const String & table_
std::unique_lock<std::mutex> other_db_lock;
if (inside_database)
db_lock = std::unique_lock{mutex};
else if (this < &other_db)
{
db_lock = std::unique_lock{mutex};
other_db_lock = std::unique_lock{other_db.mutex};
@ -420,7 +421,18 @@ void DatabaseAtomic::loadStoredObjects(ContextMutablePtr local_context, bool has
{
/// Recreate symlinks to table data dirs in case of force restore, because some of them may be broken
if (has_force_restore_data_flag)
fs::remove_all(path_to_table_symlinks);
{
for (const auto & table_path : fs::directory_iterator(path_to_table_symlinks))
{
if (!fs::is_symlink(table_path))
{
throw Exception(ErrorCodes::ABORTED,
"'{}' is not a symlink. Atomic database should contains only symlinks.", std::string(table_path.path()));
}
fs::remove(table_path);
}
}
DatabaseOrdinary::loadStoredObjects(local_context, has_force_restore_data_flag, force_attach);

View File

@ -25,14 +25,19 @@ namespace ErrorCodes
}
static DataTypePtr convertPostgreSQLDataType(String & type, bool is_nullable, uint16_t dimensions)
static DataTypePtr convertPostgreSQLDataType(String & type, bool is_nullable, uint16_t dimensions, const std::function<void()> & recheck_array)
{
DataTypePtr res;
bool is_array = false;
/// Get rid of trailing '[]' for arrays
if (dimensions)
if (type.ends_with("[]"))
{
is_array = true;
while (type.ends_with("[]"))
type.resize(type.size() - 2);
}
if (type == "smallint")
res = std::make_shared<DataTypeInt16>();
@ -88,8 +93,24 @@ static DataTypePtr convertPostgreSQLDataType(String & type, bool is_nullable, ui
res = std::make_shared<DataTypeString>();
if (is_nullable)
res = std::make_shared<DataTypeNullable>(res);
while (dimensions--)
res = std::make_shared<DataTypeArray>(res);
if (is_array)
{
/// In some cases att_ndims does not return the correct number of dimensions
/// (it might incorrectly return 0, for example, when a postgres table is created via 'as select * from table_with_arrays').
/// So recheck all arrays separately afterwards. (We cannot check here on the same connection, because another query is already being executed.)
if (!dimensions)
{
/// Return 1d array type and recheck all arrays dims with array_ndims
res = std::make_shared<DataTypeArray>(res);
recheck_array();
}
else
{
while (dimensions--)
res = std::make_shared<DataTypeArray>(res);
}
}
return res;
}
@ -98,7 +119,7 @@ static DataTypePtr convertPostgreSQLDataType(String & type, bool is_nullable, ui
std::shared_ptr<NamesAndTypesList> fetchPostgreSQLTableStructure(
postgres::ConnectionHolderPtr connection_holder, const String & postgres_table_name, bool use_nulls)
{
auto columns = NamesAndTypesList();
auto columns = NamesAndTypes();
if (postgres_table_name.find('\'') != std::string::npos
|| postgres_table_name.find('\\') != std::string::npos)
@ -115,22 +136,46 @@ std::shared_ptr<NamesAndTypesList> fetchPostgreSQLTableStructure(
"AND NOT attisdropped AND attnum > 0", postgres_table_name);
try
{
pqxx::read_transaction tx(connection_holder->get());
auto stream{pqxx::stream_from::query(tx, query)};
std::tuple<std::string, std::string, std::string, uint16_t> row;
while (stream >> row)
std::set<size_t> recheck_arrays_indexes;
{
columns.push_back(NameAndTypePair(
std::get<0>(row),
convertPostgreSQLDataType(
std::get<1>(row),
use_nulls && (std::get<2>(row) == "f"), /// 'f' means that postgres `not_null` is false, i.e. value is nullable
std::get<3>(row))));
pqxx::read_transaction tx(connection_holder->get());
auto stream{pqxx::stream_from::query(tx, query)};
std::tuple<std::string, std::string, std::string, uint16_t> row;
size_t i = 0;
auto recheck_array = [&]() { recheck_arrays_indexes.insert(i); };
while (stream >> row)
{
auto data_type = convertPostgreSQLDataType(std::get<1>(row),
use_nulls && (std::get<2>(row) == "f"), /// 'f' means that postgres `not_null` is false, i.e. value is nullable
std::get<3>(row),
recheck_array);
columns.push_back(NameAndTypePair(std::get<0>(row), data_type));
++i;
}
stream.complete();
tx.commit();
}
for (const auto & i : recheck_arrays_indexes)
{
const auto & name_and_type = columns[i];
pqxx::nontransaction tx(connection_holder->get());
/// All rows must contain the same number of dimensions, so LIMIT 1 is ok. If the number of dimensions differs between rows,
/// such arrays cannot be used as a ClickHouse Array at all.
pqxx::result result{tx.exec(fmt::format("SELECT array_ndims({}) FROM {} LIMIT 1", name_and_type.name, postgres_table_name))};
auto dimensions = result[0][0].as<int>();
/// It is always a 1d array if it is in the recheck set.
DataTypePtr type = assert_cast<const DataTypeArray *>(name_and_type.type.get())->getNestedType();
while (dimensions--)
type = std::make_shared<DataTypeArray>(type);
columns[i] = NameAndTypePair(name_and_type.name, type);
}
stream.complete();
tx.commit();
}
catch (const pqxx::undefined_table &)
{
throw Exception(fmt::format(
@ -146,7 +191,7 @@ std::shared_ptr<NamesAndTypesList> fetchPostgreSQLTableStructure(
if (columns.empty())
return nullptr;
return std::make_shared<NamesAndTypesList>(columns);
return std::make_shared<NamesAndTypesList>(NamesAndTypesList(columns.begin(), columns.end()));
}
}
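Once array_ndims() reports the real dimension count for a column that was provisionally typed as a one-dimensional array, the element type only needs to be wrapped in one Array level per dimension. A standalone sketch of that re-wrap step, with a hypothetical DataType stand-in instead of the real IDataType hierarchy:

#include <memory>
#include <string>
#include <utility>

// Hypothetical stand-in for the real type system, for illustration only.
struct DataType
{
    std::string name;
    std::shared_ptr<DataType> nested; // set for Array(...)
};

std::shared_ptr<DataType> wrapInArrays(std::shared_ptr<DataType> element, int dimensions)
{
    auto res = std::move(element);
    while (dimensions--)
        res = std::make_shared<DataType>(DataType{"Array", res}); // Array(Array(...(T)))
    return res;
}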

View File

@ -883,6 +883,7 @@ void DiskS3::restoreFileOperations(const RestoreInformation & restore_informatio
to_path /= from_path.parent_path().filename();
else
to_path /= from_path.filename();
fs::create_directories(to_path);
fs::copy(from_path, to_path, fs::copy_options::recursive | fs::copy_options::overwrite_existing);
fs::remove_all(from_path);
}

View File

@ -992,7 +992,7 @@ public:
UInt8 byte = x >> offset;
/// Leading zeros.
if (byte == 0 && !was_nonzero && offset)
if (byte == 0 && !was_nonzero && offset) // -V560
continue;
was_nonzero = true;

View File

@ -3,9 +3,10 @@
#include <city.h>
#include <farmhash.h>
#include <metrohash.h>
#include <MurmurHash2.h>
#include <MurmurHash3.h>
#if !defined(ARCADIA_BUILD)
# include <murmurhash2.h>
# include <murmurhash3.h>
# include "config_functions.h"
# include "config_core.h"
#endif

View File

@ -15,6 +15,7 @@ ADDINCL(
contrib/libs/libdivide
contrib/libs/rapidjson/include
contrib/libs/xxhash
contrib/restricted/murmurhash
)
PEERDIR(
@ -30,6 +31,7 @@ PEERDIR(
contrib/libs/metrohash
contrib/libs/rapidjson
contrib/libs/xxhash
contrib/restricted/murmurhash
library/cpp/consistent_hashing
)

View File

@ -14,6 +14,7 @@ ADDINCL(
contrib/libs/libdivide
contrib/libs/rapidjson/include
contrib/libs/xxhash
contrib/restricted/murmurhash
)
PEERDIR(
@ -29,6 +30,7 @@ PEERDIR(
contrib/libs/metrohash
contrib/libs/rapidjson
contrib/libs/xxhash
contrib/restricted/murmurhash
library/cpp/consistent_hashing
)

View File

@ -184,7 +184,7 @@ inline bool checkString(const String & s, ReadBuffer & buf)
return checkString(s.c_str(), buf);
}
inline bool checkChar(char c, ReadBuffer & buf)
inline bool checkChar(char c, ReadBuffer & buf) // -V1071
{
if (buf.eof() || *buf.position() != c)
return false;
@ -393,7 +393,7 @@ void readIntText(T & x, ReadBuffer & buf)
}
template <ReadIntTextCheckOverflow check_overflow = ReadIntTextCheckOverflow::CHECK_OVERFLOW, typename T>
bool tryReadIntText(T & x, ReadBuffer & buf)
bool tryReadIntText(T & x, ReadBuffer & buf) // -V1071
{
return readIntTextImpl<T, bool, check_overflow>(x, buf);
}
@ -1248,7 +1248,7 @@ bool loadAtPosition(ReadBuffer & in, Memory<> & memory, char * & current);
struct PcgDeserializer
{
static void deserializePcg32(const pcg32_fast & rng, ReadBuffer & buf)
static void deserializePcg32(pcg32_fast & rng, ReadBuffer & buf)
{
decltype(rng.state_) multiplier, increment, state;
readText(multiplier, buf);
@ -1261,6 +1261,8 @@ struct PcgDeserializer
throw Exception(ErrorCodes::INCORRECT_DATA, "Incorrect multiplier in pcg32: expected {}, got {}", rng.multiplier(), multiplier);
if (increment != rng.increment())
throw Exception(ErrorCodes::INCORRECT_DATA, "Incorrect increment in pcg32: expected {}, got {}", rng.increment(), increment);
rng.state_ = state;
}
};

View File

@ -212,6 +212,7 @@ public:
/// Conversion should be possible with only usage of CAST function and renames.
/// @param ignore_constant_values - Do not check that constants are the same. Use the value from result_header.
/// @param add_casted_columns - Create new columns with converted values instead of replacing the original ones.
/// @param new_names - Output parameter for new column names when add_casted_columns is used.
static ActionsDAGPtr makeConvertingActions(
const ColumnsWithTypeAndName & source,
const ColumnsWithTypeAndName & result,

View File

@ -348,7 +348,7 @@ SetPtr makeExplicitSet(
const ASTPtr & left_arg = args.children.at(0);
const ASTPtr & right_arg = args.children.at(1);
auto column_name = left_arg->getColumnName();
auto column_name = left_arg->getColumnName(context->getSettingsRef());
const auto & dag_node = actions.findInIndex(column_name);
const DataTypePtr & left_arg_type = dag_node.result_type;
@ -641,7 +641,7 @@ std::optional<NameAndTypePair> ActionsMatcher::getNameAndTypeFromAST(const ASTPt
{
// If the argument is a literal, we generated a unique column name for it.
// Use it instead of a generic display name.
auto child_column_name = ast->getColumnName();
auto child_column_name = ast->getColumnName(data.getContext()->getSettingsRef());
const auto * as_literal = ast->as<ASTLiteral>();
if (as_literal)
{
@ -703,7 +703,7 @@ ASTs ActionsMatcher::doUntuple(const ASTFunction * function, ActionsMatcher::Dat
func->setAlias(data.getUniqueName("_ut_" + name));
auto function_builder = FunctionFactory::instance().get(func->name, data.getContext());
data.addFunction(function_builder, {tuple_name_type->name, literal->getColumnName()}, func->getColumnName());
data.addFunction(function_builder, {tuple_name_type->name, literal->getColumnName(data.getContext()->getSettingsRef())}, func->getColumnName(data.getContext()->getSettingsRef()));
columns.push_back(std::move(func));
}
@ -740,6 +740,7 @@ void ActionsMatcher::visit(ASTExpressionList & expression_list, const ASTPtr &,
void ActionsMatcher::visit(const ASTIdentifier & identifier, const ASTPtr &, Data & data)
{
auto column_name = identifier.getColumnName();
if (data.hasColumn(column_name))
return;
@ -766,7 +767,7 @@ void ActionsMatcher::visit(const ASTIdentifier & identifier, const ASTPtr &, Dat
void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & data)
{
auto column_name = ast->getColumnName();
auto column_name = ast->getColumnName(data.getContext()->getSettingsRef());
if (data.hasColumn(column_name))
return;
@ -782,7 +783,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
ASTPtr arg = node.arguments->children.at(0);
visit(arg, data);
if (!data.only_consts)
data.addArrayJoin(arg->getColumnName(), column_name);
data.addArrayJoin(arg->getColumnName(data.getContext()->getSettingsRef()), column_name);
return;
}
@ -804,7 +805,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
/// We are in the part of the tree that we are not going to compute. You just need to define types.
/// Do not subquery and create sets. We replace "in*" function to "in*IgnoreSet".
auto argument_name = node.arguments->children.at(0)->getColumnName();
auto argument_name = node.arguments->children.at(0)->getColumnName(data.getContext()->getSettingsRef());
data.addFunction(
FunctionFactory::instance().get(node.name + "IgnoreSet", data.getContext()),
@ -933,7 +934,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
if (!prepared_set->empty())
column.name = data.getUniqueName("__set");
else
column.name = child->getColumnName();
column.name = child->getColumnName(data.getContext()->getSettingsRef());
if (!data.hasColumn(column.name))
{
@ -1012,7 +1013,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
visit(lambda->arguments->children.at(1), data);
auto lambda_dag = data.actions_stack.popLevel();
String result_name = lambda->arguments->children.at(1)->getColumnName();
String result_name = lambda->arguments->children.at(1)->getColumnName(data.getContext()->getSettingsRef());
lambda_dag->removeUnusedActions(Names(1, result_name));
auto lambda_actions = std::make_shared<ExpressionActions>(
@ -1027,7 +1028,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
if (findColumn(required_arg, lambda_arguments) == lambda_arguments.end())
captured.push_back(required_arg);
/// We can not name `getColumnName()`,
/// We can not name `getColumnName(data.getContext()->getSettingsRef())`,
/// because it does not uniquely define the expression (the types of arguments can be different).
String lambda_name = data.getUniqueName("__lambda");
@ -1057,7 +1058,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
if (arguments_present)
{
/// Calculate column name here again, because AST may be changed here (in case of untuple).
data.addFunction(function_builder, argument_names, ast->getColumnName());
data.addFunction(function_builder, argument_names, ast->getColumnName(data.getContext()->getSettingsRef()));
}
}
@ -1071,7 +1072,7 @@ void ActionsMatcher::visit(const ASTLiteral & literal, const ASTPtr & /* ast */,
// AST here? Anyway, do not modify the column name if it is set already.
if (literal.unique_column_name.empty())
{
const auto default_name = literal.getColumnName();
const auto default_name = literal.getColumnName(data.getContext()->getSettingsRef());
const auto & index = data.actions_stack.getLastActionsIndex();
const auto * existing_column = index.tryGetNode(default_name);
@ -1151,7 +1152,7 @@ SetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no_su
}
/// We get the stream of blocks for the subquery. Create Set and put it in place of the subquery.
String set_id = right_in_operand->getColumnName();
String set_id = right_in_operand->getColumnName(data.getContext()->getSettingsRef());
SubqueryForSet & subquery_for_set = data.subqueries_for_sets[set_id];
@ -1187,7 +1188,7 @@ SetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no_su
{
const auto & last_actions = data.actions_stack.getLastActions();
const auto & index = data.actions_stack.getLastActionsIndex();
if (index.contains(left_in_operand->getColumnName()))
if (index.contains(left_in_operand->getColumnName(data.getContext()->getSettingsRef())))
/// An explicit enumeration of values in parentheses.
return makeExplicitSet(&node, last_actions, false, data.getContext(), data.set_size_limit, data.prepared_sets);
else

View File

@ -179,7 +179,7 @@ String Cluster::Address::toFullString(bool use_compact_format) const
// shard_num/replica_num like in system.clusters table
throw Exception("shard_num/replica_num cannot be zero", ErrorCodes::LOGICAL_ERROR);
return "shard" + std::to_string(shard_index) + "_replica" + std::to_string(replica_index);
return fmt::format("shard{}_replica{}", shard_index, replica_index);
}
else
{
@ -199,7 +199,7 @@ Cluster::Address Cluster::Address::fromFullString(const String & full_string)
const char * user_pw_end = strchr(full_string.data(), '@');
/// parsing with the new [shard{shard_index}[_replica{replica_index}]] format
/// parsing with the new shard{shard_index}[_replica{replica_index}] format
if (!user_pw_end && startsWith(full_string, "shard"))
{
const char * underscore = strchr(full_string.data(), '_');
@ -401,6 +401,9 @@ Cluster::Cluster(const Poco::Util::AbstractConfiguration & config,
bool internal_replication = config.getBool(partial_prefix + ".internal_replication", false);
ShardInfoInsertPathForInternalReplication insert_paths;
/// "_all_replicas" is a marker that will be replaced with all replicas
/// (for creating connections in the Distributed engine)
insert_paths.compact = fmt::format("shard{}_all_replicas", current_shard_num);
for (const auto & replica_key : replica_keys)
{
@ -419,20 +422,10 @@ Cluster::Cluster(const Poco::Util::AbstractConfiguration & config,
if (internal_replication)
{
/// use_compact_format=0
{
auto dir_name = replica_addresses.back().toFullString(false /* use_compact_format */);
if (!replica_addresses.back().is_local)
concatInsertPath(insert_paths.prefer_localhost_replica, dir_name);
concatInsertPath(insert_paths.no_prefer_localhost_replica, dir_name);
}
/// use_compact_format=1
{
auto dir_name = replica_addresses.back().toFullString(true /* use_compact_format */);
if (!replica_addresses.back().is_local)
concatInsertPath(insert_paths.prefer_localhost_replica_compact, dir_name);
concatInsertPath(insert_paths.no_prefer_localhost_replica_compact, dir_name);
}
auto dir_name = replica_addresses.back().toFullString(/* use_compact_format= */ false);
if (!replica_addresses.back().is_local)
concatInsertPath(insert_paths.prefer_localhost_replica, dir_name);
concatInsertPath(insert_paths.no_prefer_localhost_replica, dir_name);
}
}
else
@ -660,17 +653,17 @@ const std::string & Cluster::ShardInfo::insertPathForInternalReplication(bool pr
const auto & paths = insert_path_for_internal_replication;
if (!use_compact_format)
{
if (prefer_localhost_replica)
return paths.prefer_localhost_replica;
else
return paths.no_prefer_localhost_replica;
const auto & path = prefer_localhost_replica ? paths.prefer_localhost_replica : paths.no_prefer_localhost_replica;
if (path.size() > NAME_MAX)
{
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Path '{}' for async distributed INSERT is too long (exceed {} limit)", path, NAME_MAX);
}
return path;
}
else
{
if (prefer_localhost_replica)
return paths.prefer_localhost_replica_compact;
else
return paths.no_prefer_localhost_replica_compact;
return paths.compact;
}
}

View File

@ -166,10 +166,8 @@ public:
std::string prefer_localhost_replica;
/// prefer_localhost_replica == 0 && use_compact_format_in_distributed_parts_names=0
std::string no_prefer_localhost_replica;
/// prefer_localhost_replica == 1 && use_compact_format_in_distributed_parts_names=1
std::string prefer_localhost_replica_compact;
/// prefer_localhost_replica == 0 && use_compact_format_in_distributed_parts_names=1
std::string no_prefer_localhost_replica_compact;
/// use_compact_format_in_distributed_parts_names=1
std::string compact;
};
struct ShardInfo

View File

@ -394,7 +394,7 @@ struct ContextSharedPart
/// Clusters for distributed tables
/// Initialized on demand (on distributed storages initialization) since Settings should be initialized
std::unique_ptr<Clusters> clusters;
std::shared_ptr<Clusters> clusters;
ConfigurationPtr clusters_config; /// Stores updated configs
mutable std::mutex clusters_mutex; /// Guards clusters and clusters_config
@ -1882,7 +1882,7 @@ std::optional<UInt16> Context::getTCPPortSecure() const
std::shared_ptr<Cluster> Context::getCluster(const std::string & cluster_name) const
{
auto res = getClusters().getCluster(cluster_name);
auto res = getClusters()->getCluster(cluster_name);
if (res)
return res;
@ -1896,7 +1896,7 @@ std::shared_ptr<Cluster> Context::getCluster(const std::string & cluster_name) c
std::shared_ptr<Cluster> Context::tryGetCluster(const std::string & cluster_name) const
{
return getClusters().getCluster(cluster_name);
return getClusters()->getCluster(cluster_name);
}
@ -1911,7 +1911,7 @@ void Context::reloadClusterConfig() const
}
const auto & config = cluster_config ? *cluster_config : getConfigRef();
auto new_clusters = std::make_unique<Clusters>(config, settings);
auto new_clusters = std::make_shared<Clusters>(config, settings);
{
std::lock_guard lock(shared->clusters_mutex);
@ -1927,16 +1927,16 @@ void Context::reloadClusterConfig() const
}
Clusters & Context::getClusters() const
std::shared_ptr<Clusters> Context::getClusters() const
{
std::lock_guard lock(shared->clusters_mutex);
if (!shared->clusters)
{
const auto & config = shared->clusters_config ? *shared->clusters_config : getConfigRef();
shared->clusters = std::make_unique<Clusters>(config, settings);
shared->clusters = std::make_shared<Clusters>(config, settings);
}
return *shared->clusters;
return shared->clusters;
}
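Switching the cached clusters pointer from std::unique_ptr to std::shared_ptr lets getClusters() return a snapshot instead of a reference, so a concurrent reloadClusterConfig() that swaps the pointer cannot invalidate what callers already hold. A minimal standalone sketch of the pattern, with hypothetical Registry/Clusters names:

#include <memory>
#include <mutex>

struct Clusters { /* configuration snapshot */ };

class Registry
{
    std::mutex mutex;
    std::shared_ptr<Clusters> clusters;

public:
    std::shared_ptr<Clusters> get()
    {
        std::lock_guard lock(mutex);
        if (!clusters)
            clusters = std::make_shared<Clusters>();
        return clusters; // the caller keeps this snapshot alive on its own
    }

    void reload()
    {
        auto fresh = std::make_shared<Clusters>();
        std::lock_guard lock(mutex);
        clusters = std::move(fresh); // readers holding the old snapshot are unaffected
    }
};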

View File

@ -676,7 +676,7 @@ public:
void setDDLWorker(std::unique_ptr<DDLWorker> ddl_worker);
DDLWorker & getDDLWorker() const;
Clusters & getClusters() const;
std::shared_ptr<Clusters> getClusters() const;
std::shared_ptr<Cluster> getCluster(const std::string & cluster_name) const;
std::shared_ptr<Cluster> tryGetCluster(const std::string & cluster_name) const;
void setClustersConfig(const ConfigurationPtr & config, const String & config_name = "remote_servers");

View File

@ -253,7 +253,7 @@ struct ExpressionActionsChain : WithContext
steps.clear();
}
ActionsDAGPtr getLastActions(bool allow_empty = false)
ActionsDAGPtr getLastActions(bool allow_empty = false) // -V1071
{
if (steps.empty())
{

View File

@ -244,7 +244,7 @@ void ExpressionAnalyzer::analyzeAggregation()
ssize_t size = group_asts.size();
getRootActionsNoMakeSet(group_asts[i], true, temp_actions, false);
const auto & column_name = group_asts[i]->getColumnName();
const auto & column_name = group_asts[i]->getColumnName(getContext()->getSettingsRef());
const auto * node = temp_actions->tryFindInIndex(column_name);
if (!node)
throw Exception("Unknown identifier (in GROUP BY): " + column_name, ErrorCodes::UNKNOWN_IDENTIFIER);
@ -398,7 +398,7 @@ void SelectQueryExpressionAnalyzer::makeSetsForIndex(const ASTPtr & node)
auto temp_actions = std::make_shared<ActionsDAG>(columns_after_join);
getRootActions(left_in_operand, true, temp_actions);
if (temp_actions->tryFindInIndex(left_in_operand->getColumnName()))
if (temp_actions->tryFindInIndex(left_in_operand->getColumnName(getContext()->getSettingsRef())))
makeExplicitSet(func, *temp_actions, true, getContext(), settings.size_limits_for_set, prepared_sets);
}
}
@ -446,7 +446,7 @@ bool ExpressionAnalyzer::makeAggregateDescriptions(ActionsDAGPtr & actions)
if (node->arguments)
getRootActionsNoMakeSet(node->arguments, true, actions);
aggregate.column_name = node->getColumnName();
aggregate.column_name = node->getColumnName(getContext()->getSettingsRef());
const ASTs & arguments = node->arguments ? node->arguments->children : ASTs();
aggregate.argument_names.resize(arguments.size());
@ -454,7 +454,7 @@ bool ExpressionAnalyzer::makeAggregateDescriptions(ActionsDAGPtr & actions)
for (size_t i = 0; i < arguments.size(); ++i)
{
const std::string & name = arguments[i]->getColumnName();
const std::string & name = arguments[i]->getColumnName(getContext()->getSettingsRef());
const auto * dag_node = actions->tryFindInIndex(name);
if (!dag_node)
{
@ -647,7 +647,7 @@ void ExpressionAnalyzer::makeWindowDescriptions(ActionsDAGPtr actions)
WindowFunctionDescription window_function;
window_function.function_node = function_node;
window_function.column_name
= window_function.function_node->getColumnName();
= window_function.function_node->getColumnName(getContext()->getSettingsRef());
window_function.function_parameters
= window_function.function_node->parameters
? getAggregateFunctionParametersArray(
@ -666,7 +666,7 @@ void ExpressionAnalyzer::makeWindowDescriptions(ActionsDAGPtr actions)
window_function.argument_names.resize(arguments.size());
for (size_t i = 0; i < arguments.size(); ++i)
{
const std::string & name = arguments[i]->getColumnName();
const std::string & name = arguments[i]->getColumnName(getContext()->getSettingsRef());
const auto * node = actions->tryFindInIndex(name);
if (!node)
@ -953,10 +953,8 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendPrewhere(
ExpressionActionsChain & chain, bool only_types, const Names & additional_required_columns)
{
const auto * select_query = getSelectQuery();
ActionsDAGPtr prewhere_actions;
if (!select_query->prewhere())
return prewhere_actions;
return nullptr;
Names first_action_names;
if (!chain.steps.empty())
@ -964,7 +962,7 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendPrewhere(
auto & step = chain.lastStep(sourceColumns());
getRootActions(select_query->prewhere(), only_types, step.actions());
String prewhere_column_name = select_query->prewhere()->getColumnName();
String prewhere_column_name = select_query->prewhere()->getColumnName(getContext()->getSettingsRef());
step.addRequiredOutput(prewhere_column_name);
const auto & node = step.actions()->findInIndex(prewhere_column_name);
@ -973,6 +971,7 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendPrewhere(
throw Exception("Invalid type for filter in PREWHERE: " + filter_type->getName(),
ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER);
ActionsDAGPtr prewhere_actions;
{
/// Remove unused source_columns from prewhere actions.
auto tmp_actions_dag = std::make_shared<ActionsDAG>(sourceColumns());
@ -1038,18 +1037,6 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendPrewhere(
return prewhere_actions;
}
void SelectQueryExpressionAnalyzer::appendPreliminaryFilter(ExpressionActionsChain & chain, ActionsDAGPtr actions_dag, String column_name)
{
ExpressionActionsChain::Step & step = chain.lastStep(sourceColumns());
// FIXME: assert(filter_info);
auto * expression_step = typeid_cast<ExpressionActionsChain::ExpressionActionsStep *>(&step);
expression_step->actions_dag = std::move(actions_dag);
step.addRequiredOutput(column_name);
chain.addStep();
}
bool SelectQueryExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain, bool only_types)
{
const auto * select_query = getSelectQuery();
@ -1061,7 +1048,7 @@ bool SelectQueryExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain,
getRootActions(select_query->where(), only_types, step.actions());
auto where_column_name = select_query->where()->getColumnName();
auto where_column_name = select_query->where()->getColumnName(getContext()->getSettingsRef());
step.addRequiredOutput(where_column_name);
const auto & node = step.actions()->findInIndex(where_column_name);
@ -1086,7 +1073,7 @@ bool SelectQueryExpressionAnalyzer::appendGroupBy(ExpressionActionsChain & chain
ASTs asts = select_query->groupBy()->children;
for (const auto & ast : asts)
{
step.addRequiredOutput(ast->getColumnName());
step.addRequiredOutput(ast->getColumnName(getContext()->getSettingsRef()));
getRootActions(ast, only_types, step.actions());
}
@ -1114,7 +1101,7 @@ void SelectQueryExpressionAnalyzer::appendAggregateFunctionsArguments(Expression
for (const auto & name : desc.argument_names)
step.addRequiredOutput(name);
/// Collect aggregates removing duplicates by node.getColumnName()
/// Collect aggregates removing duplicates by node.getColumnName(getContext()->getSettingsRef())
/// It's not clear why we recollect aggregates (for query parts) while we're able to use previously collected ones (for entire query)
/// @note The original recollection logic didn't remove duplicates.
GetAggregatesVisitor::Data data;
@ -1169,7 +1156,7 @@ void SelectQueryExpressionAnalyzer::appendWindowFunctionsArguments(
// (2b) Required function argument columns.
for (const auto & a : f.function_node->arguments->children)
{
step.addRequiredOutput(a->getColumnName());
step.addRequiredOutput(a->getColumnName(getContext()->getSettingsRef()));
}
}
@ -1191,7 +1178,7 @@ bool SelectQueryExpressionAnalyzer::appendHaving(ExpressionActionsChain & chain,
ExpressionActionsChain::Step & step = chain.lastStep(aggregated_columns);
getRootActionsForHaving(select_query->having(), only_types, step.actions());
step.addRequiredOutput(select_query->having()->getColumnName());
step.addRequiredOutput(select_query->having()->getColumnName(getContext()->getSettingsRef()));
return true;
}
@ -1215,7 +1202,7 @@ void SelectQueryExpressionAnalyzer::appendSelect(ExpressionActionsChain & chain,
continue;
}
step.addRequiredOutput(child->getColumnName());
step.addRequiredOutput(child->getColumnName(getContext()->getSettingsRef()));
}
}
@ -1243,7 +1230,7 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendOrderBy(ExpressionActionsChai
if (!ast || ast->children.empty())
throw Exception("Bad order expression AST", ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE);
ASTPtr order_expression = ast->children.at(0);
step.addRequiredOutput(order_expression->getColumnName());
step.addRequiredOutput(order_expression->getColumnName(getContext()->getSettingsRef()));
if (ast->with_fill)
with_fill = true;
@ -1293,7 +1280,7 @@ bool SelectQueryExpressionAnalyzer::appendLimitBy(ExpressionActionsChain & chain
for (const auto & child : select_query->limitBy()->children)
{
auto child_name = child->getColumnName();
auto child_name = child->getColumnName(getContext()->getSettingsRef());
if (!aggregated_names.count(child_name))
step.addRequiredOutput(std::move(child_name));
}
@ -1309,13 +1296,15 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendProjectResult(ExpressionActio
NamesWithAliases result_columns;
const auto & settings = getContext()->getSettingsRef();
ASTs asts = select_query->select()->children;
for (const auto & ast : asts)
{
String result_name = ast->getAliasOrColumnName();
String result_name = ast->getAliasOrColumnName(settings);
if (required_result_columns.empty() || required_result_columns.count(result_name))
{
std::string source_name = ast->getColumnName();
std::string source_name = ast->getColumnName(settings);
/*
* For temporary columns created by ExpressionAnalyzer for literals,
@ -1357,7 +1346,7 @@ void ExpressionAnalyzer::appendExpression(ExpressionActionsChain & chain, const
{
ExpressionActionsChain::Step & step = chain.lastStep(sourceColumns());
getRootActions(expr, only_types, step.actions());
step.addRequiredOutput(expr->getColumnName());
step.addRequiredOutput(expr->getColumnName(getContext()->getSettingsRef()));
}
@ -1374,12 +1363,13 @@ ActionsDAGPtr ExpressionAnalyzer::getActionsDAG(bool add_aliases, bool project_r
else
asts = ASTs(1, query);
const auto & settings = getContext()->getSettingsRef();
for (const auto & ast : asts)
{
std::string name = ast->getColumnName();
std::string name = ast->getColumnName(settings);
std::string alias;
if (add_aliases)
alias = ast->getAliasOrColumnName();
alias = ast->getAliasOrColumnName(settings);
else
alias = name;
result_columns.emplace_back(name, alias);
@ -1514,7 +1504,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult(
if (auto actions = query_analyzer.appendPrewhere(chain, !first_stage, additional_required_columns_after_prewhere))
{
prewhere_info = std::make_shared<PrewhereDAGInfo>(actions, query.prewhere()->getColumnName());
prewhere_info = std::make_shared<PrewhereDAGInfo>(actions, query.prewhere()->getColumnName(settings));
if (allowEarlyConstantFolding(*prewhere_info->prewhere_actions, settings))
{
@ -1524,7 +1514,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult(
ExpressionActions(
prewhere_info->prewhere_actions,
ExpressionActionsSettings::fromSettings(context->getSettingsRef())).execute(before_prewhere_sample);
auto & column_elem = before_prewhere_sample.getByName(query.prewhere()->getColumnName());
auto & column_elem = before_prewhere_sample.getByName(query.prewhere()->getColumnName(settings));
/// If the filter column is a constant, record it.
if (column_elem.column)
prewhere_constant_filter_description = ConstantFilterDescription(*column_elem.column);
@ -1559,7 +1549,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult(
ExpressionActions(
before_where,
ExpressionActionsSettings::fromSettings(context->getSettingsRef())).execute(before_where_sample);
auto & column_elem = before_where_sample.getByName(query.where()->getColumnName());
auto & column_elem = before_where_sample.getByName(query.where()->getColumnName(settings));
/// If the filter column is a constant, record it.
if (column_elem.column)
where_constant_filter_description = ConstantFilterDescription(*column_elem.column);
@ -1650,7 +1640,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult(
const auto * select_query = query_analyzer.getSelectQuery();
for (const auto & child : select_query->select()->children)
{
step.addRequiredOutput(child->getColumnName());
step.addRequiredOutput(child->getColumnName(settings));
}
}
@ -1706,7 +1696,8 @@ void ExpressionAnalysisResult::finalize(const ExpressionActionsChain & chain, si
if (hasWhere())
{
where_column_name = query.where()->getColumnName();
const auto & settings = chain.getContext()->getSettingsRef();
where_column_name = query.where()->getColumnName(settings);
remove_where_filter = chain.steps.at(where_step_num)->required_output.find(where_column_name)->second;
}
}

View File

@ -357,8 +357,6 @@ private:
ArrayJoinActionPtr appendArrayJoin(ExpressionActionsChain & chain, ActionsDAGPtr & before_array_join, bool only_types);
bool appendJoinLeftKeys(ExpressionActionsChain & chain, bool only_types);
JoinPtr appendJoin(ExpressionActionsChain & chain);
/// Add preliminary rows filtration. Actions are created in other expression analyzer to prevent any possible alias injection.
void appendPreliminaryFilter(ExpressionActionsChain & chain, ActionsDAGPtr actions_dag, String column_name);
/// remove_filter is set in ExpressionActionsChain::finalize();
/// Columns in `additional_required_columns` will not be removed (they can be used for e.g. sampling or FINAL modifier).
ActionsDAGPtr appendPrewhere(ExpressionActionsChain & chain, bool only_types, const Names & additional_required_columns);

View File

@ -1101,6 +1101,7 @@ BlockIO InterpreterCreateQuery::doCreateOrReplaceTable(ASTCreateQuery & create,
[[maybe_unused]] bool done = doCreateTable(create, properties);
assert(done);
ast_drop->table = create.table;
ast_drop->is_dictionary = create.is_dictionary;
ast_drop->database = create.database;
ast_drop->kind = ASTDropQuery::Drop;
created = true;
@ -1113,14 +1114,18 @@ BlockIO InterpreterCreateQuery::doCreateOrReplaceTable(ASTCreateQuery & create,
ASTRenameQuery::Table{create.database, create.table},
ASTRenameQuery::Table{create.database, table_to_replace_name}
};
ast_rename->elements.push_back(std::move(elem));
ast_rename->exchange = true;
ast_rename->dictionary = create.is_dictionary;
InterpreterRenameQuery(ast_rename, getContext()).execute();
replaced = true;
InterpreterDropQuery(ast_drop, getContext()).execute();
create.table = table_to_replace_name;
return fillTableIfNeeded(create);
}
catch (...)

View File

@ -143,7 +143,7 @@ String InterpreterSelectQuery::generateFilterActions(ActionsDAGPtr & actions, co
SelectQueryExpressionAnalyzer analyzer(query_ast, syntax_result, context, metadata_snapshot);
actions = analyzer.simpleSelectActions();
auto column_name = expr_list->children.at(0)->getColumnName();
auto column_name = expr_list->children.at(0)->getColumnName(context->getSettingsRef());
actions->removeUnusedActions(NameSet{column_name});
actions->projectInput(false);
@ -779,7 +779,7 @@ static SortDescription getSortDescription(const ASTSelectQuery & query, ContextP
order_descr.reserve(query.orderBy()->children.size());
for (const auto & elem : query.orderBy()->children)
{
String name = elem->children.front()->getColumnName();
String name = elem->children.front()->getColumnName(context->getSettingsRef());
const auto & order_by_elem = elem->as<ASTOrderByElement &>();
std::shared_ptr<Collator> collator;
@ -798,14 +798,14 @@ static SortDescription getSortDescription(const ASTSelectQuery & query, ContextP
return order_descr;
}
static SortDescription getSortDescriptionFromGroupBy(const ASTSelectQuery & query)
static SortDescription getSortDescriptionFromGroupBy(const ASTSelectQuery & query, ContextPtr context)
{
SortDescription order_descr;
order_descr.reserve(query.groupBy()->children.size());
for (const auto & elem : query.groupBy()->children)
{
String name = elem->getColumnName();
String name = elem->getColumnName(context->getSettingsRef());
order_descr.emplace_back(name, 1, 1);
}
@ -1948,13 +1948,13 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc
{
query_info.projection->order_optimizer = std::make_shared<ReadInOrderOptimizer>(
query_info.projection->group_by_elements_actions,
getSortDescriptionFromGroupBy(query),
getSortDescriptionFromGroupBy(query, context),
query_info.syntax_analyzer_result);
}
else
{
query_info.order_optimizer = std::make_shared<ReadInOrderOptimizer>(
analysis_result.group_by_elements_actions, getSortDescriptionFromGroupBy(query), query_info.syntax_analyzer_result);
analysis_result.group_by_elements_actions, getSortDescriptionFromGroupBy(query, context), query_info.syntax_analyzer_result);
}
}
@ -2033,7 +2033,7 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc
void InterpreterSelectQuery::executeWhere(QueryPlan & query_plan, const ActionsDAGPtr & expression, bool remove_filter)
{
auto where_step = std::make_unique<FilterStep>(
query_plan.getCurrentDataStream(), expression, getSelectQuery().where()->getColumnName(), remove_filter);
query_plan.getCurrentDataStream(), expression, getSelectQuery().where()->getColumnName(context->getSettingsRef()), remove_filter);
where_step->setStepDescription("WHERE");
query_plan.addStep(std::move(where_step));
@ -2080,7 +2080,7 @@ void InterpreterSelectQuery::executeAggregation(QueryPlan & query_plan, const Ac
SortDescription group_by_sort_description;
if (group_by_info && settings.optimize_aggregation_in_order)
group_by_sort_description = getSortDescriptionFromGroupBy(getSelectQuery());
group_by_sort_description = getSortDescriptionFromGroupBy(getSelectQuery(), context);
else
group_by_info = nullptr;
@ -2128,7 +2128,7 @@ void InterpreterSelectQuery::executeMergeAggregated(QueryPlan & query_plan, bool
void InterpreterSelectQuery::executeHaving(QueryPlan & query_plan, const ActionsDAGPtr & expression)
{
auto having_step
= std::make_unique<FilterStep>(query_plan.getCurrentDataStream(), expression, getSelectQuery().having()->getColumnName(), false);
= std::make_unique<FilterStep>(query_plan.getCurrentDataStream(), expression, getSelectQuery().having()->getColumnName(context->getSettingsRef()), false);
having_step->setStepDescription("HAVING");
query_plan.addStep(std::move(having_step));
@ -2144,7 +2144,7 @@ void InterpreterSelectQuery::executeTotalsAndHaving(
query_plan.getCurrentDataStream(),
overflow_row,
expression,
has_having ? getSelectQuery().having()->getColumnName() : "",
has_having ? getSelectQuery().having()->getColumnName(context->getSettingsRef()) : "",
settings.totals_mode,
settings.totals_auto_threshold,
final);
@ -2461,7 +2461,7 @@ void InterpreterSelectQuery::executeLimitBy(QueryPlan & query_plan)
Names columns;
for (const auto & elem : query.limitBy()->children)
columns.emplace_back(elem->getColumnName());
columns.emplace_back(elem->getColumnName(context->getSettingsRef()));
UInt64 length = getLimitUIntValue(query.limitByLength(), context, "LIMIT");
UInt64 offset = (query.limitByOffset() ? getLimitUIntValue(query.limitByOffset(), context, "OFFSET") : 0);

View File

@ -39,7 +39,7 @@ std::pair<Field, std::shared_ptr<const IDataType>> evaluateConstantExpression(co
if (context->getSettingsRef().normalize_function_names)
FunctionNameNormalizer().visit(ast.get());
String name = ast->getColumnName();
String name = ast->getColumnName(context->getSettingsRef());
auto syntax_result = TreeRewriter(context).analyze(ast, source_columns);
ExpressionActionsPtr expr_for_constant_folding = ExpressionAnalyzer(ast, syntax_result, context).getConstActions();

View File

@ -305,8 +305,16 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat
}
else
{
String action = "CREATE";
if (attach)
action = "ATTACH";
else if (replace_table && create_or_replace)
action = "CREATE OR REPLACE";
else if (replace_table)
action = "REPLACE";
/// Always DICTIONARY
settings.ostr << (settings.hilite ? hilite_keyword : "") << (attach ? "ATTACH " : "CREATE ") << "DICTIONARY "
settings.ostr << (settings.hilite ? hilite_keyword : "") << action << " DICTIONARY "
<< (if_not_exists ? "IF NOT EXISTS " : "") << (settings.hilite ? hilite_none : "")
<< (!database.empty() ? backQuoteIfNeed(database) + "." : "") << backQuoteIfNeed(table);
if (uuid != UUIDHelpers::Nil)

View File

@ -24,6 +24,16 @@ namespace ErrorCodes
}
void ASTFunction::appendColumnNameImpl(WriteBuffer & ostr) const
{
appendColumnNameImpl(ostr, nullptr);
}
void ASTFunction::appendColumnNameImpl(WriteBuffer & ostr, const Settings & settings) const
{
appendColumnNameImpl(ostr, &settings);
}
void ASTFunction::appendColumnNameImpl(WriteBuffer & ostr, const Settings * settings) const
{
if (name == "view")
throw Exception("Table function view cannot be used as an expression", ErrorCodes::UNEXPECTED_EXPRESSION);
@ -37,19 +47,30 @@ void ASTFunction::appendColumnNameImpl(WriteBuffer & ostr) const
{
if (it != parameters->children.begin())
writeCString(", ", ostr);
(*it)->appendColumnName(ostr);
if (settings)
(*it)->appendColumnName(ostr, *settings);
else
(*it)->appendColumnName(ostr);
}
writeChar(')', ostr);
}
writeChar('(', ostr);
if (arguments)
{
for (auto it = arguments->children.begin(); it != arguments->children.end(); ++it)
{
if (it != arguments->children.begin())
writeCString(", ", ostr);
(*it)->appendColumnName(ostr);
if (settings)
(*it)->appendColumnName(ostr, *settings);
else
(*it)->appendColumnName(ostr);
}
}
writeChar(')', ostr);
if (is_window_function)
@ -61,11 +82,11 @@ void ASTFunction::appendColumnNameImpl(WriteBuffer & ostr) const
}
else
{
FormatSettings settings{ostr, true /* one_line */};
FormatSettings format_settings{ostr, true /* one_line */};
FormatState state;
FormatStateStacked frame;
writeCString("(", ostr);
window_definition->formatImpl(settings, state, frame);
window_definition->formatImpl(format_settings, state, frame);
writeCString(")", ostr);
}
}

View File

@ -54,6 +54,10 @@ public:
protected:
void formatImplWithoutAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;
void appendColumnNameImpl(WriteBuffer & ostr) const override;
void appendColumnNameImpl(WriteBuffer & ostr, const Settings & settings) const override;
private:
void appendColumnNameImpl(WriteBuffer & ostr, const Settings * settings) const;
};

View File

@ -17,8 +17,10 @@ void ASTLiteral::updateTreeHashImpl(SipHash & hash_state) const
applyVisitor(FieldVisitorHash(hash_state), value);
}
namespace
{
/// Writes the 'tuple' word before tuple literals for backward compatibility reasons.
/// TODO: remove once versions lower than 20.3 are rarely used.
class FieldVisitorToColumnName : public StaticVisitor<String>
{
public:
@ -46,14 +48,51 @@ String FieldVisitorToColumnName::operator() (const Tuple & x) const
return wb.str();
}
}
void ASTLiteral::appendColumnNameImpl(WriteBuffer & ostr, const Settings & settings) const
{
if (settings.legacy_column_name_of_tuple_literal)
appendColumnNameImplLegacy(ostr);
else
appendColumnNameImpl(ostr);
}
void ASTLiteral::appendColumnNameImpl(WriteBuffer & ostr) const
{
/// 100 - just arbitrary value.
constexpr auto min_elements_for_hashing = 100;
/// Special case for very large arrays and tuples. Instead of listing all elements, use a hash of them.
/// (Otherwise the column name would be too long, which would significantly slow down expression analysis.)
auto type = value.getType();
if ((type == Field::Types::Array && value.get<const Array &>().size() > min_elements_for_hashing)
|| (type == Field::Types::Tuple && value.get<const Tuple &>().size() > min_elements_for_hashing))
{
SipHash hash;
applyVisitor(FieldVisitorHash(hash), value);
UInt64 low, high;
hash.get128(low, high);
writeCString(type == Field::Types::Array ? "__array_" : "__tuple_", ostr);
writeText(low, ostr);
ostr.write('_');
writeText(high, ostr);
}
else
{
String column_name = applyVisitor(FieldVisitorToString(), value);
writeString(column_name, ostr);
}
}
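For literals with more than min_elements_for_hashing (100) elements, the column name is built from a 128-bit hash rather than from the full textual form, as the comments above describe. A standalone sketch of that naming scheme, with std::hash standing in for the SipHash used in the real code:

#include <cstdint>
#include <functional>
#include <string>
#include <vector>

std::string columnNameForLargeArray(const std::vector<int64_t> & values)
{
    // Fold the elements into two 64-bit halves ("low"/"high") of a synthetic 128-bit hash.
    uint64_t low = 0x8000000000000000ULL, high = values.size();
    for (int64_t v : values)
    {
        low = low * 0x9E3779B97F4A7C15ULL + std::hash<int64_t>{}(v);
        high = high * 0xC2B2AE3D27D4EB4FULL + std::hash<int64_t>{}(~v);
    }
    return "__array_" + std::to_string(low) + "_" + std::to_string(high);
}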
void ASTLiteral::appendColumnNameImplLegacy(WriteBuffer & ostr) const
{
/// 100 - just arbitrary value.
constexpr auto min_elements_for_hashing = 100;
/// Special case for very large arrays. Instead of listing all elements, use a hash of them.
/// (Otherwise the column name would be too long, which would significantly slow down expression analysis.)
/// TODO: Also do hashing for large tuples once versions lower than 20.3 are rarely used, because it breaks backward compatibility.
auto type = value.getType();
if ((type == Field::Types::Array && value.get<const Array &>().size() > min_elements_for_hashing))
{

View File

@ -44,6 +44,13 @@ protected:
void formatImplWithoutAlias(const FormatSettings & settings, FormatState &, FormatStateStacked) const override;
void appendColumnNameImpl(WriteBuffer & ostr) const override;
void appendColumnNameImpl(WriteBuffer & ostr, const Settings & settings) const override;
private:
/// Legacy version of 'appendColumnNameImpl'. It differs only for tuple literals.
/// It's only needed to keep queries with tuple literals in distributed tables
/// working during a rolling update.
void appendColumnNameImplLegacy(WriteBuffer & ostr) const;
};
}

View File

@ -75,12 +75,15 @@ protected:
}
settings.ostr << (settings.hilite ? hilite_keyword : "");
if (exchange)
if (exchange && dictionary)
settings.ostr << "EXCHANGE DICTIONARIES ";
else if (exchange)
settings.ostr << "EXCHANGE TABLES ";
else if (dictionary)
settings.ostr << "RENAME DICTIONARY ";
else
settings.ostr << "RENAME TABLE ";
settings.ostr << (settings.hilite ? hilite_none : "");
for (auto it = elements.cbegin(); it != elements.cend(); ++it)

View File

@ -48,6 +48,14 @@ void ASTWithAlias::appendColumnName(WriteBuffer & ostr) const
appendColumnNameImpl(ostr);
}
void ASTWithAlias::appendColumnName(WriteBuffer & ostr, const Settings & settings) const
{
if (prefer_alias_to_column_name && !alias.empty())
writeString(alias, ostr);
else
appendColumnNameImpl(ostr, settings);
}
void ASTWithAlias::appendColumnNameWithoutAlias(WriteBuffer & ostr) const
{
appendColumnNameImpl(ostr);

View File

@ -21,8 +21,10 @@ public:
using IAST::IAST;
void appendColumnName(WriteBuffer & ostr) const final;
void appendColumnName(WriteBuffer & ostr, const Settings & settings) const final;
void appendColumnNameWithoutAlias(WriteBuffer & ostr) const final;
String getAliasOrColumnName() const override { return alias.empty() ? getColumnName() : alias; }
String getAliasOrColumnName(const Settings & settings) const override { return alias.empty() ? getColumnName(settings) : alias; }
String tryGetAlias() const override { return alias; }
void setAlias(const String & to) override { alias = to; }
@ -33,6 +35,7 @@ public:
protected:
virtual void appendColumnNameImpl(WriteBuffer & ostr) const = 0;
virtual void appendColumnNameImpl(WriteBuffer & ostr, const Settings &) const { appendColumnNameImpl(ostr); }
};
/// helper for setting aliases and chaining result to other functions

View File

@ -109,6 +109,14 @@ String IAST::getColumnName() const
}
String IAST::getColumnName(const Settings & settings) const
{
WriteBufferFromOwnString write_buffer;
appendColumnName(write_buffer, settings);
return write_buffer.str();
}
String IAST::getColumnNameWithoutAlias() const
{
WriteBufferFromOwnString write_buffer;

Some files were not shown because too many files have changed in this diff.