Merge branch 'master' into fix-receive-time-metric

This commit is contained in:
Alexey Milovidov 2021-06-27 19:00:39 +03:00
commit b11f9223de
163 changed files with 2256 additions and 1141 deletions

View File

@ -184,10 +184,27 @@ endif ()
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -rdynamic")
find_program (OBJCOPY_PATH NAMES "llvm-objcopy" "llvm-objcopy-12" "llvm-objcopy-11" "llvm-objcopy-10" "llvm-objcopy-9" "llvm-objcopy-8" "objcopy")
if (NOT OBJCOPY_PATH AND OS_DARWIN)
find_program (BREW_PATH NAMES "brew")
if (BREW_PATH)
execute_process (COMMAND ${BREW_PATH} --prefix llvm ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE LLVM_PREFIX)
if (LLVM_PREFIX)
find_program (OBJCOPY_PATH NAMES "llvm-objcopy" PATHS "${LLVM_PREFIX}/bin" NO_DEFAULT_PATH)
endif ()
if (NOT OBJCOPY_PATH)
execute_process (COMMAND ${BREW_PATH} --prefix binutils ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE BINUTILS_PREFIX)
if (BINUTILS_PREFIX)
find_program (OBJCOPY_PATH NAMES "objcopy" PATHS "${BINUTILS_PREFIX}/bin" NO_DEFAULT_PATH)
endif ()
endif ()
endif ()
endif ()
if (OBJCOPY_PATH)
message(STATUS "Using objcopy: ${OBJCOPY_PATH}.")
message (STATUS "Using objcopy: ${OBJCOPY_PATH}")
else ()
message(FATAL_ERROR "Cannot find objcopy.")
message (FATAL_ERROR "Cannot find objcopy.")
endif ()
if (OS_DARWIN)

View File

@ -17,7 +17,7 @@ class DateLUT : private boost::noncopyable
{
public:
/// Return singleton DateLUTImpl instance for the default time zone.
static ALWAYS_INLINE const DateLUTImpl & instance()
static ALWAYS_INLINE const DateLUTImpl & instance() // -V1071
{
const auto & date_lut = getInstance();
return *date_lut.default_impl.load(std::memory_order_acquire);
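A minimal, self-contained sketch of the pattern used above: the singleton holds an atomic pointer to the current default implementation, so readers pay only an acquire-load. The names `Impl` and `Lut` are illustrative, not ClickHouse's.

``` cpp
#include <atomic>

struct Impl { /* time zone tables would live here */ };

class Lut
{
public:
    static const Impl & instance()
    {
        static Lut lut;  // thread-safe one-time construction since C++11
        return *lut.default_impl.load(std::memory_order_acquire);
    }

private:
    Lut() : default_impl(new Impl) {}  // intentionally leaked: lives for the whole process
    std::atomic<Impl *> default_impl;
};

int main()
{
    const Impl & impl = Lut::instance();
    (void)impl;
}
```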

View File

@ -33,44 +33,25 @@ macro(clickhouse_embed_binaries)
message(FATAL_ERROR "The list of binary resources to embed may not be empty")
endif()
# If cross-compiling, ensure we use the toolchain file and target the
# actual target architecture
if (CMAKE_CROSSCOMPILING)
set(CROSS_COMPILE_FLAGS "--target=${CMAKE_C_COMPILER_TARGET} --gcc-toolchain=${TOOLCHAIN_FILE}")
else()
set(CROSS_COMPILE_FLAGS "")
endif()
add_library("${EMBED_TARGET}" STATIC)
set_target_properties("${EMBED_TARGET}" PROPERTIES LINKER_LANGUAGE C)
set(EMBED_TEMPLATE_FILE "${PROJECT_SOURCE_DIR}/programs/embed_binary.S.in")
set(RESOURCE_OBJS)
foreach(RESOURCE_FILE ${EMBED_RESOURCES})
set(RESOURCE_OBJ "${RESOURCE_FILE}.o")
list(APPEND RESOURCE_OBJS "${RESOURCE_OBJ}")
# Normalize the name of the resource
foreach(RESOURCE_FILE ${EMBED_RESOURCES})
set(ASSEMBLY_FILE_NAME "${RESOURCE_FILE}.S")
set(BINARY_FILE_NAME "${RESOURCE_FILE}")
# Normalize the name of the resource.
string(REGEX REPLACE "[\./-]" "_" SYMBOL_NAME "${RESOURCE_FILE}") # - must be last in regex
string(REPLACE "+" "_PLUS_" SYMBOL_NAME "${SYMBOL_NAME}")
set(ASSEMBLY_FILE_NAME "${RESOURCE_FILE}.S")
# Put the configured assembly file in the output directory.
# This is so we can clean it up as usual, and we CD to the
# source directory before compiling, so that the assembly
# `.incbin` directive can find the file.
# Generate the configured assembly file in the output directory.
configure_file("${EMBED_TEMPLATE_FILE}" "${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}" @ONLY)
# Generate the output object file by compiling the assembly, in the directory of
# the sources so that the resource file may also be found
add_custom_command(
OUTPUT ${RESOURCE_OBJ}
COMMAND cd "${EMBED_RESOURCE_DIR}" &&
${CMAKE_C_COMPILER} "${CROSS_COMPILE_FLAGS}" -c -o
"${CMAKE_CURRENT_BINARY_DIR}/${RESOURCE_OBJ}"
"${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}"
)
set_source_files_properties("${RESOURCE_OBJ}" PROPERTIES EXTERNAL_OBJECT true GENERATED true)
endforeach()
# Set the include directory for relative paths specified for `.incbin` directive.
set_property(SOURCE "${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}" APPEND PROPERTY INCLUDE_DIRECTORIES "${EMBED_RESOURCE_DIR}")
add_library("${EMBED_TARGET}" STATIC ${RESOURCE_OBJS})
set_target_properties("${EMBED_TARGET}" PROPERTIES LINKER_LANGUAGE C)
target_sources("${EMBED_TARGET}" PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}")
endforeach()
endmacro()
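For context, a hedged sketch of how objects embedded via the `.incbin` template above are typically consumed from C++. The real start/end symbol names are defined by programs/embed_binary.S.in, which is not shown in this diff, so the symbols below are hypothetical.

``` cpp
#include <cstddef>
#include <string_view>

// Hypothetical symbols following the common incbin/objcopy convention;
// the actual names come from the embed_binary.S.in template.
extern "C" const char _binary_resource_start[];  // first byte of the blob
extern "C" const char _binary_resource_end[];    // one past the last byte

std::string_view getResource()
{
    // The linker resolves both symbols, so the size is a pointer difference.
    return {_binary_resource_start,
            static_cast<std::size_t>(_binary_resource_end - _binary_resource_start)};
}
```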

View File

@ -4,7 +4,6 @@ set (CMAKE_C_COMPILER_TARGET "aarch64-linux-gnu")
set (CMAKE_CXX_COMPILER_TARGET "aarch64-linux-gnu")
set (CMAKE_ASM_COMPILER_TARGET "aarch64-linux-gnu")
set (CMAKE_SYSROOT "${CMAKE_CURRENT_LIST_DIR}/../toolchain/linux-aarch64/aarch64-linux-gnu/libc")
get_filename_component (TOOLCHAIN_FILE "${CMAKE_TOOLCHAIN_FILE}" REALPATH)
# We don't use compiler from toolchain because it's gcc-8, and we provide support only for gcc-9.
set (CMAKE_AR "${CMAKE_CURRENT_LIST_DIR}/../toolchain/linux-aarch64/bin/aarch64-linux-gnu-ar" CACHE FILEPATH "" FORCE)

View File

@ -26,7 +26,7 @@ if (NOT USE_INTERNAL_CCTZ_LIBRARY)
set_property (TARGET cctz PROPERTY IMPORTED_LOCATION ${LIBRARY_CCTZ})
set_property (TARGET cctz PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${INCLUDE_CCTZ})
endif()
set(SYSTEM_STORAGE_TZ_FILE "${CMAKE_BINARY_DIR}/src/Storages/System/StorageSystemTimeZones.generated.cpp")
file(REMOVE ${SYSTEM_STORAGE_TZ_FILE})
file(APPEND ${SYSTEM_STORAGE_TZ_FILE} "// autogenerated by ClickHouse/contrib/cctz-cmake/CMakeLists.txt\n")

View File

@ -1,7 +1,7 @@
add_library(murmurhash
src/murmurhash2.cpp
src/murmurhash3.cpp
include/murmurhash2.h
include/murmurhash3.h)
src/MurmurHash2.cpp
src/MurmurHash3.cpp
include/MurmurHash2.h
include/MurmurHash3.h)
target_include_directories (murmurhash PUBLIC include)

View File

@ -0,0 +1,49 @@
//-----------------------------------------------------------------------------
// MurmurHash2 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.
#ifndef MURMURHASH2_H
#define MURMURHASH2_H
#include <stddef.h>
//-----------------------------------------------------------------------------
// Platform-specific functions and macros
// Microsoft Visual Studio
#if defined(_MSC_VER) && (_MSC_VER < 1600)
typedef unsigned char uint8_t;
typedef unsigned int uint32_t;
typedef unsigned __int64 uint64_t;
// Other compilers
#else // defined(_MSC_VER)
#include <stdint.h>
#endif // !defined(_MSC_VER)
//-----------------------------------------------------------------------------
#ifdef __cplusplus
extern "C" {
#endif
uint32_t MurmurHash2 ( const void * key, size_t len, uint32_t seed );
uint64_t MurmurHash64A ( const void * key, size_t len, uint64_t seed );
uint64_t MurmurHash64B ( const void * key, size_t len, uint64_t seed );
uint32_t MurmurHash2A ( const void * key, size_t len, uint32_t seed );
uint32_t MurmurHashNeutral2 ( const void * key, size_t len, uint32_t seed );
uint32_t MurmurHashAligned2 ( const void * key, size_t len, uint32_t seed );
#ifdef __cplusplus
}
#endif
//-----------------------------------------------------------------------------
#endif // MURMURHASH2_H
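A minimal usage sketch of the declarations above (editor's addition, not part of the header): the same key and seed always yield the same hash, and the different entry points differ in result width and portability tradeoffs.

``` cpp
#include "MurmurHash2.h"
#include <cstdio>
#include <cstring>

int main()
{
    const char * key = "example key";
    uint32_t h32 = MurmurHash2(key, strlen(key), /*seed=*/0);
    uint64_t h64 = MurmurHash64A(key, strlen(key), /*seed=*/0);
    printf("32-bit: %08x  64-bit: %016llx\n", (unsigned)h32, (unsigned long long)h64);
}
```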

View File

@ -2,7 +2,10 @@
// MurmurHash3 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.
#pragma once
#ifndef MURMURHASH3_H
#define MURMURHASH3_H
#include <stddef.h>
//-----------------------------------------------------------------------------
// Platform-specific functions and macros
@ -23,20 +26,22 @@ typedef unsigned __int64 uint64_t;
#endif // !defined(_MSC_VER)
//-----------------------------------------------------------------------------
#ifdef __cplusplus
extern "C" {
#endif
//-----------------------------------------------------------------------------
void MurmurHash3_x86_32 ( const void * key, size_t len, uint32_t seed, void * out );
void MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed, void * out );
void MurmurHash3_x86_128 ( const void * key, size_t len, uint32_t seed, void * out );
void MurmurHash3_x86_128 ( const void * key, int len, uint32_t seed, void * out );
void MurmurHash3_x64_128 ( const void * key, int len, uint32_t seed, void * out );
//-----------------------------------------------------------------------------
void MurmurHash3_x64_128 ( const void * key, size_t len, uint32_t seed, void * out );
#ifdef __cplusplus
}
#endif
//-----------------------------------------------------------------------------
#endif // MURMURHASH3_H
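A usage sketch for the header above (editor's addition): the x64_128 variant returns 128 bits through the `out` pointer, which must point at a 16-byte buffer. Note that the diff widens `len` from `int` to `size_t`.

``` cpp
#include "MurmurHash3.h"
#include <cstdio>
#include <cstring>

int main()
{
    const char * key = "example key";
    uint64_t out[2];  // 128-bit digest is written through the void * out parameter
    MurmurHash3_x64_128(key, strlen(key), /*seed=*/0, out);
    printf("%016llx%016llx\n", (unsigned long long)out[0], (unsigned long long)out[1]);
}
```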

View File

@ -1,31 +0,0 @@
//-----------------------------------------------------------------------------
// MurmurHash2 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.
#pragma once
//-----------------------------------------------------------------------------
// Platform-specific functions and macros
// Microsoft Visual Studio
#if defined(_MSC_VER) && (_MSC_VER < 1600)
typedef unsigned char uint8_t;
typedef unsigned int uint32_t;
typedef unsigned __int64 uint64_t;
// Other compilers
#else // defined(_MSC_VER)
#include <stdint.h>
#endif // !defined(_MSC_VER)
uint32_t MurmurHash2 (const void * key, int len, uint32_t seed);
uint64_t MurmurHash64A (const void * key, int len, uint64_t seed);
uint64_t MurmurHash64B (const void * key, int len, uint64_t seed);
uint32_t MurmurHash2A (const void * key, int len, uint32_t seed);
uint32_t MurmurHashNeutral2 (const void * key, int len, uint32_t seed);
uint32_t MurmurHashAligned2 (const void * key, int len, uint32_t seed);

View File

@ -0,0 +1,523 @@
//-----------------------------------------------------------------------------
// MurmurHash2 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.
// Note - This code makes a few assumptions about how your machine behaves -
// 1. We can read a 4-byte value from any address without crashing
// 2. sizeof(int) == 4
// And it has a few limitations -
// 1. It will not work incrementally.
// 2. It will not produce the same results on little-endian and big-endian
// machines.
#include "MurmurHash2.h"
//-----------------------------------------------------------------------------
// Platform-specific functions and macros
// Microsoft Visual Studio
#if defined(_MSC_VER)
#define BIG_CONSTANT(x) (x)
// Other compilers
#else // defined(_MSC_VER)
#define BIG_CONSTANT(x) (x##LLU)
#endif // !defined(_MSC_VER)
//-----------------------------------------------------------------------------
uint32_t MurmurHash2 ( const void * key, size_t len, uint32_t seed )
{
// 'm' and 'r' are mixing constants generated offline.
// They're not really 'magic', they just happen to work well.
const uint32_t m = 0x5bd1e995;
const int r = 24;
// Initialize the hash to a 'random' value
uint32_t h = seed ^ len;
// Mix 4 bytes at a time into the hash
const unsigned char * data = (const unsigned char *)key;
while(len >= 4)
{
uint32_t k = *(uint32_t*)data;
k *= m;
k ^= k >> r;
k *= m;
h *= m;
h ^= k;
data += 4;
len -= 4;
}
// Handle the last few bytes of the input array
switch(len)
{
case 3: h ^= data[2] << 16;
case 2: h ^= data[1] << 8;
case 1: h ^= data[0];
h *= m;
};
// Do a few final mixes of the hash to ensure the last few
// bytes are well-incorporated.
h ^= h >> 13;
h *= m;
h ^= h >> 15;
return h;
}
//-----------------------------------------------------------------------------
// MurmurHash2, 64-bit versions, by Austin Appleby
// The same caveats as 32-bit MurmurHash2 apply here - beware of alignment
// and endian-ness issues if used across multiple platforms.
// 64-bit hash for 64-bit platforms
uint64_t MurmurHash64A ( const void * key, size_t len, uint64_t seed )
{
const uint64_t m = BIG_CONSTANT(0xc6a4a7935bd1e995);
const int r = 47;
uint64_t h = seed ^ (len * m);
const uint64_t * data = (const uint64_t *)key;
const uint64_t * end = data + (len/8);
while(data != end)
{
uint64_t k = *data++;
k *= m;
k ^= k >> r;
k *= m;
h ^= k;
h *= m;
}
const unsigned char * data2 = (const unsigned char*)data;
switch(len & 7)
{
case 7: h ^= uint64_t(data2[6]) << 48;
case 6: h ^= uint64_t(data2[5]) << 40;
case 5: h ^= uint64_t(data2[4]) << 32;
case 4: h ^= uint64_t(data2[3]) << 24;
case 3: h ^= uint64_t(data2[2]) << 16;
case 2: h ^= uint64_t(data2[1]) << 8;
case 1: h ^= uint64_t(data2[0]);
h *= m;
};
h ^= h >> r;
h *= m;
h ^= h >> r;
return h;
}
// 64-bit hash for 32-bit platforms
uint64_t MurmurHash64B ( const void * key, size_t len, uint64_t seed )
{
const uint32_t m = 0x5bd1e995;
const int r = 24;
uint32_t h1 = uint32_t(seed) ^ len;
uint32_t h2 = uint32_t(seed >> 32);
const uint32_t * data = (const uint32_t *)key;
while(len >= 8)
{
uint32_t k1 = *data++;
k1 *= m; k1 ^= k1 >> r; k1 *= m;
h1 *= m; h1 ^= k1;
len -= 4;
uint32_t k2 = *data++;
k2 *= m; k2 ^= k2 >> r; k2 *= m;
h2 *= m; h2 ^= k2;
len -= 4;
}
if(len >= 4)
{
uint32_t k1 = *data++;
k1 *= m; k1 ^= k1 >> r; k1 *= m;
h1 *= m; h1 ^= k1;
len -= 4;
}
switch(len)
{
case 3: h2 ^= ((unsigned char*)data)[2] << 16;
case 2: h2 ^= ((unsigned char*)data)[1] << 8;
case 1: h2 ^= ((unsigned char*)data)[0];
h2 *= m;
};
h1 ^= h2 >> 18; h1 *= m;
h2 ^= h1 >> 22; h2 *= m;
h1 ^= h2 >> 17; h1 *= m;
h2 ^= h1 >> 19; h2 *= m;
uint64_t h = h1;
h = (h << 32) | h2;
return h;
}
//-----------------------------------------------------------------------------
// MurmurHash2A, by Austin Appleby
// This is a variant of MurmurHash2 modified to use the Merkle-Damgard
// construction. Bulk speed should be identical to Murmur2, small-key speed
// will be 10%-20% slower due to the added overhead at the end of the hash.
// This variant fixes a minor issue where null keys were more likely to
// collide with each other than expected, and also makes the function
// more amenable to incremental implementations.
#define mmix(h,k) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
uint32_t MurmurHash2A ( const void * key, size_t len, uint32_t seed )
{
const uint32_t m = 0x5bd1e995;
const int r = 24;
uint32_t l = len;
const unsigned char * data = (const unsigned char *)key;
uint32_t h = seed;
while(len >= 4)
{
uint32_t k = *(uint32_t*)data;
mmix(h,k);
data += 4;
len -= 4;
}
uint32_t t = 0;
switch(len)
{
case 3: t ^= data[2] << 16;
case 2: t ^= data[1] << 8;
case 1: t ^= data[0];
};
mmix(h,t);
mmix(h,l);
h ^= h >> 13;
h *= m;
h ^= h >> 15;
return h;
}
//-----------------------------------------------------------------------------
// CMurmurHash2A, by Austin Appleby
// This is a sample implementation of MurmurHash2A designed to work
// incrementally.
// Usage -
// CMurmurHash2A hasher
// hasher.Begin(seed);
// hasher.Add(data1,size1);
// hasher.Add(data2,size2);
// ...
// hasher.Add(dataN,sizeN);
// uint32_t hash = hasher.End()
class CMurmurHash2A
{
public:
void Begin ( uint32_t seed = 0 )
{
m_hash = seed;
m_tail = 0;
m_count = 0;
m_size = 0;
}
void Add ( const unsigned char * data, size_t len )
{
m_size += len;
MixTail(data,len);
while(len >= 4)
{
uint32_t k = *(uint32_t*)data;
mmix(m_hash,k);
data += 4;
len -= 4;
}
MixTail(data,len);
}
uint32_t End ( void )
{
mmix(m_hash,m_tail);
mmix(m_hash,m_size);
m_hash ^= m_hash >> 13;
m_hash *= m;
m_hash ^= m_hash >> 15;
return m_hash;
}
private:
static const uint32_t m = 0x5bd1e995;
static const int r = 24;
void MixTail ( const unsigned char * & data, size_t & len )
{
while( len && ((len<4) || m_count) )
{
m_tail |= (*data++) << (m_count * 8);
m_count++;
len--;
if(m_count == 4)
{
mmix(m_hash,m_tail);
m_tail = 0;
m_count = 0;
}
}
}
uint32_t m_hash;
uint32_t m_tail;
uint32_t m_count;
uint32_t m_size;
};
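A hedged sketch (editor's addition, not part of the upstream file): feeding a buffer to `CMurmurHash2A` in several `Add()` calls must produce the same digest as one-shot `MurmurHash2A` with the same seed, since the class implements the same Merkle-Damgard construction incrementally.

``` cpp
#include <cassert>

static void checkIncrementalMatchesOneShot()
{
    const char text[] = "split across two Add calls";
    const unsigned char * data = reinterpret_cast<const unsigned char *>(text);
    const size_t n = sizeof(text) - 1;  // exclude the terminating NUL

    CMurmurHash2A hasher;
    hasher.Begin(/*seed=*/42);
    hasher.Add(data, 5);          // first chunk
    hasher.Add(data + 5, n - 5);  // rest of the buffer
    assert(hasher.End() == MurmurHash2A(text, n, /*seed=*/42));
}
```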
//-----------------------------------------------------------------------------
// MurmurHashNeutral2, by Austin Appleby
// Same as MurmurHash2, but endian- and alignment-neutral.
// Half the speed though, alas.
uint32_t MurmurHashNeutral2 ( const void * key, size_t len, uint32_t seed )
{
const uint32_t m = 0x5bd1e995;
const int r = 24;
uint32_t h = seed ^ len;
const unsigned char * data = (const unsigned char *)key;
while(len >= 4)
{
uint32_t k;
k = data[0];
k |= data[1] << 8;
k |= data[2] << 16;
k |= data[3] << 24;
k *= m;
k ^= k >> r;
k *= m;
h *= m;
h ^= k;
data += 4;
len -= 4;
}
switch(len)
{
case 3: h ^= data[2] << 16;
case 2: h ^= data[1] << 8;
case 1: h ^= data[0];
h *= m;
};
h ^= h >> 13;
h *= m;
h ^= h >> 15;
return h;
}
//-----------------------------------------------------------------------------
// MurmurHashAligned2, by Austin Appleby
// Same algorithm as MurmurHash2, but only does aligned reads - should be safer
// on certain platforms.
// Performance will be lower than MurmurHash2
#define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
uint32_t MurmurHashAligned2 ( const void * key, size_t len, uint32_t seed )
{
const uint32_t m = 0x5bd1e995;
const int r = 24;
const unsigned char * data = (const unsigned char *)key;
uint32_t h = seed ^ len;
size_t align = (uint64_t)data & 3;
if(align && (len >= 4))
{
// Pre-load the temp registers
uint32_t t = 0, d = 0;
switch(align)
{
case 1: t |= data[2] << 16;
case 2: t |= data[1] << 8;
case 3: t |= data[0];
}
t <<= (8 * align);
data += 4-align;
len -= 4-align;
int sl = 8 * (4-align);
int sr = 8 * align;
// Mix
while(len >= 4)
{
d = *(uint32_t *)data;
t = (t >> sr) | (d << sl);
uint32_t k = t;
MIX(h,k,m);
t = d;
data += 4;
len -= 4;
}
// Handle leftover data in temp registers
d = 0;
if(len >= align)
{
switch(align)
{
case 3: d |= data[2] << 16;
case 2: d |= data[1] << 8;
case 1: d |= data[0];
}
uint32_t k = (t >> sr) | (d << sl);
MIX(h,k,m);
data += align;
len -= align;
//----------
// Handle tail bytes
switch(len)
{
case 3: h ^= data[2] << 16;
case 2: h ^= data[1] << 8;
case 1: h ^= data[0];
h *= m;
};
}
else
{
switch(len)
{
case 3: d |= data[2] << 16;
case 2: d |= data[1] << 8;
case 1: d |= data[0];
case 0: h ^= (t >> sr) | (d << sl);
h *= m;
}
}
h ^= h >> 13;
h *= m;
h ^= h >> 15;
return h;
}
else
{
while(len >= 4)
{
uint32_t k = *(uint32_t *)data;
MIX(h,k,m);
data += 4;
len -= 4;
}
//----------
// Handle tail bytes
switch(len)
{
case 3: h ^= data[2] << 16;
case 2: h ^= data[1] << 8;
case 1: h ^= data[0];
h *= m;
};
h ^= h >> 13;
h *= m;
h ^= h >> 15;
return h;
}
}
//-----------------------------------------------------------------------------
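A brief sketch of why the aligned variant exists (editor's addition): `MurmurHashAligned2` computes the same value as `MurmurHash2` while issuing only aligned 32-bit loads, so it is the safer drop-in on platforms that fault on unaligned access.

``` cpp
#include <cassert>

static void checkAlignedVariantAgrees()
{
    char buf[32] = "0123456789abcdef0123456789abcde";
    const char * misaligned = buf + 1;  // deliberately not 4-byte aligned
    assert(MurmurHashAligned2(misaligned, 13, /*seed=*/7)
           == MurmurHash2(misaligned, 13, /*seed=*/7));
}
```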

View File

@ -1,3 +1,4 @@
//-----------------------------------------------------------------------------
// MurmurHash3 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.
@ -6,8 +7,8 @@
// compile and run any of them on any platform, but your performance with the
// non-native version will be less than optimal.
#include "murmurhash3.h"
#include <cstring>
#include "MurmurHash3.h"
#include <string.h>
//-----------------------------------------------------------------------------
// Platform-specific functions and macros
@ -93,7 +94,7 @@ FORCE_INLINE uint64_t fmix64 ( uint64_t k )
//-----------------------------------------------------------------------------
void MurmurHash3_x86_32 ( const void * key, int len,
void MurmurHash3_x86_32 ( const void * key, size_t len,
uint32_t seed, void * out )
{
const uint8_t * data = (const uint8_t*)key;
@ -149,7 +150,7 @@ void MurmurHash3_x86_32 ( const void * key, int len,
//-----------------------------------------------------------------------------
void MurmurHash3_x86_128 ( const void * key, const int len,
void MurmurHash3_x86_128 ( const void * key, const size_t len,
uint32_t seed, void * out )
{
const uint8_t * data = (const uint8_t*)key;
@ -254,7 +255,7 @@ void MurmurHash3_x86_128 ( const void * key, const int len,
//-----------------------------------------------------------------------------
void MurmurHash3_x64_128 ( const void * key, const int len,
void MurmurHash3_x64_128 ( const void * key, const size_t len,
const uint32_t seed, void * out )
{
const uint8_t * data = (const uint8_t*)key;
@ -332,3 +333,6 @@ void MurmurHash3_x64_128 ( const void * key, const int len,
((uint64_t*)out)[0] = h1;
((uint64_t*)out)[1] = h2;
}
//-----------------------------------------------------------------------------

View File

@ -1,423 +0,0 @@
// MurmurHash2 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.
// Note - This code makes a few assumptions about how your machine behaves -
// 1. We can read a 4-byte value from any address without crashing
// 2. sizeof(int) == 4
// And it has a few limitations -
// 1. It will not work incrementally.
// 2. It will not produce the same results on little-endian and big-endian
// machines.
#include "murmurhash2.h"
#include <cstring>
// Platform-specific functions and macros
// Microsoft Visual Studio
#if defined(_MSC_VER)
#define BIG_CONSTANT(x) (x)
// Other compilers
#else // defined(_MSC_VER)
#define BIG_CONSTANT(x) (x##LLU)
#endif // !defined(_MSC_VER)
uint32_t MurmurHash2(const void * key, int len, uint32_t seed)
{
// 'm' and 'r' are mixing constants generated offline.
// They're not really 'magic', they just happen to work well.
const uint32_t m = 0x5bd1e995;
const int r = 24;
// Initialize the hash to a 'random' value
uint32_t h = seed ^ len;
// Mix 4 bytes at a time into the hash
const unsigned char * data = reinterpret_cast<const unsigned char *>(key);
while (len >= 4)
{
uint32_t k;
memcpy(&k, data, sizeof(k));
k *= m;
k ^= k >> r;
k *= m;
h *= m;
h ^= k;
data += 4;
len -= 4;
}
// Handle the last few bytes of the input array
switch (len)
{
case 3: h ^= data[2] << 16;
case 2: h ^= data[1] << 8;
case 1: h ^= data[0];
h *= m;
};
// Do a few final mixes of the hash to ensure the last few
// bytes are well-incorporated.
h ^= h >> 13;
h *= m;
h ^= h >> 15;
return h;
}
// MurmurHash2, 64-bit versions, by Austin Appleby
// The same caveats as 32-bit MurmurHash2 apply here - beware of alignment
// and endian-ness issues if used across multiple platforms.
// 64-bit hash for 64-bit platforms
uint64_t MurmurHash64A(const void * key, int len, uint64_t seed)
{
const uint64_t m = BIG_CONSTANT(0xc6a4a7935bd1e995);
const int r = 47;
uint64_t h = seed ^ (len * m);
const uint64_t * data = reinterpret_cast<const uint64_t *>(key);
const uint64_t * end = data + (len/8);
while (data != end)
{
uint64_t k = *data++;
k *= m;
k ^= k >> r;
k *= m;
h ^= k;
h *= m;
}
const unsigned char * data2 = reinterpret_cast<const unsigned char *>(data);
switch (len & 7)
{
case 7: h ^= static_cast<uint64_t>(data2[6]) << 48;
case 6: h ^= static_cast<uint64_t>(data2[5]) << 40;
case 5: h ^= static_cast<uint64_t>(data2[4]) << 32;
case 4: h ^= static_cast<uint64_t>(data2[3]) << 24;
case 3: h ^= static_cast<uint64_t>(data2[2]) << 16;
case 2: h ^= static_cast<uint64_t>(data2[1]) << 8;
case 1: h ^= static_cast<uint64_t>(data2[0]);
h *= m;
};
h ^= h >> r;
h *= m;
h ^= h >> r;
return h;
}
// 64-bit hash for 32-bit platforms
uint64_t MurmurHash64B(const void * key, int len, uint64_t seed)
{
const uint32_t m = 0x5bd1e995;
const int r = 24;
uint32_t h1 = static_cast<uint32_t>(seed) ^ len;
uint32_t h2 = static_cast<uint32_t>(seed >> 32);
const uint32_t * data = reinterpret_cast<const uint32_t *>(key);
while (len >= 8)
{
uint32_t k1 = *data++;
k1 *= m; k1 ^= k1 >> r; k1 *= m;
h1 *= m; h1 ^= k1;
len -= 4;
uint32_t k2 = *data++;
k2 *= m; k2 ^= k2 >> r; k2 *= m;
h2 *= m; h2 ^= k2;
len -= 4;
}
if (len >= 4)
{
uint32_t k1 = *data++;
k1 *= m; k1 ^= k1 >> r; k1 *= m;
h1 *= m; h1 ^= k1;
len -= 4;
}
switch (len)
{
case 3: h2 ^= reinterpret_cast<const unsigned char *>(data)[2] << 16;
case 2: h2 ^= reinterpret_cast<const unsigned char *>(data)[1] << 8;
case 1: h2 ^= reinterpret_cast<const unsigned char *>(data)[0];
h2 *= m;
};
h1 ^= h2 >> 18; h1 *= m;
h2 ^= h1 >> 22; h2 *= m;
h1 ^= h2 >> 17; h1 *= m;
h2 ^= h1 >> 19; h2 *= m;
uint64_t h = h1;
h = (h << 32) | h2;
return h;
}
// MurmurHash2A, by Austin Appleby
// This is a variant of MurmurHash2 modified to use the Merkle-Damgard
// construction. Bulk speed should be identical to Murmur2, small-key speed
// will be 10%-20% slower due to the added overhead at the end of the hash.
// This variant fixes a minor issue where null keys were more likely to
// collide with each other than expected, and also makes the function
// more amenable to incremental implementations.
#define mmix(h,k) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
uint32_t MurmurHash2A(const void * key, int len, uint32_t seed)
{
const uint32_t m = 0x5bd1e995;
const int r = 24;
uint32_t l = len;
const unsigned char * data = reinterpret_cast<const unsigned char *>(key);
uint32_t h = seed;
while (len >= 4)
{
uint32_t k = *reinterpret_cast<const uint32_t *>(data);
mmix(h,k);
data += 4;
len -= 4;
}
uint32_t t = 0;
switch (len)
{
case 3: t ^= data[2] << 16;
case 2: t ^= data[1] << 8;
case 1: t ^= data[0];
};
mmix(h,t);
mmix(h,l);
h ^= h >> 13;
h *= m;
h ^= h >> 15;
return h;
}
// MurmurHashNeutral2, by Austin Appleby
// Same as MurmurHash2, but endian- and alignment-neutral.
// Half the speed though, alas.
uint32_t MurmurHashNeutral2(const void * key, int len, uint32_t seed)
{
const uint32_t m = 0x5bd1e995;
const int r = 24;
uint32_t h = seed ^ len;
const unsigned char * data = reinterpret_cast<const unsigned char *>(key);
while (len >= 4)
{
uint32_t k;
k = data[0];
k |= data[1] << 8;
k |= data[2] << 16;
k |= data[3] << 24;
k *= m;
k ^= k >> r;
k *= m;
h *= m;
h ^= k;
data += 4;
len -= 4;
}
switch (len)
{
case 3: h ^= data[2] << 16;
case 2: h ^= data[1] << 8;
case 1: h ^= data[0];
h *= m;
};
h ^= h >> 13;
h *= m;
h ^= h >> 15;
return h;
}
//-----------------------------------------------------------------------------
// MurmurHashAligned2, by Austin Appleby
// Same algorithm as MurmurHash2, but only does aligned reads - should be safer
// on certain platforms.
// Performance will be lower than MurmurHash2
#define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
uint32_t MurmurHashAligned2(const void * key, int len, uint32_t seed)
{
const uint32_t m = 0x5bd1e995;
const int r = 24;
const unsigned char * data = reinterpret_cast<const unsigned char *>(key);
uint32_t h = seed ^ len;
int align = reinterpret_cast<uint64_t>(data) & 3;
if (align && (len >= 4))
{
// Pre-load the temp registers
uint32_t t = 0, d = 0;
switch (align)
{
case 1: t |= data[2] << 16;
case 2: t |= data[1] << 8;
case 3: t |= data[0];
}
t <<= (8 * align);
data += 4-align;
len -= 4-align;
int sl = 8 * (4-align);
int sr = 8 * align;
// Mix
while (len >= 4)
{
d = *(reinterpret_cast<const uint32_t *>(data));
t = (t >> sr) | (d << sl);
uint32_t k = t;
MIX(h,k,m);
t = d;
data += 4;
len -= 4;
}
// Handle leftover data in temp registers
d = 0;
if (len >= align)
{
switch (align)
{
case 3: d |= data[2] << 16;
case 2: d |= data[1] << 8;
case 1: d |= data[0];
}
uint32_t k = (t >> sr) | (d << sl);
MIX(h,k,m);
data += align;
len -= align;
//----------
// Handle tail bytes
switch (len)
{
case 3: h ^= data[2] << 16;
case 2: h ^= data[1] << 8;
case 1: h ^= data[0];
h *= m;
};
}
else
{
switch (len)
{
case 3: d |= data[2] << 16;
case 2: d |= data[1] << 8;
case 1: d |= data[0];
case 0: h ^= (t >> sr) | (d << sl);
h *= m;
}
}
h ^= h >> 13;
h *= m;
h ^= h >> 15;
return h;
}
else
{
while (len >= 4)
{
uint32_t k = *reinterpret_cast<const uint32_t *>(data);
MIX(h,k,m);
data += 4;
len -= 4;
}
// Handle tail bytes
switch (len)
{
case 3: h ^= data[2] << 16;
case 2: h ^= data[1] << 8;
case 1: h ^= data[0];
h *= m;
};
h ^= h >> 13;
h *= m;
h ^= h >> 15;
return h;
}
}

View File

@ -72,7 +72,7 @@ RUN git clone https://github.com/tpoechtrager/apple-libtapi.git \
&& cd .. \
&& rm -rf apple-libtapi
# Build and install tools for cross-linking to Darwin
# Build and install tools for cross-linking to Darwin (x86-64)
RUN git clone https://github.com/tpoechtrager/cctools-port.git \
&& cd cctools-port/cctools \
&& ./configure --prefix=/cctools --with-libtapi=/cctools \
@ -81,8 +81,17 @@ RUN git clone https://github.com/tpoechtrager/cctools-port.git \
&& cd ../.. \
&& rm -rf cctools-port
# Download toolchain for Darwin
RUN wget -nv https://github.com/phracker/MacOSX-SDKs/releases/download/10.15/MacOSX10.15.sdk.tar.xz
# Build and install tools for cross-linking to Darwin (aarch64)
RUN git clone https://github.com/tpoechtrager/cctools-port.git \
&& cd cctools-port/cctools \
&& ./configure --prefix=/cctools --with-libtapi=/cctools \
--target=aarch64-apple-darwin \
&& make install \
&& cd ../.. \
&& rm -rf cctools-port
# Download toolchain and SDK for Darwin
RUN wget -nv https://github.com/phracker/MacOSX-SDKs/releases/download/11.3/MacOSX11.0.sdk.tar.xz
# Download toolchain for ARM
# It contains all required headers and libraries. Note that it's named as "gcc" but actually we are using clang for cross compiling.

View File

@ -3,7 +3,9 @@
set -x -e
mkdir -p build/cmake/toolchain/darwin-x86_64
tar xJf MacOSX10.15.sdk.tar.xz -C build/cmake/toolchain/darwin-x86_64 --strip-components=1
tar xJf MacOSX11.0.sdk.tar.xz -C build/cmake/toolchain/darwin-x86_64 --strip-components=1
ln -sf darwin-x86_64 build/cmake/toolchain/darwin-aarch64
mkdir -p build/cmake/toolchain/linux-aarch64
tar xJf gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu.tar.xz -C build/cmake/toolchain/linux-aarch64 --strip-components=1

View File

@ -58,6 +58,7 @@ def run_docker_image_with_env(image_name, output, env_variables, ch_root, ccache
def parse_env_variables(build_type, compiler, sanitizer, package_type, image_type, cache, distcc_hosts, unbundled, split_binary, clang_tidy, version, author, official, alien_pkgs, with_coverage, with_binaries):
CLANG_PREFIX = "clang"
DARWIN_SUFFIX = "-darwin"
DARWIN_ARM_SUFFIX = "-darwin-aarch64"
ARM_SUFFIX = "-aarch64"
FREEBSD_SUFFIX = "-freebsd"
@ -66,9 +67,10 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ
is_clang = compiler.startswith(CLANG_PREFIX)
is_cross_darwin = compiler.endswith(DARWIN_SUFFIX)
is_cross_darwin_arm = compiler.endswith(DARWIN_ARM_SUFFIX)
is_cross_arm = compiler.endswith(ARM_SUFFIX)
is_cross_freebsd = compiler.endswith(FREEBSD_SUFFIX)
is_cross_compile = is_cross_darwin or is_cross_arm or is_cross_freebsd
is_cross_compile = is_cross_darwin or is_cross_darwin_arm or is_cross_arm or is_cross_freebsd
# Explicitly use LLD with Clang by default.
# Don't force linker for cross-compilation.
@ -82,6 +84,13 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ
cmake_flags.append("-DCMAKE_RANLIB:FILEPATH=/cctools/bin/x86_64-apple-darwin-ranlib")
cmake_flags.append("-DLINKER_NAME=/cctools/bin/x86_64-apple-darwin-ld")
cmake_flags.append("-DCMAKE_TOOLCHAIN_FILE=/build/cmake/darwin/toolchain-x86_64.cmake")
elif is_cross_darwin_arm:
cc = compiler[:-len(DARWIN_ARM_SUFFIX)]
cmake_flags.append("-DCMAKE_AR:FILEPATH=/cctools/bin/aarch64-apple-darwin-ar")
cmake_flags.append("-DCMAKE_INSTALL_NAME_TOOL=/cctools/bin/aarch64-apple-darwin-install_name_tool")
cmake_flags.append("-DCMAKE_RANLIB:FILEPATH=/cctools/bin/aarch64-apple-darwin-ranlib")
cmake_flags.append("-DLINKER_NAME=/cctools/bin/aarch64-apple-darwin-ld")
cmake_flags.append("-DCMAKE_TOOLCHAIN_FILE=/build/cmake/darwin/toolchain-aarch64.cmake")
elif is_cross_arm:
cc = compiler[:-len(ARM_SUFFIX)]
cmake_flags.append("-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-aarch64.cmake")
@ -185,8 +194,8 @@ if __name__ == "__main__":
parser.add_argument("--clickhouse-repo-path", default=os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, os.pardir))
parser.add_argument("--output-dir", required=True)
parser.add_argument("--build-type", choices=("debug", ""), default="")
parser.add_argument("--compiler", choices=("clang-11", "clang-11-darwin", "clang-11-aarch64", "clang-11-freebsd",
"gcc-10"), default="clang-11")
parser.add_argument("--compiler", choices=("clang-11", "clang-11-darwin", "clang-11-darwin-aarch64", "clang-11-aarch64",
"clang-11-freebsd", "gcc-10"), default="clang-11")
parser.add_argument("--sanitizer", choices=("address", "thread", "memory", "undefined", ""), default="")
parser.add_argument("--unbundled", action="store_true")
parser.add_argument("--split-binary", action="store_true")

View File

@ -561,7 +561,7 @@ if args.report == 'main':
# Don't show mildly unstable queries, only the very unstable ones we
# treat as errors.
if very_unstable_queries:
if very_unstable_queries > 3:
if very_unstable_queries > 5:
error_tests += very_unstable_queries
status = 'failure'
message_array.append(str(very_unstable_queries) + ' unstable')

View File

@ -2,18 +2,16 @@
## TL; DR How to make ClickHouse compile and link faster?
Developer only! This command will likely fulfill most of your needs. Run before calling `ninja`.
Minimal ClickHouse build example:
```cmake
```bash
cmake .. \
-DCMAKE_C_COMPILER=/bin/clang-10 \
-DCMAKE_CXX_COMPILER=/bin/clang++-10 \
-DCMAKE_C_COMPILER=$(which clang-11) \
-DCMAKE_CXX_COMPILER=$(which clang++-11) \
-DCMAKE_BUILD_TYPE=Debug \
-DENABLE_CLICKHOUSE_ALL=OFF \
-DENABLE_CLICKHOUSE_SERVER=ON \
-DENABLE_CLICKHOUSE_CLIENT=ON \
-DUSE_STATIC_LIBRARIES=OFF \
-DSPLIT_SHARED_LIBRARIES=ON \
-DENABLE_LIBRARIES=OFF \
-DUSE_UNWIND=ON \
-DENABLE_UTILS=OFF \

View File

@ -33,7 +33,7 @@ Reboot.
``` bash
brew update
brew install cmake ninja libtool gettext llvm gcc
brew install cmake ninja libtool gettext llvm gcc binutils
```
## Checkout ClickHouse Sources {#checkout-clickhouse-sources}

View File

@ -65,7 +65,7 @@ By checking the row count:
Query:
``` sq;
``` sql
SELECT count() FROM recipes;
```

View File

@ -1302,6 +1302,7 @@ The table below shows supported data types and how they match ClickHouse [data t
| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `UTF8` |
| `STRING`, `BINARY` | [FixedString](../sql-reference/data-types/fixedstring.md) | `UTF8` |
| `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | `DECIMAL` |
| `DECIMAL256` | [Decimal256](../sql-reference/data-types/decimal.md) | `DECIMAL256` |
| `LIST` | [Array](../sql-reference/data-types/array.md) | `LIST` |
Arrays can be nested and can have a value of the `Nullable` type as an argument.

View File

@ -153,5 +153,6 @@ toc_title: Adopters
| <a href="https://gigapipe.com/" class="favicon">Gigapipe</a> | Managed ClickHouse | Main product | — | — | [Official website](https://gigapipe.com/) |
| <a href="https://www.hydrolix.io/" class="favicon">Hydrolix</a> | Cloud data platform | Main product | — | — | [Documentation](https://docs.hydrolix.io/guide/query) |
| <a href="https://www.argedor.com/en/clickhouse/" class="favicon">Argedor</a> | ClickHouse support | — | — | — | [Official website](https://www.argedor.com/en/clickhouse/) |
| <a href="https://signoz.io/" class="favicon">SigNoz</a> | Observability Platform | Main Product | — | — | [Source code](https://github.com/SigNoz/signoz) |
[Original article](https://clickhouse.tech/docs/en/introduction/adopters/) <!--hide-->

View File

@ -379,7 +379,7 @@ Default value: `1`.
## insert_null_as_default {#insert_null_as_default}
Enables or disables the insertion of [default values](../../sql-reference/statements/create/table.md#create-default-values) instead of [NULL](../../sql-reference/syntax.md#null-literal) into columns that do not have a [nullable](../../sql-reference/data-types/nullable.md#data_type-nullable) data type.
If column type is not nullable and this setting is disabled, then inserting `NULL` causes an exception. If column type is nullable, then `NULL` values are inserted as is, regardless of this setting.
This setting is applicable to [INSERT ... SELECT](../../sql-reference/statements/insert-into.md#insert_query_insert-select) queries. Note that `SELECT` subqueries may be concatenated with `UNION ALL` clause.
@ -1182,7 +1182,7 @@ Possible values:
Default value: `1`.
**Additional Info**
This setting is useful for replicated tables with a sampling key. A query may be processed faster if it is executed on several servers in parallel. But the query performance may degrade in the following cases:
@ -1194,21 +1194,22 @@ This setting is useful for replicated tables with a sampling key. A query may be
!!! warning "Warning"
This setting will produce incorrect results when joins or subqueries are involved, and all tables don't meet certain requirements. See [Distributed Subqueries and max_parallel_replicas](../../sql-reference/operators/in.md#max_parallel_replica-subqueries) for more details.
## compile {#compile}
## compile_expressions {#compile-expressions}
Enable compilation of queries. By default, 0 (disabled).
Enables or disables compilation of frequently used simple functions and operators to native code with LLVM at runtime.
The compilation is only used for part of the query-processing pipeline: for the first stage of aggregation (GROUP BY).
If this portion of the pipeline was compiled, the query may run faster due to unrolling of short loops and inlining of aggregate function calls. The maximum performance improvement (up to four times faster in rare cases) is seen for queries with multiple simple aggregate functions. Typically, the performance gain is insignificant. In very rare cases, it may slow down query execution.
Possible values:
## min_count_to_compile {#min-count-to-compile}
- 0 — Disabled.
- 1 — Enabled.
How many times to potentially use a compiled chunk of code before running compilation. By default, 3.
For testing, the value can be set to 0: compilation runs synchronously and the query waits for the end of the compilation process before continuing execution. For all other cases, use values starting with 1. Compilation normally takes about 5-10 seconds.
If the value is 1 or more, compilation occurs asynchronously in a separate thread. The result will be used as soon as it is ready, including queries that are currently running.
Default value: `1`.
Compiled code is required for each different combination of aggregate functions used in the query and the type of keys in the GROUP BY clause.
The results of the compilation are saved in the build directory in the form of .so files. There is no restriction on the number of compilation results since they do not use very much space. Old results will be used after server restarts, except in the case of a server upgrade; in this case, the old results are deleted.
## min_count_to_compile_expression {#min-count-to-compile-expression}
Minimum number of executions of the same expression before it is compiled.
Default value: `3`.
## output_format_json_quote_64bit_integers {#session_settings-output_format_json_quote_64bit_integers}
@ -1558,7 +1559,7 @@ Possible values:
- 0 — Disabled (final query processing is done on the initiator node).
- 1 - Do not merge aggregation states from different servers for distributed query processing (the query is completely processed on the shard, the initiator only proxies the data); can be used when it is certain that there are different keys on different shards.
- 2 - Same as `1` but applies `ORDER BY` and `LIMIT` (it is not possilbe when the query processed completelly on the remote node, like for `distributed_group_by_no_merge=1`) on the initiator (can be used for queries with `ORDER BY` and/or `LIMIT`).
- 2 - Same as `1` but applies `ORDER BY` and `LIMIT` (it is not possible when the query is processed completely on the remote node, as for `distributed_group_by_no_merge=1`) on the initiator (can be used for queries with `ORDER BY` and/or `LIMIT`).
**Example**
@ -1622,7 +1623,7 @@ Possible values:
Default value: 0
## optimize_skip_unused_shards_rewrite_in {#optimize-skip-unused-shardslrewrite-in}
## optimize_skip_unused_shards_rewrite_in {#optimize-skip-unused-shards-rewrite-in}
Rewrites `IN` in a query for remote shards to exclude values that do not belong to the shard (requires optimize_skip_unused_shards).
@ -2085,7 +2086,7 @@ Default value: 128.
## background_fetches_pool_size {#background_fetches_pool_size}
Sets the number of threads performing background fetches for [replicated](../../engines/table-engines/mergetree-family/replication.md) tables. This setting is applied at the ClickHouse server start and cant be changed in a user session. For production usage with frequent small insertions or slow ZooKeeper cluster is recomended to use default value.
Sets the number of threads performing background fetches for [replicated](../../engines/table-engines/mergetree-family/replication.md) tables. This setting is applied at the ClickHouse server start and can't be changed in a user session. For production usage with frequent small insertions or a slow ZooKeeper cluster, it is recommended to use the default value.
Possible values:
@ -2672,7 +2673,7 @@ Default value: `0`.
## aggregate_functions_null_for_empty {#aggregate_functions_null_for_empty}
Enables or disables rewriting all aggregate functions in a query, adding [-OrNull](../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-ornull) suffix to them. Enable it for SQL standard compatibility.
It is implemented via query rewrite (similar to [count_distinct_implementation](#settings-count_distinct_implementation) setting) to get consistent results for distributed queries.
Possible values:
@ -2856,7 +2857,7 @@ Default value: `0`.
## database_atomic_wait_for_drop_and_detach_synchronously {#database_atomic_wait_for_drop_and_detach_synchronously}
Adds a modifier `SYNC` to all `DROP` and `DETACH` queries.
Possible values:
@ -2962,7 +2963,7 @@ Enables or disables using the original column names instead of aliases in query
Possible values:
- 0 — The column name is substituted with the alias.
- 1 — The column name is not substituted with the alias.
Default value: `0`.
@ -3075,7 +3076,7 @@ SELECT
sum(a),
sumCount(b).1,
sumCount(b).2,
(sumCount(b).1) / (sumCount(b).2)
FROM fuse_tbl
```

View File

@ -30,14 +30,6 @@ Do not disable overcommit. The value `cat /proc/sys/vm/overcommit_memory` should
$ echo 0 | sudo tee /proc/sys/vm/overcommit_memory
```
## Huge Pages {#huge-pages}
Always disable transparent huge pages. It interferes with memory allocators, which leads to significant performance degradation.
``` bash
$ echo 'madvise' | sudo tee /sys/kernel/mm/transparent_hugepage/enabled
```
Use `perf top` to watch the time spent in the kernel for memory management.
Permanent huge pages also do not need to be allocated.
@ -91,6 +83,15 @@ The Linux kernel prior to 3.2 had a multitude of problems with IPv6 implementati
Use at least a 10 GB network, if possible. 1 Gb will also work, but it will be much worse for patching replicas with tens of terabytes of data, or for processing distributed queries with a large amount of intermediate data.
## Huge Pages {#huge-pages}
If you are using an old Linux kernel, disable transparent huge pages. It interferes with memory allocators, which leads to significant performance degradation.
On newer Linux kernels, transparent huge pages are alright.
``` bash
$ echo 'madvise' | sudo tee /sys/kernel/mm/transparent_hugepage/enabled
```
## Hypervisor configuration
If you are using OpenStack, set

View File

@ -1,37 +0,0 @@
---
toc_priority: 150
---
## initializeAggregation {#initializeaggregation}
Initializes aggregation for your input rows. It is intended for the functions with the suffix `State`.
Use it for tests or to process columns of types `AggregateFunction` and `AggregatingMergeTree`.
**Syntax**
``` sql
initializeAggregation (aggregate_function, column_1, column_2)
```
**Arguments**
- `aggregate_function` — Name of the aggregate function whose state is to be created. [String](../../../sql-reference/data-types/string.md#string).
- `column_n` — The column that is passed to the function as an argument. [String](../../../sql-reference/data-types/string.md#string).
**Returned value(s)**
Returns the result of the aggregation for your input rows. The return type is the same as the return type of the function that `initializeAggregation` takes as its first argument.
For example, for functions with the suffix `State`, the return type is `AggregateFunction`.
**Example**
Query:
```sql
SELECT uniqMerge(state) FROM (SELECT initializeAggregation('uniqState', number % 3) AS state FROM system.numbers LIMIT 10000);
```
Result:
┌─uniqMerge(state)─┐
│ 3 │
└──────────────────┘

View File

@ -486,6 +486,7 @@ Example of settings:
<table>table_name</table>
<where>id=10</where>
<invalidate_query>SQL_QUERY</invalidate_query>
<fail_on_connection_loss>true</fail_on_connection_loss>
</mysql>
</source>
```
@ -503,6 +504,7 @@ SOURCE(MYSQL(
table 'table_name'
where 'id=10'
invalidate_query 'SQL_QUERY'
fail_on_connection_loss 'true'
))
```
@ -527,6 +529,8 @@ Setting fields:
- `invalidate_query` — Query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md).
- `fail_on_connection_loss` — The configuration parameter that controls behavior of the server on connection loss. If `true`, an exception is thrown immediately if the connection between client and server was lost. If `false`, the ClickHouse server retries to execute the query three times before throwing an exception. Note that retrying leads to increased response times. Default value: `false`.
MySQL can be connected on a local host via sockets. To do this, set `host` and `socket`.
Example of settings:
@ -542,6 +546,7 @@ Example of settings:
<table>table_name</table>
<where>id=10</where>
<invalidate_query>SQL_QUERY</invalidate_query>
<fail_on_connection_loss>true</fail_on_connection_loss>
</mysql>
</source>
```
@ -558,6 +563,7 @@ SOURCE(MYSQL(
table 'table_name'
where 'id=10'
invalidate_query 'SQL_QUERY'
fail_on_connection_loss 'true'
))
```

View File

@ -831,7 +831,7 @@ Returns 0 for the first row and the difference from the previous row for each su
!!! warning "Warning"
It can reach the previous row only inside the currently processed data block.
The result of the function depends on the affected data blocks and the order of data in the block.
The order of rows used during the calculation of `runningDifference` can differ from the order of rows returned to the user.
@ -908,7 +908,7 @@ Same as for [runningDifference](./other-functions.md#other_functions-runningdiff
## runningConcurrency {#runningconcurrency}
Calculates the number of concurrent events.
Each event has a start time and an end time. The start time is included in the event, while the end time is excluded. Columns with a start time and an end time must be of the same data type.
The function calculates the total number of active (concurrent) events for each event start time.
@ -1424,11 +1424,83 @@ Result:
└───────────┴────────┘
```
## initializeAggregation {#initializeaggregation}
Calculates the result of an aggregate function based on a single value. This function is intended for initializing aggregate functions with the combinator [-State](../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-state). You can create states of aggregate functions and insert them into columns of type [AggregateFunction](../../sql-reference/data-types/aggregatefunction.md#data-type-aggregatefunction) or use initialized aggregates as default values.
**Syntax**
``` sql
initializeAggregation (aggregate_function, arg1, arg2, ..., argN)
```
**Arguments**
- `aggregate_function` — Name of the aggregation function to initialize. [String](../../sql-reference/data-types/string.md).
- `arg` — Arguments of the aggregate function.
**Returned value(s)**
- Result of aggregation for every row passed to the function.
The return type is the same as the return type of the function that `initializeAggregation` takes as its first argument.
**Example**
Query:
```sql
SELECT uniqMerge(state) FROM (SELECT initializeAggregation('uniqState', number % 3) AS state FROM numbers(10000));
```
Result:
```text
┌─uniqMerge(state)─┐
│ 3 │
└──────────────────┘
```
Query:
```sql
SELECT finalizeAggregation(state), toTypeName(state) FROM (SELECT initializeAggregation('sumState', number % 3) AS state FROM numbers(5));
```
Result:
```text
┌─finalizeAggregation(state)─┬─toTypeName(state)─────────────┐
│ 0 │ AggregateFunction(sum, UInt8) │
│ 1 │ AggregateFunction(sum, UInt8) │
│ 2 │ AggregateFunction(sum, UInt8) │
│ 0 │ AggregateFunction(sum, UInt8) │
│ 1 │ AggregateFunction(sum, UInt8) │
└────────────────────────────┴───────────────────────────────┘
```
Example with `AggregatingMergeTree` table engine and `AggregateFunction` column:
```sql
CREATE TABLE metrics
(
key UInt64,
value AggregateFunction(sum, UInt64) DEFAULT initializeAggregation('sumState', toUInt64(0))
)
ENGINE = AggregatingMergeTree
ORDER BY key
```
```sql
INSERT INTO metrics VALUES (0, initializeAggregation('sumState', toUInt64(42)))
```
**See Also**
- [arrayReduce](../../sql-reference/functions/array-functions.md#arrayreduce)
## finalizeAggregation {#function-finalizeaggregation}
Takes a state of an aggregate function. Returns the result of aggregation (or the finalized state when using the [-State](../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-state) combinator).
**Syntax**
``` sql
finalizeAggregation(state)
@ -1442,7 +1514,7 @@ finalizeAggregation(state)
- Value/values that were aggregated.
Type: Value of any type that was aggregated.
**Examples**
@ -1474,7 +1546,7 @@ Result:
└──────────────────────────────────┘
```
Note that `NULL` values are ignored.
Query:
@ -1520,10 +1592,9 @@ Result:
└────────┴─────────────┴────────────────┘
```
**See Also**
- [arrayReduce](../../sql-reference/functions/array-functions.md#arrayreduce)
- [initializeAggregation](../../sql-reference/aggregate-functions/reference/initializeAggregation.md)
- [initializeAggregation](#initializeaggregation)
## runningAccumulate {#runningaccumulate}

View File

@ -119,7 +119,7 @@ For manage uncompressed data cache parameters use following server level setting
## DROP COMPILED EXPRESSION CACHE {#query_language-system-drop-compiled-expression-cache}
Reset the compiled expression cache. Used in development of ClickHouse and performance tests.
Complied expression cache used when query/user/profile enable option [compile](../../operations/settings/settings.md#compile)
The compiled expression cache is used when the query/user/profile enables the option [compile-expressions](../../operations/settings/settings.md#compile-expressions).
## FLUSH LOGS {#query_language-system-flush_logs}

View File

@ -817,22 +817,6 @@ load_balancing = first_or_random
For consistency (to get different parts of the same data split), this option only works when the sampling key is set.
Replica lag is not controlled.
## compile {#compile}
Enables compilation of queries. By default, 0 (disabled).
Compilation is only used for part of the query-processing pipeline.
If this part of the pipeline was compiled, the query may run faster due to unrolling of short loops and inlining of aggregate function calls. The maximum performance improvement is seen for queries with multiple simple aggregate functions. Typically, the performance gain is insignificant. In very rare cases, it may slow down query execution.
## min_count_to_compile {#min-count-to-compile}
How many times to potentially use a compiled chunk of code before running compilation. By default, 3.
For testing, the value can be set to 0: compilation runs synchronously and the query waits for the end of the compilation process before continuing execution. For all other cases, use values starting with 1. Compilation normally takes about 5-10 seconds.
If the value is 1 or more, compilation occurs asynchronously in a separate thread. The result will be used as soon as it is ready, including queries that are currently running.
Compiled code is required for each different combination of aggregate functions used in the query and the type of keys in the GROUP BY clause.
The results of the compilation are saved in the build directory in the form of .so files. There is no restriction on the number of compilation results since they don't use very much space. Old results will be used after server restarts, except in the case of a server upgrade; in this case, the old results are deleted.
## output_format_json_quote_64bit_integers {#session_settings-output_format_json_quote_64bit_integers}
If the value is true, Int64 and UInt64 integers are displayed in quotes when using JSON* formats, for compatibility with most JavaScript implementations.

View File

@ -348,7 +348,7 @@ INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 102 2;
## input_format_null_as_default {#settings-input-format-null-as-default}
Enables or disables the initialization of [NULL](../../sql-reference/syntax.md#null-literal) cells with [default values](../../sql-reference/statements/create/table.md#create-default-values) if the column data type does not allow [storing NULL](../../sql-reference/data-types/nullable.md#data_type-nullable).
If the column does not allow storing `NULL` and this setting is disabled, inserting `NULL` causes an exception. If the column allows storing `NULL`, then `NULL` values are inserted regardless of this setting.
This setting is used for [INSERT ... VALUES](../../sql-reference/statements/insert-into.md) queries with text input formats.
@ -361,7 +361,7 @@ INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 102 2;
## insert_null_as_default {#insert_null_as_default}
Enables or disables the insertion of [default values](../../sql-reference/statements/create/table.md#create-default-values) instead of [NULL](../../sql-reference/syntax.md#null-literal) into columns that do not allow [storing NULL](../../sql-reference/data-types/nullable.md#data_type-nullable).
If the column does not allow storing `NULL` and this setting is disabled, inserting `NULL` causes an exception. If the column allows storing `NULL`, then `NULL` values are inserted regardless of this setting.
This setting is used for [INSERT ... SELECT](../../sql-reference/statements/insert-into.md#insert_query_insert-select) queries. `SELECT` subqueries may be combined with the `UNION ALL` clause.
@ -1181,22 +1181,22 @@ load_balancing = round_robin
!!! warning "Предупреждение"
Параллельное выполнение запроса может привести к неверному результату, если в запросе есть объединение или подзапросы и при этом таблицы не удовлетворяют определенным требованиям. Подробности смотрите в разделе [Распределенные подзапросы и max_parallel_replicas](../../sql-reference/operators/in.md#max_parallel_replica-subqueries).
## compile_expressions {#compile-expressions}

Enables or disables the compilation of frequently used simple functions and operators to native code with LLVM at runtime.

Possible values:

- 0 — compilation is disabled.
- 1 — compilation is enabled.

Default value: `1`.

## min_count_to_compile_expression {#min-count-to-compile-expression}

The minimum number of times the same expression must be executed before it is compiled.

Default value: `3`.
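A hedged usage sketch (session-level `SET`; the threshold value is illustrative):

```sql
SET compile_expressions = 1;
SET min_count_to_compile_expression = 3;

-- After the same expression has been evaluated min_count_to_compile_expression
-- times, it becomes a candidate for JIT compilation.
SELECT count() FROM numbers(1000000) WHERE number * 2 + 1 > 100;
```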
## input_format_skip_unknown_fields {#input-format-skip-unknown-fields}
@ -2721,7 +2721,7 @@ SELECT * FROM test2;
- 0 — an `INSERT` query appends data to the end of the file after the existing data.
- 1 — an `INSERT` query deletes the existing data in the file and replaces it with the new data.

Default value: `0`.
## allow_experimental_geo_types {#allow-experimental-geo-types}
@ -2735,7 +2735,7 @@ SELECT * FROM test2;
## database_atomic_wait_for_drop_and_detach_synchronously {#database_atomic_wait_for_drop_and_detach_synchronously}
Adds the `SYNC` modifier to all `DROP` and `DETACH` queries.

Possible values:
@ -2813,7 +2813,7 @@ SELECT * FROM test2;
**Example**

What changes when the setting is enabled or disabled:

Query:

View File

@ -1,40 +0,0 @@
---
toc_priority: 150
---
## initializeAggregation {#initializeaggregation}
Initializes aggregation for the input rows. Intended for functions with the `State` suffix.
Helps you run tests or work with columns of the `AggregateFunction` type and the `AggregatingMergeTree` table engine.
**Syntax**
``` sql
initializeAggregation (aggregate_function, column_1, column_2)
```
**Arguments**

- `aggregate_function` — the name of the aggregate function whose state should be created. [String](../../../sql-reference/data-types/string.md#string).
- `column_n` — the column that is passed to the aggregate function as an argument. [String](../../../sql-reference/data-types/string.md#string).

**Returned value**

Returns the result of aggregating the input data. The return type is the same as that of the function passed as the first argument to `initializeAggregation`.
Example:

The return type of functions with the `State` suffix is `AggregateFunction`.

**Example**

Query:
```sql
SELECT uniqMerge(state) FROM (SELECT initializeAggregation('uniqState', number % 3) AS state FROM system.numbers LIMIT 10000);
```
Result:
```text
┌─uniqMerge(state)─┐
│                3 │
└──────────────────┘
```

View File

@ -486,6 +486,7 @@ LIFETIME(MIN 300 MAX 360)
<table>table_name</table>
<where>id=10</where>
<invalidate_query>SQL_QUERY</invalidate_query>
<fail_on_connection_loss>true</fail_on_connection_loss>
</mysql>
</source>
```
@ -503,6 +504,7 @@ SOURCE(MYSQL(
table 'table_name'
where 'id=10'
invalidate_query 'SQL_QUERY'
fail_on_connection_loss 'true'
))
```
@ -527,6 +529,8 @@ SOURCE(MYSQL(
- `invalidate_query` — a query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](external-dicts-dict-lifetime.md).
- `fail_on_connection_loss` — a configuration parameter that controls the server behavior on connection loss. If `true`, an exception is thrown immediately when the connection between the client and the server is lost. If `false`, the server retries the query three times before throwing an exception. Note that the retries can increase query execution time. Default value: `false`.

MySQL can be connected to on the local host via sockets; to do this, set `host` and `socket`.

Configuration example:
@ -542,6 +546,7 @@ MySQL можно подключить на локальном хосте чер
<table>table_name</table>
<where>id=10</where>
<invalidate_query>SQL_QUERY</invalidate_query>
<fail_on_connection_loss>true</fail_on_connection_loss>
</mysql>
</source>
```
@ -558,6 +563,7 @@ SOURCE(MYSQL(
table 'table_name'
where 'id=10'
invalidate_query 'SQL_QUERY'
fail_on_connection_loss 'true'
))
```

View File

@ -13,7 +13,7 @@ toc_title: "Прочие функции"
Returns a named value from the [macros](../../operations/server-configuration-parameters/settings.md#macros) section of the server configuration.

**Syntax**
```sql
getMacro(name)
@ -854,8 +854,8 @@ WHERE diff != 1
## runningConcurrency {#runningconcurrency}
Counts the number of concurrently running events.

Each event has a start time and an end time. The start time is included in the event; the end time is excluded. Columns with the event start and end times must have the same data type.

The function calculates the number of events running simultaneously at the start time of each event in the sample.
!!! warning "Warning"
    Events must be sorted in ascending order of the start time. If this requirement is violated, the function throws an exception.
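A minimal sketch with a single synthetic event (the page's full example is elided by this diff, so the query below is illustrative only):

```sql
SELECT start, runningConcurrency(start, end) AS concurrency
FROM
(
    SELECT toDate('2021-03-03') AS start, toDate('2021-03-05') AS end
);
```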
@ -1371,11 +1371,84 @@ SELECT formatReadableSize(filesystemCapacity()) AS "Capacity", toTypeName(filesy
└───────────┴────────┘
```
## initializeAggregation {#initializeaggregation}
Calculates the result of an aggregate function for each row. Intended for initializing aggregate functions with the [-State](../../sql-reference/aggregate-functions/combinators.md#state) combinator. Can be useful for creating aggregate function states to later insert them into columns of the [AggregateFunction](../../sql-reference/data-types/aggregatefunction.md#data-type-aggregatefunction) type or to use them as default values.
**Syntax**
``` sql
initializeAggregation (aggregate_function, arg1, arg2, ..., argN)
```
**Arguments**

- `aggregate_function` — the name of the aggregate function whose state should be created. [String](../../sql-reference/data-types/string.md#string).
- `arg` — the arguments that are passed to the aggregate function.

**Returned value**

- In each row, the result of the aggregate function applied to the arguments from that row.

The return type is the same as that of the function passed as the first argument.
**Example**

Query:
```sql
SELECT uniqMerge(state) FROM (SELECT initializeAggregation('uniqState', number % 3) AS state FROM numbers(10000));
```
Result:
```text
┌─uniqMerge(state)─┐
│                3 │
└──────────────────┘
```
Query:
```sql
SELECT finalizeAggregation(state), toTypeName(state) FROM (SELECT initializeAggregation('sumState', number % 3) AS state FROM numbers(5));
```
Result:
```text
┌─finalizeAggregation(state)─┬─toTypeName(state)─────────────┐
│                          0 │ AggregateFunction(sum, UInt8) │
│                          1 │ AggregateFunction(sum, UInt8) │
│                          2 │ AggregateFunction(sum, UInt8) │
│                          0 │ AggregateFunction(sum, UInt8) │
│                          1 │ AggregateFunction(sum, UInt8) │
└────────────────────────────┴───────────────────────────────┘
```
An example with the `AggregatingMergeTree` table engine and a column of the `AggregateFunction` type:
```sql
CREATE TABLE metrics
(
key UInt64,
value AggregateFunction(sum, UInt64) DEFAULT initializeAggregation('sumState', toUInt64(0))
)
ENGINE = AggregatingMergeTree
ORDER BY key
```
```sql
INSERT INTO metrics VALUES (0, initializeAggregation('sumState', toUInt64(42)))
```
**See Also**
- [arrayReduce](../../sql-reference/functions/array-functions.md#arrayreduce)
## finalizeAggregation {#function-finalizeaggregation}
Takes a state of an aggregate function. Returns the result of aggregation (or the finalized state, when using the [-State](../../sql-reference/aggregate-functions/combinators.md#state) combinator).

**Syntax**
``` sql
finalizeAggregation(state)
@ -1421,7 +1494,7 @@ SELECT finalizeAggregation(( SELECT sumState(number) FROM numbers(10)));
└──────────────────────────────────┘
```
Note that `NULL` values are ignored.
Query:
@ -1470,7 +1543,7 @@ FROM numbers(10);
**See Also**
- [arrayReduce](../../sql-reference/functions/array-functions.md#arrayreduce)
- [initializeAggregation](#initializeaggregation)
## runningAccumulate {#runningaccumulate}
@ -1537,13 +1610,13 @@ SELECT k, runningAccumulate(sum_k) AS res FROM (SELECT number as k, sumState(k)
Query:
```sql
SELECT
    grouping,
    item,
    runningAccumulate(state, grouping) AS res
FROM
(
    SELECT
        toInt8(number / 4) AS grouping,
        number AS item,
        sumState(number) AS state
@ -1732,7 +1805,7 @@ SELECT number, randomPrintableASCII(30) as str, length(str) FROM system.numbers
randomString(length)
```
**Arguments**

- `length` — the length of the string. A positive integer.
@ -1831,13 +1904,13 @@ randomStringUTF8(length)
Query:

```sql
SELECT randomStringUTF8(13)
```
Result:

```text
┌─randomStringUTF8(13)─┐
│ 𘤗𙉝д兠庇󡅴󱱎󦐪􂕌𔊹𓰛 │
└──────────────────────┘
@ -1848,13 +1921,13 @@ SELECT randomStringUTF8(13)
Returns the current value of a [custom setting](../../operations/settings/index.md#custom_settings).

**Syntax**
```sql
getSetting('custom_setting')
```
**Parameter**

- `custom_setting` — the name of the setting. [String](../../sql-reference/data-types/string.md).
@ -1866,7 +1939,7 @@ getSetting('custom_setting')
```sql
SET custom_a = 123;
SELECT getSetting('custom_a');
```
**Result**
@ -1875,7 +1948,7 @@ SELECT getSetting('custom_a');
123
```
**See Also**

- [Custom settings](../../operations/settings/index.md#custom_settings)
@ -1889,10 +1962,10 @@ SELECT getSetting('custom_a');
isDecimalOverflow(d, [p])
```
**Arguments**

- `d` — a number. [Decimal](../../sql-reference/data-types/decimal.md).
- `p` — precision. Optional parameter. If omitted, the original precision of the first argument is used. Using this parameter can be helpful when extracting data to another DBMS or a file. [UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges).
**Returned value**
@ -1926,7 +1999,7 @@ SELECT isDecimalOverflow(toDecimal32(1000000000, 0), 9),
countDigits(x)
```
**Arguments**

- `x` — an [integer](../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64) or [decimal](../../sql-reference/data-types/decimal.md) number.
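A minimal sketch (the returned-value part of this section is elided by the diff; the query below is illustrative):

```sql
SELECT countDigits(toDecimal32(1, 9)), countDigits(toDecimal32(-1, 9)), countDigits(toDecimal64(1, 18));
```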

View File

@ -5,14 +5,14 @@ toc_title: SYSTEM
# SYSTEM Statements {#query-language-system}

- [RELOAD EMBEDDED DICTIONARIES](#query_language-system-reload-emdedded-dictionaries)
- [RELOAD DICTIONARIES](#query_language-system-reload-dictionaries)
- [RELOAD DICTIONARY](#query_language-system-reload-dictionary)
- [RELOAD MODELS](#query_language-system-reload-models)
- [RELOAD MODEL](#query_language-system-reload-model)
- [DROP DNS CACHE](#query_language-system-drop-dns-cache)
- [DROP MARK CACHE](#query_language-system-drop-mark-cache)
- [DROP UNCOMPRESSED CACHE](#query_language-system-drop-uncompressed-cache)
- [DROP COMPILED EXPRESSION CACHE](#query_language-system-drop-compiled-expression-cache)
- [DROP REPLICA](#query_language-system-drop-replica)
- [FLUSH LOGS](#query_language-system-flush_logs)
@ -24,10 +24,10 @@ toc_title: SYSTEM
- [START DISTRIBUTED SENDS](#query_language-system-start-distributed-sends)
- [STOP MERGES](#query_language-system-stop-merges)
- [START MERGES](#query_language-system-start-merges)
- [STOP TTL MERGES](#query_language-stop-ttl-merges)
- [START TTL MERGES](#query_language-start-ttl-merges)
- [STOP MOVES](#query_language-stop-moves)
- [START MOVES](#query_language-start-moves)
- [STOP FETCHES](#query_language-system-stop-fetches)
- [START FETCHES](#query_language-system-start-fetches)
- [STOP REPLICATED SENDS](#query_language-system-start-replicated-sends)
@ -36,13 +36,13 @@ toc_title: SYSTEM
- [START REPLICATION QUEUES](#query_language-system-start-replication-queues)
- [SYNC REPLICA](#query_language-system-sync-replica)
- [RESTART REPLICA](#query_language-system-restart-replica)
- [RESTART REPLICAS](#query_language-system-restart-replicas)
## RELOAD EMBEDDED DICTIONARIES {#query_language-system-reload-emdedded-dictionaries}

Reloads all [internal dictionaries](../dictionaries/internal-dicts.md).
Internal dictionaries are disabled by default.
Always returns `Ok.`, regardless of the result of updating the internal dictionaries.
## RELOAD DICTIONARIES {#query_language-system-reload-dictionaries}
Reloads all dictionaries that have previously been loaded successfully.
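A minimal usage sketch for the reload statements above:

```sql
SYSTEM RELOAD EMBEDDED DICTIONARIES;
SYSTEM RELOAD DICTIONARIES;
```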
@ -115,7 +115,7 @@ SYSTEM DROP REPLICA 'replica_name' FROM ZKPATH '/path/to/table/in/zk';
## DROP COMPILED EXPRESSION CACHE {#query_language-system-drop-compiled-expression-cache}
Resets the compiled expression cache. Used in ClickHouse development and performance tests.
The compiled expression cache is used when the query/user/profile-level setting [compile-expressions](../../operations/settings/settings.md#compile-expressions) is enabled.
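A minimal usage sketch:

```sql
SYSTEM DROP COMPILED EXPRESSION CACHE;
```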
## FLUSH LOGS {#query_language-system-flush_logs}
@ -194,7 +194,7 @@ SYSTEM START MERGES [ON VOLUME <volume_name> | [db.]merge_tree_family_table_name
SYSTEM STOP TTL MERGES [[db.]merge_tree_family_table_name]
```
### START TTL MERGES {#query_language-start-ttl-merges}

Starts background deletion of old data based on [TTL expressions](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl) for MergeTree-family tables.
Returns `Ok.` even if the specified table does not exist or is not a MergeTree-family table. Returns an error if a non-existent database is specified:
@ -203,7 +203,7 @@ SYSTEM STOP TTL MERGES [[db.]merge_tree_family_table_name]
SYSTEM START TTL MERGES [[db.]merge_tree_family_table_name]
```
### STOP MOVES {#query_language-stop-moves}

Stops background data moves based on [table TTL expressions with TO VOLUME or TO DISK](../../engines/table-engines/mergetree-family/mergetree.md#mergetree-table-ttl) for MergeTree-family tables.
Returns `Ok.` even if the specified table does not exist or is not a MergeTree-family table. Returns an error if a non-existent database is specified:
@ -212,7 +212,7 @@ SYSTEM START TTL MERGES [[db.]merge_tree_family_table_name]
SYSTEM STOP MOVES [[db.]merge_tree_family_table_name]
```
### START MOVES {#query_language-start-moves}

Starts background data moves based on [table TTL expressions with TO VOLUME or TO DISK](../../engines/table-engines/mergetree-family/mergetree.md#mergetree-table-ttl) for MergeTree-family tables.
Returns `Ok.` even if the specified table does not exist or is not a MergeTree-family table. Returns an error if a non-existent database is specified:
@ -261,7 +261,7 @@ SYSTEM START REPLICATED SENDS [[db.]replicated_merge_tree_family_table_name]
### STOP REPLICATION QUEUES {#query_language-system-stop-replication-queues}
Stops background processing of tasks from the replication queue stored in ZooKeeper for `ReplicatedMergeTree`-family tables. Possible task types: merges, fetches, mutations, DDL queries with ON CLUSTER:
``` sql
SYSTEM STOP REPLICATION QUEUES [[db.]replicated_merge_tree_family_table_name]
@ -269,7 +269,7 @@ SYSTEM STOP REPLICATION QUEUES [[db.]replicated_merge_tree_family_table_name]
### START REPLICATION QUEUES {#query_language-system-start-replication-queues}
Starts background processing of tasks from the replication queue stored in ZooKeeper for `ReplicatedMergeTree`-family tables. Possible task types: merges, fetches, mutations, DDL queries with ON CLUSTER:
``` sql
SYSTEM START REPLICATION QUEUES [[db.]replicated_merge_tree_family_table_name]
@ -277,7 +277,7 @@ SYSTEM START REPLICATION QUEUES [[db.]replicated_merge_tree_family_table_name]
### SYNC REPLICA {#query_language-system-sync-replica}
Waits until a `ReplicatedMergeTree`-family table is synchronized with the other replicas in the cluster; if synchronization is currently disabled for the table, it runs until `receive_timeout` is reached:
``` sql
SYSTEM SYNC REPLICA [db.]replicated_merge_tree_family_table_name

View File

@ -47,6 +47,13 @@ When all prerequisites are installed, running `build.py` without args (there are
The easiest way to see the result is to use the `--livereload=8888` argument of `build.py`. Alternatively, you can manually launch an HTTP server to serve the docs, for example by running `cd ClickHouse/docs/build && python3 -m http.server 8888`. Then go to http://localhost:8888 in a browser. Feel free to use any other port instead of 8888.
## How to change code highlighting? {#how-to-change-code-hl}
ClickHouse does not use the mkdocs `highlightjs` feature. It uses modified pygments styles instead.
If you want to change the code highlighting, edit the `website/css/highlight.css` file.
Currently, an [eighties](https://github.com/idleberg/base16-pygments/blob/master/css/base16-eighties.dark.css) theme is used.
## How to subscribe to documentation changes? {#how-to-subscribe-on-documentation-changes}
At the moment there's no easy way to do just that, but you can consider:

View File

@ -87,6 +87,7 @@ def build_for_lang(lang, args):
website_url = 'https://clickhouse.tech'
site_name = site_names.get(lang, site_names['en']) % ''
site_name = site_name.replace(' ', ' ')
raw_config = dict(
site_name=site_name,
site_url=f'{website_url}/docs/{lang}/',

View File

@ -817,21 +817,22 @@ load_balancing = first_or_random
For consistency (to get different parts of the same data split), this option only works when a sampling key is set.
Replica lag is not controlled.
## compile_expressions {#compile-expressions}

Enables or disables compiling frequently used simple functions and operators to native code with LLVM at runtime.

Possible values:

- 0 — Disabled.
- 1 — Enabled.

Default value: `1`.

## min_count_to_compile_expression {#min-count-to-compile-expression}

The minimum number of times the same expression must be executed before it is compiled.

Default value: `3`.
## output_format_json_quote_64bit_integers {#session_settings-output_format_json_quote_64bit_integers}

View File

@ -95,7 +95,7 @@ SYSTEM DROP REPLICA 'replica_name' FROM ZKPATH '/path/to/table/in/zk';
## DROP COMPILED EXPRESSION CACHE {#query_language-system-drop-compiled-expression-cache}
Resets the compiled expression cache. Used in ClickHouse development and performance tests.
The compiled expression cache is enabled when the `query/user/profile`-level setting [compile-expressions](../../operations/settings/settings.md#compile-expressions) is enabled.
## FLUSH LOGS {#query_language-system-flush_logs}
@ -209,7 +209,7 @@ SYSTEM STOP MOVES [[db.]merge_tree_family_table_name]
### STOP FETCHES {#query_language-system-stop-fetches}
Stops background fetches of newly inserted data parts for `ReplicatedMergeTree`-family tables.
Returns `Ok.` regardless of the table engine type and whether the table or database exists.
``` sql
@ -218,7 +218,7 @@ SYSTEM STOP FETCHES [[db.]replicated_merge_tree_family_table_name]
### START FETCHES {#query_language-system-start-fetches}
Starts background fetches of newly inserted data parts for `ReplicatedMergeTree`-family tables.
Returns `Ok.` regardless of the table engine type and whether the table or database exists.
``` sql
@ -227,7 +227,7 @@ SYSTEM START FETCHES [[db.]replicated_merge_tree_family_table_name]
### STOP REPLICATED SENDS {#query_language-system-start-replicated-sends}
Stops background sending of newly inserted data parts to the other replicas in the cluster for `ReplicatedMergeTree`-family tables.
``` sql
SYSTEM STOP REPLICATED SENDS [[db.]replicated_merge_tree_family_table_name]
@ -235,7 +235,7 @@ SYSTEM STOP REPLICATED SENDS [[db.]replicated_merge_tree_family_table_name]
### START REPLICATED SENDS {#query_language-system-start-replicated-sends}
Starts background sending of newly inserted data parts to the other replicas in the cluster for `ReplicatedMergeTree`-family tables.
``` sql
SYSTEM START REPLICATED SENDS [[db.]replicated_merge_tree_family_table_name]

View File

@ -577,7 +577,18 @@ private:
}
if (!history_file.empty() && !fs::exists(history_file))
FS::createFile(history_file);
{
/// Avoid TOCTOU issue.
try
{
FS::createFile(history_file);
}
catch (const ErrnoException & e)
{
if (e.getErrno() != EEXIST)
throw;
}
}
LineReader::Patterns query_extenders = {"\\"};
LineReader::Patterns query_delimiters = {";", "\\G"};
@ -2435,6 +2446,8 @@ public:
{
/// param_name value
++arg_num;
if (arg_num >= argc)
throw Exception("Parameter requires value", ErrorCodes::BAD_ARGUMENTS);
arg = argv[arg_num];
query_parameters.emplace(String(param_continuation), String(arg));
}

View File

@ -324,6 +324,13 @@ Poco::Net::SocketAddress Server::socketBindListen(Poco::Net::ServerSocket & sock
socket.bind(address, /* reuseAddress = */ true, /* reusePort = */ config().getBool("listen_reuse_port", false));
#endif
/// If caller requests any available port from the OS, discover it after binding.
if (port == 0)
{
address = socket.address();
LOG_DEBUG(&logger(), "Requested any available port (port == 0), actual port is {:d}", address.port());
}
socket.listen(/* backlog = */ config().getUInt("listen_backlog", 64));
return address;
@ -390,7 +397,7 @@ void Server::initialize(Poco::Util::Application & self)
BaseDaemon::initialize(self);
logger().information("starting up");
LOG_INFO(&logger(), "OS Name = {}, OS Version = {}, OS Architecture = {}",
LOG_INFO(&logger(), "OS name: {}, version: {}, architecture: {}",
Poco::Environment::osName(),
Poco::Environment::osVersion(),
Poco::Environment::osArchitecture());

View File

@ -30,16 +30,16 @@ static IAggregateFunction * createWithNumericOrTimeType(const IDataType & argume
template <typename Trait, typename ... TArgs>
inline AggregateFunctionPtr createAggregateFunctionGroupArrayImpl(const DataTypePtr & argument_type, TArgs ... args)
inline AggregateFunctionPtr createAggregateFunctionGroupArrayImpl(const DataTypePtr & argument_type, const Array & parameters, TArgs ... args)
{
if (auto res = createWithNumericOrTimeType<GroupArrayNumericImpl, Trait>(*argument_type, argument_type, std::forward<TArgs>(args)...))
if (auto res = createWithNumericOrTimeType<GroupArrayNumericImpl, Trait>(*argument_type, argument_type, parameters, std::forward<TArgs>(args)...))
return AggregateFunctionPtr(res);
WhichDataType which(argument_type);
if (which.idx == TypeIndex::String)
return std::make_shared<GroupArrayGeneralImpl<GroupArrayNodeString, Trait>>(argument_type, std::forward<TArgs>(args)...);
return std::make_shared<GroupArrayGeneralImpl<GroupArrayNodeString, Trait>>(argument_type, parameters, std::forward<TArgs>(args)...);
return std::make_shared<GroupArrayGeneralImpl<GroupArrayNodeGeneral, Trait>>(argument_type, std::forward<TArgs>(args)...);
return std::make_shared<GroupArrayGeneralImpl<GroupArrayNodeGeneral, Trait>>(argument_type, parameters, std::forward<TArgs>(args)...);
// Link list implementation doesn't show noticeable performance improvement
// if (which.idx == TypeIndex::String)
@ -79,9 +79,9 @@ AggregateFunctionPtr createAggregateFunctionGroupArray(
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
if (!limit_size)
return createAggregateFunctionGroupArrayImpl<GroupArrayTrait<false, Sampler::NONE>>(argument_types[0]);
return createAggregateFunctionGroupArrayImpl<GroupArrayTrait<false, Sampler::NONE>>(argument_types[0], parameters);
else
return createAggregateFunctionGroupArrayImpl<GroupArrayTrait<true, Sampler::NONE>>(argument_types[0], max_elems);
return createAggregateFunctionGroupArrayImpl<GroupArrayTrait<true, Sampler::NONE>>(argument_types[0], parameters, max_elems);
}
AggregateFunctionPtr createAggregateFunctionGroupArraySample(
@ -114,7 +114,7 @@ AggregateFunctionPtr createAggregateFunctionGroupArraySample(
else
seed = thread_local_rng();
return createAggregateFunctionGroupArrayImpl<GroupArrayTrait<true, Sampler::RNG>>(argument_types[0], max_elems, seed);
return createAggregateFunctionGroupArrayImpl<GroupArrayTrait<true, Sampler::RNG>>(argument_types[0], parameters, max_elems, seed);
}
}

View File

@ -119,9 +119,9 @@ class GroupArrayNumericImpl final
public:
explicit GroupArrayNumericImpl(
const DataTypePtr & data_type_, UInt64 max_elems_ = std::numeric_limits<UInt64>::max(), UInt64 seed_ = 123456)
const DataTypePtr & data_type_, const Array & parameters_, UInt64 max_elems_ = std::numeric_limits<UInt64>::max(), UInt64 seed_ = 123456)
: IAggregateFunctionDataHelper<GroupArrayNumericData<T, Trait::sampler != Sampler::NONE>, GroupArrayNumericImpl<T, Trait>>(
{data_type_}, {})
{data_type_}, parameters_)
, max_elems(max_elems_)
, seed(seed_)
{
@ -421,9 +421,9 @@ class GroupArrayGeneralImpl final
UInt64 seed;
public:
GroupArrayGeneralImpl(const DataTypePtr & data_type_, UInt64 max_elems_ = std::numeric_limits<UInt64>::max(), UInt64 seed_ = 123456)
GroupArrayGeneralImpl(const DataTypePtr & data_type_, const Array & parameters_, UInt64 max_elems_ = std::numeric_limits<UInt64>::max(), UInt64 seed_ = 123456)
: IAggregateFunctionDataHelper<GroupArrayGeneralData<Node, Trait::sampler != Sampler::NONE>, GroupArrayGeneralImpl<Node, Trait>>(
{data_type_}, {})
{data_type_}, parameters_)
, data_type(this->argument_types[0])
, max_elems(max_elems_)
, seed(seed_)
@ -696,8 +696,8 @@ class GroupArrayGeneralListImpl final
UInt64 max_elems;
public:
GroupArrayGeneralListImpl(const DataTypePtr & data_type_, UInt64 max_elems_ = std::numeric_limits<UInt64>::max())
: IAggregateFunctionDataHelper<GroupArrayGeneralListData<Node>, GroupArrayGeneralListImpl<Node, Trait>>({data_type_}, {})
GroupArrayGeneralListImpl(const DataTypePtr & data_type_, const Array & parameters_, UInt64 max_elems_ = std::numeric_limits<UInt64>::max())
: IAggregateFunctionDataHelper<GroupArrayGeneralListData<Node>, GroupArrayGeneralListImpl<Node, Trait>>({data_type_}, parameters_)
, data_type(this->argument_types[0])
, max_elems(max_elems_)
{

View File

@ -1,6 +1,7 @@
#pragma once
#include <algorithm>
#include <memory>
#include <boost/noncopyable.hpp>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
@ -43,7 +44,7 @@ private:
void toLarge()
{
rb = std::make_shared<RoaringBitmap>();
rb = std::make_unique<RoaringBitmap>();
for (const auto & x : small)
rb->add(static_cast<Value>(x.getValue()));
small.clear();
@ -113,7 +114,7 @@ public:
readVarUInt(size, in);
std::unique_ptr<char[]> buf(new char[size]);
in.readStrict(buf.get(), size);
rb = std::make_shared<RoaringBitmap>(RoaringBitmap::read(buf.get()));
rb = std::make_unique<RoaringBitmap>(RoaringBitmap::read(buf.get()));
}
}
@ -140,7 +141,7 @@ public:
*/
std::shared_ptr<RoaringBitmap> getNewRoaringBitmapFromSmall() const
{
std::shared_ptr<RoaringBitmap> ret = std::make_shared<RoaringBitmap>();
std::shared_ptr<RoaringBitmap> ret = std::make_unique<RoaringBitmap>();
for (const auto & x : small)
ret->add(static_cast<Value>(x.getValue()));
return ret;

View File

@ -1,6 +1,7 @@
#pragma once
#include <string>
#include <optional>
#include <vector>
#include <boost/noncopyable.hpp>
#include <unordered_map>

View File

@ -60,7 +60,8 @@ struct ThreadStack
void * getData() const { return data; }
private:
static constexpr size_t size = 16 << 10; /// 16 KiB - not too big but enough to handle error.
/// 16 KiB - not too big but enough to handle error.
static constexpr size_t size = std::max<size_t>(16 << 10, MINSIGSTKSZ);
void * data;
};

View File

@ -44,7 +44,7 @@ size_t getStackSize(void ** out_address)
size = pthread_main_np() ? (8 * 1024 * 1024) : pthread_get_stacksize_np(thread);
// stack address points to the start of the stack, not the end how it's returned by pthread_get_stackaddr_np
address = reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(pthread_get_stackaddr_np(thread)) - max_stack_size);
address = reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(pthread_get_stackaddr_np(thread)) - size);
#else
pthread_attr_t attr;
# if defined(__FreeBSD__) || defined(OS_SUNOS)

View File

@ -2,6 +2,7 @@
#include <map>
#include <list>
#include <optional>
#include <string>
#include <set>
#include <initializer_list>

View File

@ -469,6 +469,7 @@ class IColumn;
M(UnionMode, union_default_mode, UnionMode::Unspecified, "Set default Union Mode in SelectWithUnion query. Possible values: empty string, 'ALL', 'DISTINCT'. If empty, query without Union Mode will throw exception.", 0) \
M(Bool, optimize_aggregators_of_group_by_keys, true, "Eliminates min/max/any/anyLast aggregators of GROUP BY keys in SELECT section", 0) \
M(Bool, optimize_group_by_function_keys, true, "Eliminates functions of other keys in GROUP BY section", 0) \
M(Bool, legacy_column_name_of_tuple_literal, false, "List all names of element of large tuple literals in their column names instead of hash. This setting exists only for compatibility reasons. It makes sense to set it to 'true' while doing a rolling update of a cluster from a version lower than 21.7 to a higher one.", 0) \
\
M(Bool, query_plan_enable_optimizations, true, "Apply optimizations to query plan", 0) \
M(UInt64, query_plan_max_optimizations_to_apply, 10000, "Limit the total number of optimizations applied to query plan. If zero, ignored. If limit reached, throw exception", 0) \

View File

@ -25,6 +25,7 @@ namespace ErrorCodes
extern const int NOT_IMPLEMENTED;
extern const int FILE_ALREADY_EXISTS;
extern const int INCORRECT_QUERY;
extern const int ABORTED;
}
class AtomicDatabaseTablesSnapshotIterator final : public DatabaseTablesSnapshotIterator
@ -210,7 +211,7 @@ void DatabaseAtomic::renameTable(ContextPtr local_context, const String & table_
std::unique_lock<std::mutex> other_db_lock;
if (inside_database)
db_lock = std::unique_lock{mutex};
else if (this < &other_db)
{
db_lock = std::unique_lock{mutex};
other_db_lock = std::unique_lock{other_db.mutex};
@ -420,7 +421,18 @@ void DatabaseAtomic::loadStoredObjects(ContextMutablePtr local_context, bool has
{
/// Recreate symlinks to table data dirs in case of force restore, because some of them may be broken
if (has_force_restore_data_flag)
fs::remove_all(path_to_table_symlinks);
{
for (const auto & table_path : fs::directory_iterator(path_to_table_symlinks))
{
if (!fs::is_symlink(table_path))
{
throw Exception(ErrorCodes::ABORTED,
"'{}' is not a symlink. Atomic database should contains only symlinks.", std::string(table_path.path()));
}
fs::remove(table_path);
}
}
DatabaseOrdinary::loadStoredObjects(local_context, has_force_restore_data_flag, force_attach);

View File

@ -883,6 +883,7 @@ void DiskS3::restoreFileOperations(const RestoreInformation & restore_informatio
to_path /= from_path.parent_path().filename();
else
to_path /= from_path.filename();
fs::create_directories(to_path);
fs::copy(from_path, to_path, fs::copy_options::recursive | fs::copy_options::overwrite_existing);
fs::remove_all(from_path);
}

View File

@ -992,7 +992,7 @@ public:
UInt8 byte = x >> offset;
/// Leading zeros.
if (byte == 0 && !was_nonzero && offset)
if (byte == 0 && !was_nonzero && offset) // -V560
continue;
was_nonzero = true;

View File

@ -3,9 +3,10 @@
#include <city.h>
#include <farmhash.h>
#include <metrohash.h>
#include <MurmurHash2.h>
#include <MurmurHash3.h>
#if !defined(ARCADIA_BUILD)
# include <murmurhash2.h>
# include <murmurhash3.h>
# include "config_functions.h"
# include "config_core.h"
#endif

View File

@ -15,6 +15,7 @@ ADDINCL(
contrib/libs/libdivide
contrib/libs/rapidjson/include
contrib/libs/xxhash
contrib/restricted/murmurhash
)
PEERDIR(
@ -30,6 +31,7 @@ PEERDIR(
contrib/libs/metrohash
contrib/libs/rapidjson
contrib/libs/xxhash
contrib/restricted/murmurhash
library/cpp/consistent_hashing
)

View File

@ -14,6 +14,7 @@ ADDINCL(
contrib/libs/libdivide
contrib/libs/rapidjson/include
contrib/libs/xxhash
contrib/restricted/murmurhash
)
PEERDIR(
@ -29,6 +30,7 @@ PEERDIR(
contrib/libs/metrohash
contrib/libs/rapidjson
contrib/libs/xxhash
contrib/restricted/murmurhash
library/cpp/consistent_hashing
)

View File

@ -184,7 +184,7 @@ inline bool checkString(const String & s, ReadBuffer & buf)
return checkString(s.c_str(), buf);
}
inline bool checkChar(char c, ReadBuffer & buf)
inline bool checkChar(char c, ReadBuffer & buf) // -V1071
{
if (buf.eof() || *buf.position() != c)
return false;
@ -393,7 +393,7 @@ void readIntText(T & x, ReadBuffer & buf)
}
template <ReadIntTextCheckOverflow check_overflow = ReadIntTextCheckOverflow::CHECK_OVERFLOW, typename T>
bool tryReadIntText(T & x, ReadBuffer & buf)
bool tryReadIntText(T & x, ReadBuffer & buf) // -V1071
{
return readIntTextImpl<T, bool, check_overflow>(x, buf);
}
@ -1248,7 +1248,7 @@ bool loadAtPosition(ReadBuffer & in, Memory<> & memory, char * & current);
struct PcgDeserializer
{
static void deserializePcg32(const pcg32_fast & rng, ReadBuffer & buf)
static void deserializePcg32(pcg32_fast & rng, ReadBuffer & buf)
{
decltype(rng.state_) multiplier, increment, state;
readText(multiplier, buf);
@ -1261,6 +1261,8 @@ struct PcgDeserializer
throw Exception(ErrorCodes::INCORRECT_DATA, "Incorrect multiplier in pcg32: expected {}, got {}", rng.multiplier(), multiplier);
if (increment != rng.increment())
throw Exception(ErrorCodes::INCORRECT_DATA, "Incorrect increment in pcg32: expected {}, got {}", rng.increment(), increment);
rng.state_ = state;
}
};

View File

@ -212,6 +212,7 @@ public:
/// Conversion should be possible with only usage of CAST function and renames.
/// @param ignore_constant_values - Do not check that constants are same. Use value from result_header.
/// @param add_casted_columns - Create new columns with converted values instead of replacing original.
/// @param new_names - Output parameter for new column names when add_casted_columns is used.
static ActionsDAGPtr makeConvertingActions(
const ColumnsWithTypeAndName & source,
const ColumnsWithTypeAndName & result,

View File

@ -348,7 +348,7 @@ SetPtr makeExplicitSet(
const ASTPtr & left_arg = args.children.at(0);
const ASTPtr & right_arg = args.children.at(1);
auto column_name = left_arg->getColumnName();
auto column_name = left_arg->getColumnName(context->getSettingsRef());
const auto & dag_node = actions.findInIndex(column_name);
const DataTypePtr & left_arg_type = dag_node.result_type;
@ -641,7 +641,7 @@ std::optional<NameAndTypePair> ActionsMatcher::getNameAndTypeFromAST(const ASTPt
{
// If the argument is a literal, we generated a unique column name for it.
// Use it instead of a generic display name.
auto child_column_name = ast->getColumnName();
auto child_column_name = ast->getColumnName(data.getContext()->getSettingsRef());
const auto * as_literal = ast->as<ASTLiteral>();
if (as_literal)
{
@ -703,7 +703,7 @@ ASTs ActionsMatcher::doUntuple(const ASTFunction * function, ActionsMatcher::Dat
func->setAlias(data.getUniqueName("_ut_" + name));
auto function_builder = FunctionFactory::instance().get(func->name, data.getContext());
data.addFunction(function_builder, {tuple_name_type->name, literal->getColumnName()}, func->getColumnName());
data.addFunction(function_builder, {tuple_name_type->name, literal->getColumnName(data.getContext()->getSettingsRef())}, func->getColumnName(data.getContext()->getSettingsRef()));
columns.push_back(std::move(func));
}
@ -740,6 +740,7 @@ void ActionsMatcher::visit(ASTExpressionList & expression_list, const ASTPtr &,
void ActionsMatcher::visit(const ASTIdentifier & identifier, const ASTPtr &, Data & data)
{
auto column_name = identifier.getColumnName();
if (data.hasColumn(column_name))
return;
@ -766,7 +767,7 @@ void ActionsMatcher::visit(const ASTIdentifier & identifier, const ASTPtr &, Dat
void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & data)
{
auto column_name = ast->getColumnName();
auto column_name = ast->getColumnName(data.getContext()->getSettingsRef());
if (data.hasColumn(column_name))
return;
@ -782,7 +783,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
ASTPtr arg = node.arguments->children.at(0);
visit(arg, data);
if (!data.only_consts)
data.addArrayJoin(arg->getColumnName(), column_name);
data.addArrayJoin(arg->getColumnName(data.getContext()->getSettingsRef()), column_name);
return;
}
@ -804,7 +805,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
/// We are in the part of the tree that we are not going to compute. You just need to define types.
/// Do not subquery and create sets. We replace "in*" function to "in*IgnoreSet".
auto argument_name = node.arguments->children.at(0)->getColumnName();
auto argument_name = node.arguments->children.at(0)->getColumnName(data.getContext()->getSettingsRef());
data.addFunction(
FunctionFactory::instance().get(node.name + "IgnoreSet", data.getContext()),
@ -933,7 +934,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
if (!prepared_set->empty())
column.name = data.getUniqueName("__set");
else
column.name = child->getColumnName();
column.name = child->getColumnName(data.getContext()->getSettingsRef());
if (!data.hasColumn(column.name))
{
@ -1012,7 +1013,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
visit(lambda->arguments->children.at(1), data);
auto lambda_dag = data.actions_stack.popLevel();
String result_name = lambda->arguments->children.at(1)->getColumnName();
String result_name = lambda->arguments->children.at(1)->getColumnName(data.getContext()->getSettingsRef());
lambda_dag->removeUnusedActions(Names(1, result_name));
auto lambda_actions = std::make_shared<ExpressionActions>(
@ -1027,7 +1028,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
if (findColumn(required_arg, lambda_arguments) == lambda_arguments.end())
captured.push_back(required_arg);
/// We can not name `getColumnName()`,
/// We can not name `getColumnName(data.getContext()->getSettingsRef())`,
/// because it does not uniquely define the expression (the types of arguments can be different).
String lambda_name = data.getUniqueName("__lambda");
@ -1057,7 +1058,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
if (arguments_present)
{
/// Calculate column name here again, because AST may be changed here (in case of untuple).
data.addFunction(function_builder, argument_names, ast->getColumnName());
data.addFunction(function_builder, argument_names, ast->getColumnName(data.getContext()->getSettingsRef()));
}
}
@ -1071,7 +1072,7 @@ void ActionsMatcher::visit(const ASTLiteral & literal, const ASTPtr & /* ast */,
// AST here? Anyway, do not modify the column name if it is set already.
if (literal.unique_column_name.empty())
{
const auto default_name = literal.getColumnName();
const auto default_name = literal.getColumnName(data.getContext()->getSettingsRef());
const auto & index = data.actions_stack.getLastActionsIndex();
const auto * existing_column = index.tryGetNode(default_name);
@ -1151,7 +1152,7 @@ SetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no_su
}
/// We get the stream of blocks for the subquery. Create Set and put it in place of the subquery.
String set_id = right_in_operand->getColumnName();
String set_id = right_in_operand->getColumnName(data.getContext()->getSettingsRef());
SubqueryForSet & subquery_for_set = data.subqueries_for_sets[set_id];
@ -1187,7 +1188,7 @@ SetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no_su
{
const auto & last_actions = data.actions_stack.getLastActions();
const auto & index = data.actions_stack.getLastActionsIndex();
if (index.contains(left_in_operand->getColumnName()))
if (index.contains(left_in_operand->getColumnName(data.getContext()->getSettingsRef())))
/// An explicit enumeration of values in parentheses.
return makeExplicitSet(&node, last_actions, false, data.getContext(), data.set_size_limit, data.prepared_sets);
else

View File

@ -179,7 +179,7 @@ String Cluster::Address::toFullString(bool use_compact_format) const
// shard_num/replica_num like in system.clusters table
throw Exception("shard_num/replica_num cannot be zero", ErrorCodes::LOGICAL_ERROR);
return "shard" + std::to_string(shard_index) + "_replica" + std::to_string(replica_index);
return fmt::format("shard{}_replica{}", shard_index, replica_index);
}
else
{
@ -199,7 +199,7 @@ Cluster::Address Cluster::Address::fromFullString(const String & full_string)
const char * user_pw_end = strchr(full_string.data(), '@');
/// parsing with the new [shard{shard_index}[_replica{replica_index}]] format
/// parsing with the new shard{shard_index}[_replica{replica_index}] format
if (!user_pw_end && startsWith(full_string, "shard"))
{
const char * underscore = strchr(full_string.data(), '_');
@ -401,6 +401,9 @@ Cluster::Cluster(const Poco::Util::AbstractConfiguration & config,
bool internal_replication = config.getBool(partial_prefix + ".internal_replication", false);
ShardInfoInsertPathForInternalReplication insert_paths;
/// "_all_replicas" is a marker that will be replaced with all replicas
/// (for creating connections in the Distributed engine)
insert_paths.compact = fmt::format("shard{}_all_replicas", current_shard_num);
for (const auto & replica_key : replica_keys)
{
@ -419,20 +422,10 @@ Cluster::Cluster(const Poco::Util::AbstractConfiguration & config,
if (internal_replication)
{
/// use_compact_format=0
{
auto dir_name = replica_addresses.back().toFullString(false /* use_compact_format */);
if (!replica_addresses.back().is_local)
concatInsertPath(insert_paths.prefer_localhost_replica, dir_name);
concatInsertPath(insert_paths.no_prefer_localhost_replica, dir_name);
}
/// use_compact_format=1
{
auto dir_name = replica_addresses.back().toFullString(true /* use_compact_format */);
if (!replica_addresses.back().is_local)
concatInsertPath(insert_paths.prefer_localhost_replica_compact, dir_name);
concatInsertPath(insert_paths.no_prefer_localhost_replica_compact, dir_name);
}
auto dir_name = replica_addresses.back().toFullString(/* use_compact_format= */ false);
if (!replica_addresses.back().is_local)
concatInsertPath(insert_paths.prefer_localhost_replica, dir_name);
concatInsertPath(insert_paths.no_prefer_localhost_replica, dir_name);
}
}
else
@ -660,17 +653,17 @@ const std::string & Cluster::ShardInfo::insertPathForInternalReplication(bool pr
const auto & paths = insert_path_for_internal_replication;
if (!use_compact_format)
{
if (prefer_localhost_replica)
return paths.prefer_localhost_replica;
else
return paths.no_prefer_localhost_replica;
const auto & path = prefer_localhost_replica ? paths.prefer_localhost_replica : paths.no_prefer_localhost_replica;
if (path.size() > NAME_MAX)
{
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Path '{}' for async distributed INSERT is too long (exceed {} limit)", path, NAME_MAX);
}
return path;
}
else
{
if (prefer_localhost_replica)
return paths.prefer_localhost_replica_compact;
else
return paths.no_prefer_localhost_replica_compact;
return paths.compact;
}
}

View File

@ -166,10 +166,8 @@ public:
std::string prefer_localhost_replica;
/// prefer_localhost_replica == 0 && use_compact_format_in_distributed_parts_names=0
std::string no_prefer_localhost_replica;
/// prefer_localhost_replica == 1 && use_compact_format_in_distributed_parts_names=1
std::string prefer_localhost_replica_compact;
/// prefer_localhost_replica == 0 && use_compact_format_in_distributed_parts_names=1
std::string no_prefer_localhost_replica_compact;
/// use_compact_format_in_distributed_parts_names=1
std::string compact;
};
struct ShardInfo

View File

@ -394,7 +394,7 @@ struct ContextSharedPart
/// Clusters for distributed tables
/// Initialized on demand (on distributed storages initialization) since Settings should be initialized
std::unique_ptr<Clusters> clusters;
std::shared_ptr<Clusters> clusters;
ConfigurationPtr clusters_config; /// Stores updated configs
mutable std::mutex clusters_mutex; /// Guards clusters and clusters_config
@ -1882,7 +1882,7 @@ std::optional<UInt16> Context::getTCPPortSecure() const
std::shared_ptr<Cluster> Context::getCluster(const std::string & cluster_name) const
{
auto res = getClusters().getCluster(cluster_name);
auto res = getClusters()->getCluster(cluster_name);
if (res)
return res;
@ -1896,7 +1896,7 @@ std::shared_ptr<Cluster> Context::getCluster(const std::string & cluster_name) c
std::shared_ptr<Cluster> Context::tryGetCluster(const std::string & cluster_name) const
{
return getClusters().getCluster(cluster_name);
return getClusters()->getCluster(cluster_name);
}
@ -1911,7 +1911,7 @@ void Context::reloadClusterConfig() const
}
const auto & config = cluster_config ? *cluster_config : getConfigRef();
auto new_clusters = std::make_unique<Clusters>(config, settings);
auto new_clusters = std::make_shared<Clusters>(config, settings);
{
std::lock_guard lock(shared->clusters_mutex);
@ -1927,16 +1927,16 @@ void Context::reloadClusterConfig() const
}
Clusters & Context::getClusters() const
std::shared_ptr<Clusters> Context::getClusters() const
{
std::lock_guard lock(shared->clusters_mutex);
if (!shared->clusters)
{
const auto & config = shared->clusters_config ? *shared->clusters_config : getConfigRef();
shared->clusters = std::make_unique<Clusters>(config, settings);
shared->clusters = std::make_shared<Clusters>(config, settings);
}
return *shared->clusters;
return shared->clusters;
}

View File

@ -676,7 +676,7 @@ public:
void setDDLWorker(std::unique_ptr<DDLWorker> ddl_worker);
DDLWorker & getDDLWorker() const;
Clusters & getClusters() const;
std::shared_ptr<Clusters> getClusters() const;
std::shared_ptr<Cluster> getCluster(const std::string & cluster_name) const;
std::shared_ptr<Cluster> tryGetCluster(const std::string & cluster_name) const;
void setClustersConfig(const ConfigurationPtr & config, const String & config_name = "remote_servers");

View File

@ -253,7 +253,7 @@ struct ExpressionActionsChain : WithContext
steps.clear();
}
ActionsDAGPtr getLastActions(bool allow_empty = false)
ActionsDAGPtr getLastActions(bool allow_empty = false) // -V1071
{
if (steps.empty())
{

View File

@ -244,7 +244,7 @@ void ExpressionAnalyzer::analyzeAggregation()
ssize_t size = group_asts.size();
getRootActionsNoMakeSet(group_asts[i], true, temp_actions, false);
const auto & column_name = group_asts[i]->getColumnName();
const auto & column_name = group_asts[i]->getColumnName(getContext()->getSettingsRef());
const auto * node = temp_actions->tryFindInIndex(column_name);
if (!node)
throw Exception("Unknown identifier (in GROUP BY): " + column_name, ErrorCodes::UNKNOWN_IDENTIFIER);
@ -398,7 +398,7 @@ void SelectQueryExpressionAnalyzer::makeSetsForIndex(const ASTPtr & node)
auto temp_actions = std::make_shared<ActionsDAG>(columns_after_join);
getRootActions(left_in_operand, true, temp_actions);
if (temp_actions->tryFindInIndex(left_in_operand->getColumnName()))
if (temp_actions->tryFindInIndex(left_in_operand->getColumnName(getContext()->getSettingsRef())))
makeExplicitSet(func, *temp_actions, true, getContext(), settings.size_limits_for_set, prepared_sets);
}
}
@ -446,7 +446,7 @@ bool ExpressionAnalyzer::makeAggregateDescriptions(ActionsDAGPtr & actions)
if (node->arguments)
getRootActionsNoMakeSet(node->arguments, true, actions);
aggregate.column_name = node->getColumnName();
aggregate.column_name = node->getColumnName(getContext()->getSettingsRef());
const ASTs & arguments = node->arguments ? node->arguments->children : ASTs();
aggregate.argument_names.resize(arguments.size());
@ -454,7 +454,7 @@ bool ExpressionAnalyzer::makeAggregateDescriptions(ActionsDAGPtr & actions)
for (size_t i = 0; i < arguments.size(); ++i)
{
const std::string & name = arguments[i]->getColumnName();
const std::string & name = arguments[i]->getColumnName(getContext()->getSettingsRef());
const auto * dag_node = actions->tryFindInIndex(name);
if (!dag_node)
{
@ -647,7 +647,7 @@ void ExpressionAnalyzer::makeWindowDescriptions(ActionsDAGPtr actions)
WindowFunctionDescription window_function;
window_function.function_node = function_node;
window_function.column_name
= window_function.function_node->getColumnName();
= window_function.function_node->getColumnName(getContext()->getSettingsRef());
window_function.function_parameters
= window_function.function_node->parameters
? getAggregateFunctionParametersArray(
@ -666,7 +666,7 @@ void ExpressionAnalyzer::makeWindowDescriptions(ActionsDAGPtr actions)
window_function.argument_names.resize(arguments.size());
for (size_t i = 0; i < arguments.size(); ++i)
{
const std::string & name = arguments[i]->getColumnName();
const std::string & name = arguments[i]->getColumnName(getContext()->getSettingsRef());
const auto * node = actions->tryFindInIndex(name);
if (!node)
@ -964,7 +964,7 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendPrewhere(
auto & step = chain.lastStep(sourceColumns());
getRootActions(select_query->prewhere(), only_types, step.actions());
String prewhere_column_name = select_query->prewhere()->getColumnName();
String prewhere_column_name = select_query->prewhere()->getColumnName(getContext()->getSettingsRef());
step.addRequiredOutput(prewhere_column_name);
const auto & node = step.actions()->findInIndex(prewhere_column_name);
@ -1061,7 +1061,7 @@ bool SelectQueryExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain,
getRootActions(select_query->where(), only_types, step.actions());
auto where_column_name = select_query->where()->getColumnName();
auto where_column_name = select_query->where()->getColumnName(getContext()->getSettingsRef());
step.addRequiredOutput(where_column_name);
const auto & node = step.actions()->findInIndex(where_column_name);
@ -1086,7 +1086,7 @@ bool SelectQueryExpressionAnalyzer::appendGroupBy(ExpressionActionsChain & chain
ASTs asts = select_query->groupBy()->children;
for (const auto & ast : asts)
{
step.addRequiredOutput(ast->getColumnName());
step.addRequiredOutput(ast->getColumnName(getContext()->getSettingsRef()));
getRootActions(ast, only_types, step.actions());
}
@ -1114,7 +1114,7 @@ void SelectQueryExpressionAnalyzer::appendAggregateFunctionsArguments(Expression
for (const auto & name : desc.argument_names)
step.addRequiredOutput(name);
/// Collect aggregates removing duplicates by node.getColumnName()
/// Collect aggregates removing duplicates by node.getColumnName(getContext()->getSettingsRef())
/// It's not clear why we recollect aggregates (for query parts) while we're able to use previously collected ones (for entire query)
/// @note The original recollection logic didn't remove duplicates.
GetAggregatesVisitor::Data data;
@ -1169,7 +1169,7 @@ void SelectQueryExpressionAnalyzer::appendWindowFunctionsArguments(
// (2b) Required function argument columns.
for (const auto & a : f.function_node->arguments->children)
{
step.addRequiredOutput(a->getColumnName());
step.addRequiredOutput(a->getColumnName(getContext()->getSettingsRef()));
}
}
@ -1191,7 +1191,7 @@ bool SelectQueryExpressionAnalyzer::appendHaving(ExpressionActionsChain & chain,
ExpressionActionsChain::Step & step = chain.lastStep(aggregated_columns);
getRootActionsForHaving(select_query->having(), only_types, step.actions());
step.addRequiredOutput(select_query->having()->getColumnName());
step.addRequiredOutput(select_query->having()->getColumnName(getContext()->getSettingsRef()));
return true;
}
@ -1215,7 +1215,7 @@ void SelectQueryExpressionAnalyzer::appendSelect(ExpressionActionsChain & chain,
continue;
}
step.addRequiredOutput(child->getColumnName());
step.addRequiredOutput(child->getColumnName(getContext()->getSettingsRef()));
}
}
@ -1243,7 +1243,7 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendOrderBy(ExpressionActionsChai
if (!ast || ast->children.empty())
throw Exception("Bad order expression AST", ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE);
ASTPtr order_expression = ast->children.at(0);
step.addRequiredOutput(order_expression->getColumnName());
step.addRequiredOutput(order_expression->getColumnName(getContext()->getSettingsRef()));
if (ast->with_fill)
with_fill = true;
@ -1293,7 +1293,7 @@ bool SelectQueryExpressionAnalyzer::appendLimitBy(ExpressionActionsChain & chain
for (const auto & child : select_query->limitBy()->children)
{
auto child_name = child->getColumnName();
auto child_name = child->getColumnName(getContext()->getSettingsRef());
if (!aggregated_names.count(child_name))
step.addRequiredOutput(std::move(child_name));
}
@ -1309,13 +1309,15 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendProjectResult(ExpressionActio
NamesWithAliases result_columns;
const auto & settings = getContext()->getSettingsRef();
ASTs asts = select_query->select()->children;
for (const auto & ast : asts)
{
String result_name = ast->getAliasOrColumnName();
String result_name = ast->getAliasOrColumnName(settings);
if (required_result_columns.empty() || required_result_columns.count(result_name))
{
std::string source_name = ast->getColumnName();
std::string source_name = ast->getColumnName(settings);
/*
* For temporary columns created by ExpressionAnalyzer for literals,
@ -1357,7 +1359,7 @@ void ExpressionAnalyzer::appendExpression(ExpressionActionsChain & chain, const
{
ExpressionActionsChain::Step & step = chain.lastStep(sourceColumns());
getRootActions(expr, only_types, step.actions());
step.addRequiredOutput(expr->getColumnName());
step.addRequiredOutput(expr->getColumnName(getContext()->getSettingsRef()));
}
@ -1374,12 +1376,13 @@ ActionsDAGPtr ExpressionAnalyzer::getActionsDAG(bool add_aliases, bool project_r
else
asts = ASTs(1, query);
const auto & settings = getContext()->getSettingsRef();
for (const auto & ast : asts)
{
std::string name = ast->getColumnName();
std::string name = ast->getColumnName(settings);
std::string alias;
if (add_aliases)
alias = ast->getAliasOrColumnName();
alias = ast->getAliasOrColumnName(settings);
else
alias = name;
result_columns.emplace_back(name, alias);
@ -1514,7 +1517,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult(
if (auto actions = query_analyzer.appendPrewhere(chain, !first_stage, additional_required_columns_after_prewhere))
{
prewhere_info = std::make_shared<PrewhereDAGInfo>(actions, query.prewhere()->getColumnName());
prewhere_info = std::make_shared<PrewhereDAGInfo>(actions, query.prewhere()->getColumnName(settings));
if (allowEarlyConstantFolding(*prewhere_info->prewhere_actions, settings))
{
@ -1524,7 +1527,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult(
ExpressionActions(
prewhere_info->prewhere_actions,
ExpressionActionsSettings::fromSettings(context->getSettingsRef())).execute(before_prewhere_sample);
auto & column_elem = before_prewhere_sample.getByName(query.prewhere()->getColumnName());
auto & column_elem = before_prewhere_sample.getByName(query.prewhere()->getColumnName(settings));
/// If the filter column is a constant, record it.
if (column_elem.column)
prewhere_constant_filter_description = ConstantFilterDescription(*column_elem.column);
@ -1559,7 +1562,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult(
ExpressionActions(
before_where,
ExpressionActionsSettings::fromSettings(context->getSettingsRef())).execute(before_where_sample);
auto & column_elem = before_where_sample.getByName(query.where()->getColumnName());
auto & column_elem = before_where_sample.getByName(query.where()->getColumnName(settings));
/// If the filter column is a constant, record it.
if (column_elem.column)
where_constant_filter_description = ConstantFilterDescription(*column_elem.column);
@ -1650,7 +1653,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult(
const auto * select_query = query_analyzer.getSelectQuery();
for (const auto & child : select_query->select()->children)
{
step.addRequiredOutput(child->getColumnName());
step.addRequiredOutput(child->getColumnName(settings));
}
}
@ -1706,7 +1709,8 @@ void ExpressionAnalysisResult::finalize(const ExpressionActionsChain & chain, si
if (hasWhere())
{
where_column_name = query.where()->getColumnName();
const auto & settings = chain.getContext()->getSettingsRef();
where_column_name = query.where()->getColumnName(settings);
remove_where_filter = chain.steps.at(where_step_num)->required_output.find(where_column_name)->second;
}
}
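
Note: the recurring change in this file threads the query Settings into column-name generation, so that names can respect the legacy_column_name_of_tuple_literal setting introduced further down in this diff. A minimal sketch of the call-site pattern; the wrapper function itself is illustrative only:

/// Sketch: every consumer of an AST column name now passes the Settings through.
void appendExpressionSketch(ExpressionActionsChain & chain, const ASTPtr & expr)
{
    const auto & settings = getContext()->getSettingsRef();
    ExpressionActionsChain::Step & step = chain.lastStep(sourceColumns());
    step.addRequiredOutput(expr->getColumnName(settings)); /// was: expr->getColumnName()
}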

View File

@ -1101,6 +1101,7 @@ BlockIO InterpreterCreateQuery::doCreateOrReplaceTable(ASTCreateQuery & create,
[[maybe_unused]] bool done = doCreateTable(create, properties);
assert(done);
ast_drop->table = create.table;
ast_drop->is_dictionary = create.is_dictionary;
ast_drop->database = create.database;
ast_drop->kind = ASTDropQuery::Drop;
created = true;
@ -1113,14 +1114,18 @@ BlockIO InterpreterCreateQuery::doCreateOrReplaceTable(ASTCreateQuery & create,
ASTRenameQuery::Table{create.database, create.table},
ASTRenameQuery::Table{create.database, table_to_replace_name}
};
ast_rename->elements.push_back(std::move(elem));
ast_rename->exchange = true;
ast_rename->dictionary = create.is_dictionary;
InterpreterRenameQuery(ast_rename, getContext()).execute();
replaced = true;
InterpreterDropQuery(ast_drop, getContext()).execute();
create.table = table_to_replace_name;
return fillTableIfNeeded(create);
}
catch (...)
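
Note: for orientation, doCreateOrReplaceTable implements CREATE OR REPLACE as a three-step sequence, which this hunk extends to dictionaries by propagating is_dictionary into the drop and rename ASTs. A hedged outline of the flow:

/// 1. doCreateTable() creates the new object under create.table (a temporary name).
/// 2. ASTRenameQuery with exchange = true (and now dictionary = create.is_dictionary)
///    atomically swaps it with the object being replaced.
/// 3. ASTDropQuery (now with is_dictionary propagated) drops the old object,
///    which after the exchange lives under the temporary name.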

View File

@ -143,7 +143,7 @@ String InterpreterSelectQuery::generateFilterActions(ActionsDAGPtr & actions, co
SelectQueryExpressionAnalyzer analyzer(query_ast, syntax_result, context, metadata_snapshot);
actions = analyzer.simpleSelectActions();
auto column_name = expr_list->children.at(0)->getColumnName();
auto column_name = expr_list->children.at(0)->getColumnName(context->getSettingsRef());
actions->removeUnusedActions(NameSet{column_name});
actions->projectInput(false);
@ -779,7 +779,7 @@ static SortDescription getSortDescription(const ASTSelectQuery & query, ContextP
order_descr.reserve(query.orderBy()->children.size());
for (const auto & elem : query.orderBy()->children)
{
String name = elem->children.front()->getColumnName();
String name = elem->children.front()->getColumnName(context->getSettingsRef());
const auto & order_by_elem = elem->as<ASTOrderByElement &>();
std::shared_ptr<Collator> collator;
@ -798,14 +798,14 @@ static SortDescription getSortDescription(const ASTSelectQuery & query, ContextP
return order_descr;
}
static SortDescription getSortDescriptionFromGroupBy(const ASTSelectQuery & query)
static SortDescription getSortDescriptionFromGroupBy(const ASTSelectQuery & query, ContextPtr context)
{
SortDescription order_descr;
order_descr.reserve(query.groupBy()->children.size());
for (const auto & elem : query.groupBy()->children)
{
String name = elem->getColumnName();
String name = elem->getColumnName(context->getSettingsRef());
order_descr.emplace_back(name, 1, 1);
}
@ -1948,13 +1948,13 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc
{
query_info.projection->order_optimizer = std::make_shared<ReadInOrderOptimizer>(
query_info.projection->group_by_elements_actions,
getSortDescriptionFromGroupBy(query),
getSortDescriptionFromGroupBy(query, context),
query_info.syntax_analyzer_result);
}
else
{
query_info.order_optimizer = std::make_shared<ReadInOrderOptimizer>(
analysis_result.group_by_elements_actions, getSortDescriptionFromGroupBy(query), query_info.syntax_analyzer_result);
analysis_result.group_by_elements_actions, getSortDescriptionFromGroupBy(query, context), query_info.syntax_analyzer_result);
}
}
@ -2033,7 +2033,7 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc
void InterpreterSelectQuery::executeWhere(QueryPlan & query_plan, const ActionsDAGPtr & expression, bool remove_filter)
{
auto where_step = std::make_unique<FilterStep>(
query_plan.getCurrentDataStream(), expression, getSelectQuery().where()->getColumnName(), remove_filter);
query_plan.getCurrentDataStream(), expression, getSelectQuery().where()->getColumnName(context->getSettingsRef()), remove_filter);
where_step->setStepDescription("WHERE");
query_plan.addStep(std::move(where_step));
@ -2080,7 +2080,7 @@ void InterpreterSelectQuery::executeAggregation(QueryPlan & query_plan, const Ac
SortDescription group_by_sort_description;
if (group_by_info && settings.optimize_aggregation_in_order)
group_by_sort_description = getSortDescriptionFromGroupBy(getSelectQuery());
group_by_sort_description = getSortDescriptionFromGroupBy(getSelectQuery(), context);
else
group_by_info = nullptr;
@ -2128,7 +2128,7 @@ void InterpreterSelectQuery::executeMergeAggregated(QueryPlan & query_plan, bool
void InterpreterSelectQuery::executeHaving(QueryPlan & query_plan, const ActionsDAGPtr & expression)
{
auto having_step
= std::make_unique<FilterStep>(query_plan.getCurrentDataStream(), expression, getSelectQuery().having()->getColumnName(), false);
= std::make_unique<FilterStep>(query_plan.getCurrentDataStream(), expression, getSelectQuery().having()->getColumnName(context->getSettingsRef()), false);
having_step->setStepDescription("HAVING");
query_plan.addStep(std::move(having_step));
@ -2144,7 +2144,7 @@ void InterpreterSelectQuery::executeTotalsAndHaving(
query_plan.getCurrentDataStream(),
overflow_row,
expression,
has_having ? getSelectQuery().having()->getColumnName() : "",
has_having ? getSelectQuery().having()->getColumnName(context->getSettingsRef()) : "",
settings.totals_mode,
settings.totals_auto_threshold,
final);
@ -2461,7 +2461,7 @@ void InterpreterSelectQuery::executeLimitBy(QueryPlan & query_plan)
Names columns;
for (const auto & elem : query.limitBy()->children)
columns.emplace_back(elem->getColumnName());
columns.emplace_back(elem->getColumnName(context->getSettingsRef()));
UInt64 length = getLimitUIntValue(query.limitByLength(), context, "LIMIT");
UInt64 offset = (query.limitByOffset() ? getLimitUIntValue(query.limitByOffset(), context, "OFFSET") : 0);

View File

@ -39,7 +39,7 @@ std::pair<Field, std::shared_ptr<const IDataType>> evaluateConstantExpression(co
if (context->getSettingsRef().normalize_function_names)
FunctionNameNormalizer().visit(ast.get());
String name = ast->getColumnName();
String name = ast->getColumnName(context->getSettingsRef());
auto syntax_result = TreeRewriter(context).analyze(ast, source_columns);
ExpressionActionsPtr expr_for_constant_folding = ExpressionAnalyzer(ast, syntax_result, context).getConstActions();

View File

@ -305,8 +305,16 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat
}
else
{
String action = "CREATE";
if (attach)
action = "ATTACH";
else if (replace_table && create_or_replace)
action = "CREATE OR REPLACE";
else if (replace_table)
action = "REPLACE";
/// Always DICTIONARY
settings.ostr << (settings.hilite ? hilite_keyword : "") << (attach ? "ATTACH " : "CREATE ") << "DICTIONARY "
settings.ostr << (settings.hilite ? hilite_keyword : "") << action << " DICTIONARY "
<< (if_not_exists ? "IF NOT EXISTS " : "") << (settings.hilite ? hilite_none : "")
<< (!database.empty() ? backQuoteIfNeed(database) + "." : "") << backQuoteIfNeed(table);
if (uuid != UUIDHelpers::Nil)
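
Note: a summary of what the new action selection formats for dictionaries, derived directly from the branch above (db.d is a hypothetical identifier):

/// attach                              -> "ATTACH DICTIONARY db.d"
/// replace_table && create_or_replace  -> "CREATE OR REPLACE DICTIONARY db.d"
/// replace_table                       -> "REPLACE DICTIONARY db.d"
/// otherwise                           -> "CREATE DICTIONARY db.d"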

View File

@ -24,6 +24,16 @@ namespace ErrorCodes
}
void ASTFunction::appendColumnNameImpl(WriteBuffer & ostr) const
{
appendColumnNameImpl(ostr, nullptr);
}
void ASTFunction::appendColumnNameImpl(WriteBuffer & ostr, const Settings & settings) const
{
appendColumnNameImpl(ostr, &settings);
}
void ASTFunction::appendColumnNameImpl(WriteBuffer & ostr, const Settings * settings) const
{
if (name == "view")
throw Exception("Table function view cannot be used as an expression", ErrorCodes::UNEXPECTED_EXPRESSION);
@ -37,19 +47,30 @@ void ASTFunction::appendColumnNameImpl(WriteBuffer & ostr) const
{
if (it != parameters->children.begin())
writeCString(", ", ostr);
(*it)->appendColumnName(ostr);
if (settings)
(*it)->appendColumnName(ostr, *settings);
else
(*it)->appendColumnName(ostr);
}
writeChar(')', ostr);
}
writeChar('(', ostr);
if (arguments)
{
for (auto it = arguments->children.begin(); it != arguments->children.end(); ++it)
{
if (it != arguments->children.begin())
writeCString(", ", ostr);
(*it)->appendColumnName(ostr);
if (settings)
(*it)->appendColumnName(ostr, *settings);
else
(*it)->appendColumnName(ostr);
}
}
writeChar(')', ostr);
if (is_window_function)
@ -61,11 +82,11 @@ void ASTFunction::appendColumnNameImpl(WriteBuffer & ostr) const
}
else
{
FormatSettings settings{ostr, true /* one_line */};
FormatSettings format_settings{ostr, true /* one_line */};
FormatState state;
FormatStateStacked frame;
writeCString("(", ostr);
window_definition->formatImpl(settings, state, frame);
window_definition->formatImpl(format_settings, state, frame);
writeCString(")", ostr);
}
}

View File

@ -54,6 +54,10 @@ public:
protected:
void formatImplWithoutAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;
void appendColumnNameImpl(WriteBuffer & ostr) const override;
void appendColumnNameImpl(WriteBuffer & ostr, const Settings & settings) const override;
private:
void appendColumnNameImpl(WriteBuffer & ostr, const Settings * settings) const;
};

View File

@ -17,8 +17,10 @@ void ASTLiteral::updateTreeHashImpl(SipHash & hash_state) const
applyVisitor(FieldVisitorHash(hash_state), value);
}
namespace
{
/// Writes the 'tuple' word before tuple literals for backward compatibility reasons.
/// TODO: remove once versions below 20.3 are rarely used.
class FieldVisitorToColumnName : public StaticVisitor<String>
{
public:
@ -46,14 +48,51 @@ String FieldVisitorToColumnName::operator() (const Tuple & x) const
return wb.str();
}
}
void ASTLiteral::appendColumnNameImpl(WriteBuffer & ostr, const Settings & settings) const
{
if (settings.legacy_column_name_of_tuple_literal)
appendColumnNameImplLegacy(ostr);
else
appendColumnNameImpl(ostr);
}
void ASTLiteral::appendColumnNameImpl(WriteBuffer & ostr) const
{
/// 100 is just an arbitrary value.
constexpr auto min_elements_for_hashing = 100;
/// Special case for very large arrays and tuples: instead of listing all elements, use a hash of them.
/// (Otherwise the column name would be too long, which would significantly slow down expression analysis.)
auto type = value.getType();
if ((type == Field::Types::Array && value.get<const Array &>().size() > min_elements_for_hashing)
|| (type == Field::Types::Tuple && value.get<const Tuple &>().size() > min_elements_for_hashing))
{
SipHash hash;
applyVisitor(FieldVisitorHash(hash), value);
UInt64 low, high;
hash.get128(low, high);
writeCString(type == Field::Types::Array ? "__array_" : "__tuple_", ostr);
writeText(low, ostr);
ostr.write('_');
writeText(high, ostr);
}
else
{
String column_name = applyVisitor(FieldVisitorToString(), value);
writeString(column_name, ostr);
}
}
void ASTLiteral::appendColumnNameImplLegacy(WriteBuffer & ostr) const
{
/// 100 is just an arbitrary value.
constexpr auto min_elements_for_hashing = 100;
/// Special case for very large arrays: instead of listing all elements, use a hash of them.
/// (Otherwise the column name would be too long, which would significantly slow down expression analysis.)
/// TODO: also hash large tuples once versions below 20.3 are rarely used, since hashing them breaks backward compatibility.
auto type = value.getType();
if ((type == Field::Types::Array && value.get<const Array &>().size() > min_elements_for_hashing))
{
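
Note: to make the compatibility issue concrete, here are hedged examples of the column names the two paths produce. The exact strings follow from FieldVisitorToString versus the 'tuple'-prefixing FieldVisitorToColumnName above; the hash values are schematic:

/// Literal                 new name (appendColumnNameImpl)   legacy name (appendColumnNameImplLegacy)
/// (1, 'a')                (1, 'a')                          tuple(1, 'a')
/// array of 150 elements   __array_<low>_<high>              __array_<low>_<high>
/// tuple of 150 elements   __tuple_<low>_<high>              tuple(... all 150 elements ...)
///
/// <low> and <high> are the two 64-bit halves of the SipHash of the field, so
/// replicas on different versions disagree only about tuple literal names.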

View File

@ -44,6 +44,13 @@ protected:
void formatImplWithoutAlias(const FormatSettings & settings, FormatState &, FormatStateStacked) const override;
void appendColumnNameImpl(WriteBuffer & ostr) const override;
void appendColumnNameImpl(WriteBuffer & ostr, const Settings & settings) const override;
private:
/// Legacy version of 'appendColumnNameImpl'. It differs only for tuple literals.
/// It is needed only to keep queries with tuple literals over distributed tables
/// working during a rolling update.
void appendColumnNameImplLegacy(WriteBuffer & ostr) const;
};
}

View File

@ -75,12 +75,15 @@ protected:
}
settings.ostr << (settings.hilite ? hilite_keyword : "");
if (exchange)
if (exchange && dictionary)
settings.ostr << "EXCHANGE DICTIONARIES ";
else if (exchange)
settings.ostr << "EXCHANGE TABLES ";
else if (dictionary)
settings.ostr << "RENAME DICTIONARY ";
else
settings.ostr << "RENAME TABLE ";
settings.ostr << (settings.hilite ? hilite_none : "");
for (auto it = elements.cbegin(); it != elements.cend(); ++it)

View File

@ -48,6 +48,14 @@ void ASTWithAlias::appendColumnName(WriteBuffer & ostr) const
appendColumnNameImpl(ostr);
}
void ASTWithAlias::appendColumnName(WriteBuffer & ostr, const Settings & settings) const
{
if (prefer_alias_to_column_name && !alias.empty())
writeString(alias, ostr);
else
appendColumnNameImpl(ostr, settings);
}
void ASTWithAlias::appendColumnNameWithoutAlias(WriteBuffer & ostr) const
{
appendColumnNameImpl(ostr);

View File

@ -21,8 +21,10 @@ public:
using IAST::IAST;
void appendColumnName(WriteBuffer & ostr) const final;
void appendColumnName(WriteBuffer & ostr, const Settings & settings) const final;
void appendColumnNameWithoutAlias(WriteBuffer & ostr) const final;
String getAliasOrColumnName() const override { return alias.empty() ? getColumnName() : alias; }
String getAliasOrColumnName(const Settings & settings) const override { return alias.empty() ? getColumnName(settings) : alias; }
String tryGetAlias() const override { return alias; }
void setAlias(const String & to) override { alias = to; }
@ -33,6 +35,7 @@ public:
protected:
virtual void appendColumnNameImpl(WriteBuffer & ostr) const = 0;
virtual void appendColumnNameImpl(WriteBuffer & ostr, const Settings &) const { appendColumnNameImpl(ostr); }
};
/// helper for setting aliases and chaining result to other functions

View File

@ -109,6 +109,14 @@ String IAST::getColumnName() const
}
String IAST::getColumnName(const Settings & settings) const
{
WriteBufferFromOwnString write_buffer;
appendColumnName(write_buffer, settings);
return write_buffer.str();
}
String IAST::getColumnNameWithoutAlias() const
{
WriteBufferFromOwnString write_buffer;

View File

@ -5,6 +5,7 @@
#include <Parsers/IdentifierQuotingStyle.h>
#include <Common/Exception.h>
#include <Common/TypePromotion.h>
#include <Core/Settings.h>
#include <IO/WriteBufferFromString.h>
#include <algorithm>
@ -41,13 +42,18 @@ public:
/** Get the canonical name of the column if the element is a column */
String getColumnName() const;
String getColumnName(const Settings & settings) const;
/** Same as the above but ensure no alias names are used. This is for index analysis */
String getColumnNameWithoutAlias() const;
virtual void appendColumnName(WriteBuffer &) const
{
throw Exception("Trying to get name of not a column: " + getID(), ErrorCodes::LOGICAL_ERROR);
}
virtual void appendColumnName(WriteBuffer & ostr, const Settings &) const { appendColumnName(ostr); }
virtual void appendColumnNameWithoutAlias(WriteBuffer &) const
{
throw Exception("Trying to get name of not a column: " + getID(), ErrorCodes::LOGICAL_ERROR);
@ -55,6 +61,7 @@ public:
/** Get the alias, if any, or the canonical name of the column, if it is not. */
virtual String getAliasOrColumnName() const { return getColumnName(); }
virtual String getAliasOrColumnName(const Settings & settings) const { return getColumnName(settings); }
/** Get the alias, if any, or an empty string if it does not exist, or if the element does not support aliases. */
virtual String tryGetAlias() const { return String(); }
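
Note: the design choice here is that the settings-aware virtuals default to the settings-free ones, so only AST nodes whose names actually depend on settings have to override. A condensed sketch of the dispatch, collected from this and the neighboring headers:

/// IAST: the base overload ignores settings by default.
virtual void appendColumnName(WriteBuffer & ostr, const Settings &) const { appendColumnName(ostr); }

/// ASTWithAlias: the same defaulting trick one level down.
virtual void appendColumnNameImpl(WriteBuffer & ostr, const Settings &) const { appendColumnNameImpl(ostr); }

/// Only ASTLiteral (tuple literals) and ASTFunction (recursing into parameters
/// and arguments) provide genuinely settings-sensitive overrides.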

View File

@ -88,13 +88,13 @@ public:
*/
virtual bool parse(Pos & pos, ASTPtr & node, Expected & expected) = 0;
bool ignore(Pos & pos, Expected & expected)
bool ignore(Pos & pos, Expected & expected) // -V1071
{
ASTPtr ignore_node;
return parse(pos, ignore_node, expected);
}
bool ignore(Pos & pos)
bool ignore(Pos & pos) // -V1071
{
Expected expected;
return ignore(pos, expected);

View File

@ -35,7 +35,7 @@ public:
return res;
}
bool parse(Pos & pos, ASTPtr & node, Expected & expected) override;
bool parse(Pos & pos, ASTPtr & node, Expected & expected) override; // -V1071
protected:
virtual bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) = 0;

View File

@ -9,7 +9,7 @@
using namespace DB;
using namespace DB::MySQLParser;
static inline ASTPtr tryParserQuery(IParser & parser, const String & query)
static inline ASTPtr tryParserQuery(IParser & parser, const String & query) // -V1071
{
return parseQuery(parser, query.data(), query.data() + query.size(), "", 0, 0);
}

View File

@ -971,6 +971,8 @@ bool ParserCreateDictionaryQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, E
{
ParserKeyword s_create("CREATE");
ParserKeyword s_attach("ATTACH");
ParserKeyword s_replace("REPLACE");
ParserKeyword s_or_replace("OR REPLACE");
ParserKeyword s_dictionary("DICTIONARY");
ParserKeyword s_if_not_exists("IF NOT EXISTS");
ParserKeyword s_on("ON");
@ -982,6 +984,8 @@ bool ParserCreateDictionaryQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, E
ParserDictionary dictionary_p;
bool if_not_exists = false;
bool replace = false;
bool or_replace = false;
ASTPtr name;
ASTPtr attributes;
@ -989,13 +993,21 @@ bool ParserCreateDictionaryQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, E
String cluster_str;
bool attach = false;
if (!s_create.ignore(pos, expected))
if (s_create.ignore(pos, expected))
{
if (s_attach.ignore(pos, expected))
attach = true;
else
return false;
if (s_or_replace.ignore(pos, expected))
{
replace = true;
or_replace = true;
}
}
else if (s_attach.ignore(pos, expected))
attach = true;
else if (s_replace.ignore(pos, expected))
replace = true;
else
return false;
if (!s_dictionary.ignore(pos, expected))
return false;
@ -1031,6 +1043,8 @@ bool ParserCreateDictionaryQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, E
node = query;
query->is_dictionary = true;
query->attach = attach;
query->create_or_replace = or_replace;
query->replace_table = replace;
auto dict_id = name->as<ASTTableIdentifier>()->getTableId();
query->database = dict_id.database_name;
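
Note: a hedged usage sketch of the extended grammar, modeled on the tryParserQuery helper from the MySQL parser tests earlier in this diff; the include paths are assumptions:

#include <Parsers/ASTCreateQuery.h>
#include <Parsers/ParserCreateQuery.h>  /// assumed home of ParserCreateDictionaryQuery
#include <Parsers/parseQuery.h>

static DB::ASTPtr parseDictionaryQuery(const DB::String & query)
{
    DB::ParserCreateDictionaryQuery parser;
    return DB::parseQuery(parser, query.data(), query.data() + query.size(), "", 0, 0);
}

/// auto ast = parseDictionaryQuery("CREATE OR REPLACE DICTIONARY db.dict ...");
/// const auto & create = ast->as<DB::ASTCreateQuery &>();
/// Here create.is_dictionary, create.replace_table and create.create_or_replace are all true,
/// while plain "REPLACE DICTIONARY ..." sets only replace_table.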

View File

@ -57,6 +57,8 @@ bool ParserExplainQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
ParserQuery p(end);
if (p.parse(pos, query, expected))
explain_query->setExplainedQuery(std::move(query));
else
return false;
}
else if (select_p.parse(pos, query, expected) ||
create_p.parse(pos, query, expected))

View File

@ -225,18 +225,19 @@ namespace DB
}
}
template <typename DecimalType, typename DecimalArray>
static void fillColumnWithDecimalData(std::shared_ptr<arrow::ChunkedArray> & arrow_column, IColumn & internal_column)
{
auto & column = assert_cast<ColumnDecimal<Decimal128> &>(internal_column);
auto & column = assert_cast<ColumnDecimal<DecimalType> &>(internal_column);
auto & column_data = column.getData();
column_data.reserve(arrow_column->length());
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
{
auto & chunk = static_cast<arrow::DecimalArray &>(*(arrow_column->chunk(chunk_i)));
auto & chunk = static_cast<DecimalArray &>(*(arrow_column->chunk(chunk_i)));
for (size_t value_i = 0, length = static_cast<size_t>(chunk.length()); value_i < length; ++value_i)
{
column_data.emplace_back(chunk.IsNull(value_i) ? Decimal128(0) : *reinterpret_cast<const Decimal128 *>(chunk.Value(value_i))); // TODO: copy column
column_data.emplace_back(chunk.IsNull(value_i) ? DecimalType(0) : *reinterpret_cast<const DecimalType *>(chunk.Value(value_i))); // TODO: copy column
}
}
}
@ -335,8 +336,11 @@ namespace DB
case arrow::Type::TIMESTAMP:
fillColumnWithTimestampData(arrow_column, internal_column);
break;
case arrow::Type::DECIMAL:
fillColumnWithDecimalData(arrow_column, internal_column /*, internal_nested_type*/);
case arrow::Type::DECIMAL128:
fillColumnWithDecimalData<Decimal128, arrow::Decimal128Array>(arrow_column, internal_column /*, internal_nested_type*/);
break;
case arrow::Type::DECIMAL256:
fillColumnWithDecimalData<Decimal256, arrow::Decimal256Array>(arrow_column, internal_column /*, internal_nested_type*/);
break;
case arrow::Type::MAP: [[fallthrough]];
case arrow::Type::LIST:
@ -442,12 +446,18 @@ namespace DB
return makeNullable(getInternalType(arrow_type, nested_type, column_name, format_name));
}
if (arrow_type->id() == arrow::Type::DECIMAL)
if (arrow_type->id() == arrow::Type::DECIMAL128)
{
const auto * decimal_type = static_cast<arrow::DecimalType *>(arrow_type.get());
return std::make_shared<DataTypeDecimal<Decimal128>>(decimal_type->precision(), decimal_type->scale());
}
if (arrow_type->id() == arrow::Type::DECIMAL256)
{
const auto * decimal_type = static_cast<arrow::DecimalType *>(arrow_type.get());
return std::make_shared<DataTypeDecimal<Decimal256>>(decimal_type->precision(), decimal_type->scale());
}
if (arrow_type->id() == arrow::Type::LIST)
{
const auto * list_type = static_cast<arrow::ListType *>(arrow_type.get());

View File

@ -421,11 +421,20 @@ namespace DB
|| std::is_same_v<ToDataType, DataTypeDecimal<Decimal64>>
|| std::is_same_v<ToDataType, DataTypeDecimal<Decimal128>>)
{
fillArrowArrayWithDecimalColumnData<ToDataType>(column, null_bytemap, array_builder, format_name, start, end);
fillArrowArrayWithDecimalColumnData<ToDataType, Int128, arrow::Decimal128, arrow::Decimal128Builder>(column, null_bytemap, array_builder, format_name, start, end);
return true;
}
if constexpr (std::is_same_v<ToDataType, DataTypeDecimal<Decimal256>>)
{
fillArrowArrayWithDecimalColumnData<ToDataType, Int256, arrow::Decimal256, arrow::Decimal256Builder>(column, null_bytemap, array_builder, format_name, start, end);
return true;
}
return false;
};
callOnIndexAndDataType<void>(column_type->getTypeId(), fill_decimal);
if (!callOnIndexAndDataType<void>(column_type->getTypeId(), fill_decimal))
throw Exception{ErrorCodes::LOGICAL_ERROR, "Cannot fill arrow array with decimal data with type {}", column_type_name};
}
#define DISPATCH(CPP_NUMERIC_TYPE, ARROW_BUILDER_TYPE) \
else if (#CPP_NUMERIC_TYPE == column_type_name) \
@ -445,7 +454,7 @@ namespace DB
}
}
template <typename DataType>
template <typename DataType, typename FieldType, typename ArrowDecimalType, typename ArrowBuilder>
static void fillArrowArrayWithDecimalColumnData(
ColumnPtr write_column,
const PaddedPODArray<UInt8> * null_bytemap,
@ -455,7 +464,7 @@ namespace DB
size_t end)
{
const auto & column = assert_cast<const typename DataType::ColumnType &>(*write_column);
arrow::DecimalBuilder & builder = assert_cast<arrow::DecimalBuilder &>(*array_builder);
ArrowBuilder & builder = assert_cast<ArrowBuilder &>(*array_builder);
arrow::Status status;
for (size_t value_i = start; value_i < end; ++value_i)
@ -463,8 +472,10 @@ namespace DB
if (null_bytemap && (*null_bytemap)[value_i])
status = builder.AppendNull();
else
status = builder.Append(
arrow::Decimal128(reinterpret_cast<const uint8_t *>(&column.getElement(value_i).value))); // TODO: try copy column
{
FieldType element = FieldType(column.getElement(value_i).value);
status = builder.Append(ArrowDecimalType(reinterpret_cast<const uint8_t *>(&element))); // TODO: try copy column
}
checkStatus(status, write_column->getName(), format_name);
}
@ -512,15 +523,18 @@ namespace DB
if constexpr (
std::is_same_v<ToDataType, DataTypeDecimal<Decimal32>>
|| std::is_same_v<ToDataType, DataTypeDecimal<Decimal64>>
|| std::is_same_v<ToDataType, DataTypeDecimal<Decimal128>>)
|| std::is_same_v<ToDataType, DataTypeDecimal<Decimal128>>
|| std::is_same_v<ToDataType, DataTypeDecimal<Decimal256>>)
{
const auto & decimal_type = assert_cast<const ToDataType *>(column_type.get());
arrow_type = arrow::decimal(decimal_type->getPrecision(), decimal_type->getScale());
return true;
}
return false;
};
callOnIndexAndDataType<void>(column_type->getTypeId(), create_arrow_type);
if (!callOnIndexAndDataType<void>(column_type->getTypeId(), create_arrow_type))
throw Exception{ErrorCodes::LOGICAL_ERROR, "Cannot convert decimal type {} to arrow type", column_type->getFamilyName()};
return arrow_type;
}

View File

@ -38,9 +38,10 @@ FillingStep::FillingStep(const DataStream & input_stream_, SortDescription sort_
void FillingStep::transformPipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings &)
{
pipeline.addSimpleTransform([&](const Block & header)
pipeline.addSimpleTransform([&](const Block & header, QueryPipeline::StreamType stream_type) -> ProcessorPtr
{
return std::make_shared<FillingTransform>(header, sort_description);
bool on_totals = stream_type == QueryPipeline::StreamType::Totals;
return std::make_shared<FillingTransform>(header, sort_description, on_totals);
});
}

View File

@ -30,12 +30,16 @@ Block FillingTransform::transformHeader(Block header, const SortDescription & so
}
FillingTransform::FillingTransform(
const Block & header_, const SortDescription & sort_description_)
const Block & header_, const SortDescription & sort_description_, bool on_totals_)
: ISimpleTransform(header_, transformHeader(header_, sort_description_), true)
, sort_description(sort_description_)
, on_totals(on_totals_)
, filling_row(sort_description_)
, next_row(sort_description_)
{
if (on_totals)
return;
auto try_convert_fields = [](auto & descr, const auto & type)
{
auto max_type = Field::Types::Null;
@ -106,7 +110,7 @@ FillingTransform::FillingTransform(
IProcessor::Status FillingTransform::prepare()
{
if (input.isFinished() && !output.isFinished() && !has_input && !generate_suffix)
if (!on_totals && input.isFinished() && !output.isFinished() && !has_input && !generate_suffix)
{
should_insert_first = next_row < filling_row;
@ -126,6 +130,9 @@ IProcessor::Status FillingTransform::prepare()
void FillingTransform::transform(Chunk & chunk)
{
if (on_totals)
return;
Columns old_fill_columns;
Columns old_other_columns;
MutableColumns res_fill_columns;
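
Note: the invariant behind these guards, stated once: the totals stream carries a single synthetic row, so WITH FILL must pass it through untouched. Every entry point therefore becomes a no-op when on_totals is set:

/// constructor -> skip fill-field conversion and validation
/// prepare()   -> skip suffix generation (no trailing fill rows for totals)
/// transform() -> return the chunk unchanged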

View File

@ -13,7 +13,7 @@ namespace DB
class FillingTransform : public ISimpleTransform
{
public:
FillingTransform(const Block & header_, const SortDescription & sort_description_);
FillingTransform(const Block & header_, const SortDescription & sort_description_, bool on_totals_);
String getName() const override { return "FillingTransform"; }
@ -28,6 +28,8 @@ private:
void setResultColumns(Chunk & chunk, MutableColumns & fill_columns, MutableColumns & other_columns) const;
const SortDescription sort_description; /// Contains only rows with WITH FILL.
const bool on_totals; /// FillingTransform does nothing on totals.
FillingRow filling_row; /// Current row, which is used to fill gaps.
FillingRow next_row; /// Row to which we need to generate filling rows.

View File

@ -130,6 +130,8 @@ void MySQLHandler::run()
authenticate(handshake_response.username, handshake_response.auth_plugin_name, handshake_response.auth_response);
connection_context->getClientInfo().initial_user = handshake_response.username;
try
{
if (!handshake_response.database.empty())

View File

@ -60,13 +60,12 @@ namespace
constexpr const std::chrono::minutes decrease_error_count_period{5};
template <typename PoolFactory>
ConnectionPoolPtrs createPoolsForAddresses(const std::string & name, PoolFactory && factory, Poco::Logger * log)
ConnectionPoolPtrs createPoolsForAddresses(const std::string & name, PoolFactory && factory, const Cluster::ShardsInfo & shards_info, Poco::Logger * log)
{
ConnectionPoolPtrs pools;
for (auto it = boost::make_split_iterator(name, boost::first_finder(",")); it != decltype(it){}; ++it)
auto make_connection = [&](const Cluster::Address & address)
{
Cluster::Address address = Cluster::Address::fromFullString(boost::copy_range<std::string>(*it));
try
{
pools.emplace_back(factory(address));
@ -76,10 +75,35 @@ namespace
if (e.code() == ErrorCodes::INCORRECT_FILE_NAME)
{
tryLogCurrentException(log);
continue;
return;
}
throw;
}
};
for (auto it = boost::make_split_iterator(name, boost::first_finder(",")); it != decltype(it){}; ++it)
{
const std::string & dirname = boost::copy_range<std::string>(*it);
Cluster::Address address = Cluster::Address::fromFullString(dirname);
if (address.shard_index && dirname.ends_with("_all_replicas"))
{
if (address.shard_index > shards_info.size())
{
LOG_ERROR(log, "No shard with shard_index={} ({})", address.shard_index, name);
continue;
}
const auto & shard_info = shards_info[address.shard_index - 1];
size_t replicas = shard_info.per_replica_pools.size();
for (size_t replica_index = 1; replica_index <= replicas; ++replica_index)
{
address.replica_index = replica_index;
make_connection(address);
}
}
else
make_connection(address);
}
return pools;
@ -420,13 +444,13 @@ ConnectionPoolPtr StorageDistributedDirectoryMonitor::createPool(const std::stri
const auto & shards_info = cluster->getShardsInfo();
const auto & shards_addresses = cluster->getShardsAddresses();
/// check new format shard{shard_index}_number{replica_index}
/// check new format shard{shard_index}_replica{replica_index}
/// (shard_index and replica_index starts from 1)
if (address.shard_index != 0)
{
if (!address.replica_index)
throw Exception(ErrorCodes::INCORRECT_FILE_NAME,
"Wrong replica_index ({})", address.replica_index, name);
"Wrong replica_index={} ({})", address.replica_index, name);
if (address.shard_index > shards_info.size())
throw Exception(ErrorCodes::INCORRECT_FILE_NAME,
@ -475,7 +499,7 @@ ConnectionPoolPtr StorageDistributedDirectoryMonitor::createPool(const std::stri
address.secure);
};
auto pools = createPoolsForAddresses(name, pool_factory, storage.log);
auto pools = createPoolsForAddresses(name, pool_factory, storage.getCluster()->getShardsInfo(), storage.log);
const auto settings = storage.getContext()->getSettings();
return pools.size() == 1 ? pools.front() : std::make_shared<ConnectionPoolWithFailover>(pools,
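
Note: hedged examples of the directory names this now handles, following the shard{shard_index}_replica{replica_index} format referenced in the comment above:

/// "shard2_replica3"     -> one pool for shard 2, replica 3
/// "shard2_all_replicas" -> one pool per replica of shard 2 (the new branch above)
/// "nameA,nameB"         -> comma-separated names are split and each handled as above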

View File

@ -752,7 +752,7 @@ void DistributedBlockOutputStream::writeToShard(const Block & block, const std::
auto sleep_ms = context->getSettingsRef().distributed_directory_monitor_sleep_time_ms;
for (const auto & dir_name : dir_names)
{
auto & directory_monitor = storage.requireDirectoryMonitor(disk, dir_name);
auto & directory_monitor = storage.requireDirectoryMonitor(disk, dir_name, /* startup= */ false);
directory_monitor.addAndSchedule(file_size, sleep_ms.totalMilliseconds());
}
}

View File

@ -49,7 +49,6 @@ namespace DB
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int INCORRECT_QUERY;
extern const int TABLE_WAS_NOT_DROPPED;
extern const int QUERY_IS_NOT_SUPPORTED_IN_LIVE_VIEW;
@ -82,9 +81,8 @@ static StorageID extractDependentTable(ASTPtr & query, ContextPtr context, const
{
auto * ast_select = subquery->as<ASTSelectWithUnionQuery>();
if (!ast_select)
throw Exception("Logical error while creating StorageLiveView."
" Could not retrieve table name from select query.",
DB::ErrorCodes::LOGICAL_ERROR);
throw Exception("LIVE VIEWs are only supported for queries from tables, but there is no table name in select query.",
DB::ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_LIVE_VIEW);
if (ast_select->list_of_selects->children.size() != 1)
throw Exception("UNION is not supported for LIVE VIEW", ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_LIVE_VIEW);

View File

@ -2331,7 +2331,14 @@ MergeTreeData::DataPartsVector MergeTreeData::removePartsInRangeFromWorkingSet(c
if (part->info.partition_id != drop_range.partition_id)
throw Exception("Unexpected partition_id of part " + part->name + ". This is a bug.", ErrorCodes::LOGICAL_ERROR);
if (part->info.min_block < drop_range.min_block) /// NOTE Always false, because drop_range.min_block == 0
/// It's a DROP PART and it has already been executed by fetching some covering part
if (part->info != drop_range && part->info.contains(drop_range))
{
LOG_INFO(log, "Skipping drop range for part {} because covering part {} already exists", drop_range.getPartName(), part->name);
return {};
}
if (part->info.min_block < drop_range.min_block)
{
if (drop_range.min_block <= part->info.max_block)
{
@ -2658,7 +2665,6 @@ void MergeTreeData::delayInsertOrThrowIfNeeded(Poco::Event * until) const
std::this_thread::sleep_for(std::chrono::milliseconds(static_cast<size_t>(delay_milliseconds)));
}
MergeTreeData::DataPartPtr MergeTreeData::getActiveContainingPart(
const MergeTreePartInfo & part_info, MergeTreeData::DataPartState state, DataPartsLock & /*lock*/) const
{
@ -2760,7 +2766,6 @@ MergeTreeData::DataPartsVector MergeTreeData::getDataPartsVectorInPartition(Merg
data_parts_by_state_and_info.upper_bound(state_with_partition));
}
MergeTreeData::DataPartPtr MergeTreeData::getPartIfExists(const MergeTreePartInfo & part_info, const MergeTreeData::DataPartStates & valid_states)
{
auto lock = lockParts();

View File

@ -264,6 +264,10 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge(
if (!can_merge_callback(nullptr, part, nullptr))
continue;
/// This part can be merged only with subsequent parts (no previous part exists), so start a
/// new interval if the previous one was not empty.
if (!parts_ranges.back().empty())
parts_ranges.emplace_back();
}
else
{
@ -271,12 +275,21 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge(
/// interval (in the same partition)
if (!can_merge_callback(*prev_part, part, nullptr))
{
/// Starting new interval in the same partition
assert(!parts_ranges.back().empty());
parts_ranges.emplace_back();
/// Now we have no previous part, but it affects only logging
/// Now we have no previous part
prev_part = nullptr;
/// Mustn't be empty
assert(!parts_ranges.back().empty());
/// Some parts cannot be merged with previous parts and also cannot be merged with themselves:
/// for example, a merge is already assigned for such parts, or they participate in quorum inserts,
/// and so on.
/// Also, we don't start a new interval here (maybe all following parts cannot be merged either,
/// and we don't want to leave an empty interval).
if (!can_merge_callback(nullptr, part, nullptr))
continue;
/// Starting new interval in the same partition
parts_ranges.emplace_back();
}
}

View File

@ -234,6 +234,9 @@ bool MergeTreeIndexConditionBloomFilter::traverseFunction(const ASTPtr & node, B
if (const auto * function = node->as<ASTFunction>())
{
if (!function->arguments)
return false;
const ASTs & arguments = function->arguments->children;
for (const auto & arg : arguments)
{

View File

@ -1,5 +1,6 @@
#pragma once
#include <limits>
#include <tuple>
#include <common/types.h>
#include <common/DayNum.h>
@ -85,7 +86,7 @@ struct MergeTreePartInfo
return static_cast<UInt64>(max_block - min_block + 1);
}
static MergeTreePartInfo fromPartName(const String & part_name, MergeTreeDataFormatVersion format_version);
static MergeTreePartInfo fromPartName(const String & part_name, MergeTreeDataFormatVersion format_version); // -V1071
static bool tryParsePartName(const String & part_name, MergeTreePartInfo * part_info, MergeTreeDataFormatVersion format_version);

View File

@ -137,14 +137,21 @@ void ReplicatedMergeTreeQueue::insertUnlocked(
for (const String & virtual_part_name : entry->getVirtualPartNames(format_version))
{
virtual_parts.add(virtual_part_name, nullptr, log);
addPartToMutations(virtual_part_name);
/// Don't add drop range parts to mutations:
/// they don't produce any useful parts
if (entry->type != LogEntry::DROP_RANGE)
addPartToMutations(virtual_part_name);
}
/// Put 'DROP PARTITION' entries at the beginning of the queue, so as not to make superfluous fetches of parts that will eventually be deleted
if (entry->type != LogEntry::DROP_RANGE)
{
queue.push_back(entry);
}
else
{
queue.push_front(entry);
}
if (entry->type == LogEntry::GET_PART || entry->type == LogEntry::ATTACH_PART)
{
@ -891,6 +898,10 @@ bool ReplicatedMergeTreeQueue::checkReplaceRangeCanBeRemoved(const MergeTreePart
if (entry_ptr->replace_range_entry == current.replace_range_entry) /// same partition, don't want to drop ourselves
return false;
if (!part_info.contains(MergeTreePartInfo::fromPartName(entry_ptr->replace_range_entry->drop_range_part_name, format_version)))
return false;
size_t number_of_covered_parts = 0;
for (const String & new_part_name : entry_ptr->replace_range_entry->new_part_names)
{

View File

@ -215,23 +215,26 @@ LoadablesConfigurationPtr StorageDictionary::getConfiguration() const
void StorageDictionary::renameInMemory(const StorageID & new_table_id)
{
auto old_table_id = getStorageID();
IStorage::renameInMemory(new_table_id);
if (configuration)
{
configuration->setString("dictionary.database", new_table_id.database_name);
configuration->setString("dictionary.name", new_table_id.table_name);
const auto & external_dictionaries_loader = getContext()->getExternalDictionariesLoader();
external_dictionaries_loader.reloadConfig(getStorageID().getInternalDictionaryName());
auto result = external_dictionaries_loader.getLoadResult(old_table_id.getInternalDictionaryName());
auto result = external_dictionaries_loader.getLoadResult(getStorageID().getInternalDictionaryName());
if (!result.object)
return;
if (result.object)
{
const auto dictionary = std::static_pointer_cast<const IDictionary>(result.object);
dictionary->updateDictionaryName(new_table_id);
}
const auto dictionary = std::static_pointer_cast<const IDictionary>(result.object);
dictionary->updateDictionaryName(new_table_id);
external_dictionaries_loader.reloadConfig(old_table_id.getInternalDictionaryName());
dictionary_name = new_table_id.getFullNameNotQuoted();
}
IStorage::renameInMemory(new_table_id);
}
void registerStorageDictionary(StorageFactory & factory)

View File

@ -45,7 +45,7 @@ public:
Poco::Timestamp getUpdateTime() const;
LoadablesConfigurationPtr getConfiguration() const;
const String & getDictionaryName() const { return dictionary_name; }
String getDictionaryName() const { return dictionary_name; }
/// Specifies where the table is located relative to the dictionary.
enum class Location
@ -66,7 +66,7 @@ public:
};
private:
const String dictionary_name;
String dictionary_name;
const Location location;
mutable std::mutex dictionary_config_mutex;

View File

@ -800,12 +800,33 @@ void StorageDistributed::startup()
if (!storage_policy)
return;
for (const DiskPtr & disk : data_volume->getDisks())
createDirectoryMonitors(disk);
const auto & disks = data_volume->getDisks();
for (const String & path : getDataPaths())
/// Make initialization for a large number of disks parallel.
ThreadPool pool(disks.size());
for (const DiskPtr & disk : disks)
{
pool.scheduleOrThrowOnError([&]()
{
createDirectoryMonitors(disk);
});
}
pool.wait();
const auto & paths = getDataPaths();
std::vector<UInt64> last_increment(paths.size());
for (size_t i = 0; i < paths.size(); ++i)
{
pool.scheduleOrThrowOnError([&, i]()
{
last_increment[i] = getMaximumFileNumber(paths[i]);
});
}
pool.wait();
for (const auto inc : last_increment)
{
UInt64 inc = getMaximumFileNumber(path);
if (inc > file_names_increment.value)
file_names_increment.value.store(inc);
}
@ -907,30 +928,50 @@ void StorageDistributed::createDirectoryMonitors(const DiskPtr & disk)
}
else
{
requireDirectoryMonitor(disk, dir_path.filename().string());
requireDirectoryMonitor(disk, dir_path.filename().string(), /* startup= */ true);
}
}
}
}
StorageDistributedDirectoryMonitor& StorageDistributed::requireDirectoryMonitor(const DiskPtr & disk, const std::string & name)
StorageDistributedDirectoryMonitor& StorageDistributed::requireDirectoryMonitor(const DiskPtr & disk, const std::string & name, bool startup)
{
const std::string & disk_path = disk->getPath();
const std::string key(disk_path + name);
std::lock_guard lock(cluster_nodes_mutex);
auto & node_data = cluster_nodes_data[key];
if (!node_data.directory_monitor)
auto create_node_data = [&]()
{
node_data.connection_pool = StorageDistributedDirectoryMonitor::createPool(name, *this);
node_data.directory_monitor = std::make_unique<StorageDistributedDirectoryMonitor>(
ClusterNodeData data;
data.connection_pool = StorageDistributedDirectoryMonitor::createPool(name, *this);
data.directory_monitor = std::make_unique<StorageDistributedDirectoryMonitor>(
*this, disk, relative_data_path + name,
node_data.connection_pool,
data.connection_pool,
monitors_blocker,
getContext()->getDistributedSchedulePool());
return data;
};
/// During startup the lock can be acquired later, after the node data is constructed.
if (startup)
{
auto tmp_node_data = create_node_data();
std::lock_guard lock(cluster_nodes_mutex);
auto & node_data = cluster_nodes_data[key];
assert(!node_data.directory_monitor);
node_data = std::move(tmp_node_data);
return *node_data.directory_monitor;
}
else
{
std::lock_guard lock(cluster_nodes_mutex);
auto & node_data = cluster_nodes_data[key];
if (!node_data.directory_monitor)
{
node_data = create_node_data();
}
return *node_data.directory_monitor;
}
return *node_data.directory_monitor;
}
std::vector<StorageDistributedDirectoryMonitor::Status> StorageDistributed::getDirectoryMonitorsStatuses() const
@ -1155,6 +1196,7 @@ void StorageDistributed::renameOnDisk(const String & new_path_to_table_data)
{
for (const DiskPtr & disk : data_volume->getDisks())
{
disk->createDirectories(new_path_to_table_data);
disk->moveDirectory(relative_data_path, new_path_to_table_data);
auto new_path = disk->getPath() + new_path_to_table_data;
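
Note: one design point in requireDirectoryMonitor above: during startup the pool and monitor are constructed before taking cluster_nodes_mutex, so the parallel per-disk initialization does not serialize on the lock; the lock is held only to publish the result, and the assert relies on each key being created exactly once at startup. A condensed sketch:

auto tmp_node_data = create_node_data();        /// slow part, done without the lock
{
    std::lock_guard lock(cluster_nodes_mutex);
    auto & node_data = cluster_nodes_data[key];
    assert(!node_data.directory_monitor);       /// unique per key during startup
    node_data = std::move(tmp_node_data);
}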

View File

@ -160,7 +160,7 @@ private:
/// create directory monitors for each existing subdirectory
void createDirectoryMonitors(const DiskPtr & disk);
/// ensure directory monitor thread and connection pool creation by disk and subdirectory name
StorageDistributedDirectoryMonitor & requireDirectoryMonitor(const DiskPtr & disk, const std::string & name);
StorageDistributedDirectoryMonitor & requireDirectoryMonitor(const DiskPtr & disk, const std::string & name, bool startup);
/// Return list of metrics for all created monitors
/// (note that monitors are created lazily, i.e. not until at least one INSERT has been executed)

View File

@ -2196,11 +2196,6 @@ void StorageReplicatedMergeTree::executeDropRange(const LogEntry & entry)
auto drop_range_info = MergeTreePartInfo::fromPartName(entry.new_part_name, format_version);
queue.removePartProducingOpsInRange(getZooKeeper(), drop_range_info, entry);
if (entry.detach)
LOG_DEBUG(log, "Detaching parts.");
else
LOG_DEBUG(log, "Removing parts.");
/// Delete the parts contained in the range to be deleted.
/// It's important that no old parts remain (after the merge), because otherwise,
/// after adding a new replica, this new replica downloads them, but does not delete them.
@ -2212,8 +2207,15 @@ void StorageReplicatedMergeTree::executeDropRange(const LogEntry & entry)
{
auto data_parts_lock = lockParts();
parts_to_remove = removePartsInRangeFromWorkingSet(drop_range_info, true, data_parts_lock);
if (parts_to_remove.empty())
return;
}
if (entry.detach)
LOG_DEBUG(log, "Detaching parts.");
else
LOG_DEBUG(log, "Removing parts.");
if (entry.detach)
{
/// If DETACH clone parts to detached/ directory
@ -6992,15 +6994,16 @@ bool StorageReplicatedMergeTree::dropPartImpl(
getClearBlocksInPartitionOps(ops, *zookeeper, part_info.partition_id, part_info.min_block, part_info.max_block);
size_t clear_block_ops_size = ops.size();
/// Set fake level to treat this part as virtual in queue.
auto drop_part_info = part->info;
drop_part_info.level = MergeTreePartInfo::MAX_LEVEL;
/// If `part_name` is the result of a recent merge and the source parts are still available, then
/// DROP_RANGE with detach will move this part together with the source parts to the `detached/` dir.
entry.type = LogEntry::DROP_RANGE;
entry.source_replica = replica_name;
entry.new_part_name = getPartNamePossiblyFake(format_version, drop_part_info);
/// We don't set the fake drop level (999999999) for a single-part DROP_RANGE.
/// First of all, we don't guarantee anything other than that the part will not be
/// active after DROP PART, but a covering part (without the data of the dropped part) can exist.
/// If we added a part with level 999999999, we could break an invariant in virtual_parts of
/// the queue.
entry.new_part_name = getPartNamePossiblyFake(format_version, part->info);
entry.detach = detach;
entry.create_time = time(nullptr);

View File

@ -31,7 +31,10 @@ namespace ErrorCodes
StorageView::StorageView(
const StorageID & table_id_, const ASTCreateQuery & query, const ColumnsDescription & columns_, const String & comment)
const StorageID & table_id_,
const ASTCreateQuery & query,
const ColumnsDescription & columns_,
const String & comment)
: IStorage(table_id_)
{
StorageInMemoryMetadata storage_metadata;
@ -40,7 +43,6 @@ StorageView::StorageView(
if (!query.select)
throw Exception("SELECT query is not specified for " + getName(), ErrorCodes::INCORRECT_QUERY);
SelectQueryDescription description;
description.inner_query = query.select->ptr();
@ -84,7 +86,12 @@ void StorageView::read(
current_inner_query = query_info.view_query->clone();
}
InterpreterSelectWithUnionQuery interpreter(current_inner_query, context, {}, column_names);
auto modified_context = Context::createCopy(context);
/// Use settings from the global context,
/// because differences between the settings set at VIEW creation and at query execution can break queries
modified_context->setSettings(context->getGlobalContext()->getSettingsRef());
InterpreterSelectWithUnionQuery interpreter(current_inner_query, modified_context, {}, column_names);
interpreter.buildQueryPlan(query_plan);
/// It's expected that the columns read from storage are not constant.

Some files were not shown because too many files have changed in this diff.