Merge remote-tracking branch 'origin/master' into tmp

This commit is contained in:
Alexander Kuzmenkov 2020-09-28 16:29:51 +03:00
commit 218d86de7e
299 changed files with 5095 additions and 1414 deletions

View File

@ -28,10 +28,11 @@ endforeach()
project(ClickHouse)
# If turned off: e.g. when ENABLE_FOO is ON, but FOO tool was not found, the CMake will continue.
option(FAIL_ON_UNSUPPORTED_OPTIONS_COMBINATION
"Stop/Fail CMake configuration if some ENABLE_XXX option is defined (either ON or OFF) but is not possible to satisfy"
ON
)
"Stop/Fail CMake configuration if some ENABLE_XXX option is defined (either ON or OFF)
but is not possible to satisfy" ON)
if(FAIL_ON_UNSUPPORTED_OPTIONS_COMBINATION)
set(RECONFIGURE_MESSAGE_LEVEL FATAL_ERROR)
else()
@ -58,7 +59,11 @@ set(CMAKE_DEBUG_POSTFIX "d" CACHE STRING "Generate debug library name with a pos
# For more info see https://cmake.org/cmake/help/latest/prop_gbl/USE_FOLDERS.html
set_property(GLOBAL PROPERTY USE_FOLDERS ON)
option(ENABLE_IPO "Enable full link time optimization (it's usually impractical; see also ENABLE_THINLTO)" OFF) # need cmake 3.9+
# cmake 3.9+ needed.
# Usually impractical.
# See also ${ENABLE_THINLTO}
option(ENABLE_IPO "Full link time optimization")
if(ENABLE_IPO)
cmake_policy(SET CMP0069 NEW)
include(CheckIPOSupported)
@ -93,11 +98,16 @@ message (STATUS "CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}")
string (TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_UC)
option (USE_STATIC_LIBRARIES "Set to FALSE to use shared libraries" ON)
option (MAKE_STATIC_LIBRARIES "Set to FALSE to make shared libraries" ${USE_STATIC_LIBRARIES})
option(USE_STATIC_LIBRARIES "Disable to use shared libraries" ON)
option(MAKE_STATIC_LIBRARIES "Disable to make shared libraries" ${USE_STATIC_LIBRARIES})
if (NOT MAKE_STATIC_LIBRARIES)
option (SPLIT_SHARED_LIBRARIES "DEV ONLY. Keep all internal libs as separate .so for faster linking" OFF)
option (CLICKHOUSE_SPLIT_BINARY "Make several binaries instead one bundled (clickhouse-server, clickhouse-client, ... )" OFF)
# DEVELOPER ONLY.
# Faster linking if turned on.
option(SPLIT_SHARED_LIBRARIES "Keep all internal libraries as separate .so files")
option(CLICKHOUSE_SPLIT_BINARY
"Make several binaries (clickhouse-server, clickhouse-client etc.) instead of one bundled")
endif ()
if (MAKE_STATIC_LIBRARIES AND SPLIT_SHARED_LIBRARIES)
@ -112,7 +122,8 @@ if (USE_STATIC_LIBRARIES)
list(REVERSE CMAKE_FIND_LIBRARY_SUFFIXES)
endif ()
option (ENABLE_FUZZING "Enables fuzzing instrumentation" OFF)
# Implies ${WITH_COVERAGE}
option (ENABLE_FUZZING "Fuzzy testing using libfuzzer" OFF)
if (ENABLE_FUZZING)
message (STATUS "Fuzzing instrumentation enabled")
@ -144,10 +155,13 @@ if (COMPILER_CLANG)
endif ()
endif ()
option (ENABLE_TESTS "Enables tests" ON)
# If turned `ON`, assumes the user has either the system GTest library or the bundled one.
option(ENABLE_TESTS "Provide unit_test_dbms target with Google.Test unit tests" ON)
if (OS_LINUX AND NOT UNBUNDLED AND MAKE_STATIC_LIBRARIES AND NOT SPLIT_SHARED_LIBRARIES AND CMAKE_VERSION VERSION_GREATER "3.9.0")
option (GLIBC_COMPATIBILITY "Set to TRUE to enable compatibility with older glibc libraries. Only for x86_64, Linux. Implies ENABLE_FASTMEMCPY." ON)
# Only for Linux, x86_64.
# Implies ${ENABLE_FASTMEMCPY}
option(GLIBC_COMPATIBILITY "Enable compatibility with older glibc libraries." ON)
elseif(GLIBC_COMPATIBILITY)
message (${RECONFIGURE_MESSAGE_LEVEL} "Glibc compatibility cannot be enabled in current configuration")
endif ()
@ -159,7 +173,7 @@ endif ()
# Make sure the final executable has symbols exported
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -rdynamic")
find_program (OBJCOPY_PATH NAMES "llvm-objcopy" "llvm-objcopy-10" "llvm-objcopy-9" "llvm-objcopy-8" "objcopy")
find_program (OBJCOPY_PATH NAMES "llvm-objcopy" "llvm-objcopy-11" "llvm-objcopy-10" "llvm-objcopy-9" "llvm-objcopy-8" "objcopy")
if (OBJCOPY_PATH)
message(STATUS "Using objcopy: ${OBJCOPY_PATH}.")
@ -180,7 +194,9 @@ else ()
set(NO_WHOLE_ARCHIVE --no-whole-archive)
endif ()
option (ADD_GDB_INDEX_FOR_GOLD "Set to add .gdb-index to resulting binaries for gold linker. NOOP if lld is used." 0)
# Ignored if `lld` is used
option(ADD_GDB_INDEX_FOR_GOLD "Add .gdb-index to resulting binaries for gold linker.")
if (NOT CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE")
if (LINKER_NAME STREQUAL "lld")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--gdb-index")
@ -201,9 +217,13 @@ if (NOT CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE")
endif()
cmake_host_system_information(RESULT AVAILABLE_PHYSICAL_MEMORY QUERY AVAILABLE_PHYSICAL_MEMORY) # Not available under freebsd
if(NOT AVAILABLE_PHYSICAL_MEMORY OR AVAILABLE_PHYSICAL_MEMORY GREATER 8000)
option(COMPILER_PIPE "-pipe compiler option [less /tmp usage, more ram usage]" ON)
# Less `/tmp` usage, more RAM usage.
option(COMPILER_PIPE "-pipe compiler option" ON)
endif()
if(COMPILER_PIPE)
set(COMPILER_FLAGS "${COMPILER_FLAGS} -pipe")
else()
@ -214,7 +234,8 @@ if(NOT DISABLE_CPU_OPTIMIZE)
include(cmake/cpu_features.cmake)
endif()
option(ARCH_NATIVE "Enable -march=native compiler flag" 0)
option(ARCH_NATIVE "Add -march=native compiler flag")
if (ARCH_NATIVE)
set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=native")
endif ()
@ -225,6 +246,7 @@ if (UNBUNDLED AND (COMPILER_GCC OR COMPILER_CLANG))
else()
set (_CXX_STANDARD "-std=c++2a")
endif()
# cmake < 3.12 doesn't support 20. We'll set CMAKE_CXX_FLAGS for now
# set (CMAKE_CXX_STANDARD 20)
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${_CXX_STANDARD}")
@ -237,7 +259,8 @@ if (COMPILER_GCC OR COMPILER_CLANG)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsized-deallocation")
endif ()
option(WITH_COVERAGE "Build with coverage." 0)
# Compiler-specific coverage flags e.g. -fcoverage-mapping for gcc
option(WITH_COVERAGE "Profile the resulting binary/binaries" OFF)
if (WITH_COVERAGE AND COMPILER_CLANG)
set(COMPILER_FLAGS "${COMPILER_FLAGS} -fprofile-instr-generate -fcoverage-mapping")
@ -271,10 +294,13 @@ if (COMPILER_CLANG)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fdiagnostics-absolute-paths")
if (NOT ENABLE_TESTS AND NOT SANITIZE)
option(ENABLE_THINLTO "Enable Thin LTO. Only applicable for clang. It's also suppressed when building with tests or sanitizers." ON)
# https://clang.llvm.org/docs/ThinLTO.html
# Applies to clang only.
# Disabled when building with tests or sanitizers.
option(ENABLE_THINLTO "Clang-specific link time optimization" ON)
endif()
# We cannot afford to use LTO when compiling unitests, and it's not enough
# We cannot afford to use LTO when compiling unit tests, and it's not enough
# to only supply -fno-lto at the final linking stage. So we disable it
# completely.
if (ENABLE_THINLTO AND NOT ENABLE_TESTS AND NOT SANITIZE)
@ -287,8 +313,8 @@ if (COMPILER_CLANG)
endif ()
# Always prefer llvm tools when using clang. For instance, we cannot use GNU ar when llvm LTO is enabled
find_program (LLVM_AR_PATH NAMES "llvm-ar" "llvm-ar-11" "llvm-ar-10" "llvm-ar-9" "llvm-ar-8")
find_program (LLVM_AR_PATH NAMES "llvm-ar" "llvm-ar-10" "llvm-ar-9" "llvm-ar-8")
if (LLVM_AR_PATH)
message(STATUS "Using llvm-ar: ${LLVM_AR_PATH}.")
set (CMAKE_AR ${LLVM_AR_PATH})
@ -296,31 +322,39 @@ if (COMPILER_CLANG)
message(WARNING "Cannot find llvm-ar. System ar will be used instead. It does not work with ThinLTO.")
endif ()
find_program (LLVM_RANLIB_PATH NAMES "llvm-ranlib" "llvm-ranlib-10" "llvm-ranlib-9" "llvm-ranlib-8")
find_program (LLVM_RANLIB_PATH NAMES "llvm-ranlib" "llvm-ranlib-11" "llvm-ranlib-10" "llvm-ranlib-9" "llvm-ranlib-8")
if (LLVM_RANLIB_PATH)
message(STATUS "Using llvm-ranlib: ${LLVM_RANLIB_PATH}.")
set (CMAKE_RANLIB ${LLVM_RANLIB_PATH})
else ()
message(WARNING "Cannot find llvm-ranlib. System ranlib will be used instead. It does not work with ThinLTO.")
endif ()
elseif (ENABLE_THINLTO)
message (${RECONFIGURE_MESSAGE_LEVEL} "ThinLTO is only available with CLang")
endif ()
option (ENABLE_LIBRARIES "Enable all libraries (Global default switch)" ON)
# Turns on all external libs like s3, kafka, ODBC, ...
option(ENABLE_LIBRARIES "Enable all external libraries by default" ON)
# We recommend avoiding this mode for production builds because we can't guarantee all needed libraries exist in your
# system.
# This mode exists for enthusiastic developers who are searching for trouble.
# Useful for maintainers of OS packages.
option (UNBUNDLED "Use system libraries instead of ones in contrib/" OFF)
option (UNBUNDLED "Try find all libraries in system. We recommend to avoid this mode for production builds, because we cannot guarantee exact versions and variants of libraries your system has installed. This mode exists for enthusiastic developers who search for trouble. Also it is useful for maintainers of OS packages." OFF)
if (UNBUNDLED)
set(NOT_UNBUNDLED 0)
set(NOT_UNBUNDLED OFF)
else ()
set(NOT_UNBUNDLED 1)
set(NOT_UNBUNDLED ON)
endif ()
if (UNBUNDLED OR NOT (OS_LINUX OR OS_DARWIN))
# Using system libs can cause a lot of warnings in includes (on macro expansion).
option (WERROR "Enable -Werror compiler option" OFF)
option(WERROR "Enable -Werror compiler option" OFF)
else ()
option (WERROR "Enable -Werror compiler option" ON)
option(WERROR "Enable -Werror compiler option" ON)
endif ()
if (WERROR)
@ -362,8 +396,9 @@ else ()
set (CMAKE_POSITION_INDEPENDENT_CODE ON)
endif ()
# Using "include-what-you-use" tool.
option (USE_INCLUDE_WHAT_YOU_USE "Use 'include-what-you-use' tool" OFF)
# https://github.com/include-what-you-use/include-what-you-use
option (USE_INCLUDE_WHAT_YOU_USE "Automatically reduce unneeded includes in source code (external tool)" OFF)
if (USE_INCLUDE_WHAT_YOU_USE)
find_program(IWYU_PATH NAMES include-what-you-use iwyu)
if (NOT IWYU_PATH)
@ -375,8 +410,11 @@ if (USE_INCLUDE_WHAT_YOU_USE)
endif ()
if (ENABLE_TESTS)
message (STATUS "Tests are enabled")
message (STATUS "Unit tests are enabled")
else()
message(STATUS "Unit tests are disabled")
endif ()
enable_testing() # Enable for tests without binary
# when installing to /usr - place configs to /etc but for /usr/local place to /usr/local/etc
@ -386,7 +424,13 @@ else ()
set (CLICKHOUSE_ETC_DIR "${CMAKE_INSTALL_PREFIX}/etc")
endif ()
message (STATUS "Building for: ${CMAKE_SYSTEM} ${CMAKE_SYSTEM_PROCESSOR} ${CMAKE_LIBRARY_ARCHITECTURE} ; USE_STATIC_LIBRARIES=${USE_STATIC_LIBRARIES} MAKE_STATIC_LIBRARIES=${MAKE_STATIC_LIBRARIES} SPLIT_SHARED=${SPLIT_SHARED_LIBRARIES} UNBUNDLED=${UNBUNDLED} CCACHE=${CCACHE_FOUND} ${CCACHE_VERSION}")
message (STATUS
"Building for: ${CMAKE_SYSTEM} ${CMAKE_SYSTEM_PROCESSOR} ${CMAKE_LIBRARY_ARCHITECTURE} ;
USE_STATIC_LIBRARIES=${USE_STATIC_LIBRARIES}
MAKE_STATIC_LIBRARIES=${MAKE_STATIC_LIBRARIES}
SPLIT_SHARED=${SPLIT_SHARED_LIBRARIES}
UNBUNDLED=${UNBUNDLED}
CCACHE=${CCACHE_FOUND} ${CCACHE_VERSION}")
include (GNUInstallDirs)
include (cmake/contrib_finder.cmake)

View File

@ -17,5 +17,5 @@ ClickHouse is an open-source column-oriented database management system that all
## Upcoming Events
* [eBay migrating from Druid](https://us02web.zoom.us/webinar/register/tZMkfu6rpjItHtaQ1DXcgPWcSOnmM73HLGKL) on September 23, 2020.
* [ClickHouse for Edge Analytics](https://ones2020.sched.com/event/bWPs) on September 29, 2020.
* [ClickHouse online meetup (in Russian)](https://clck.ru/R2zB9) on October 1, 2020.

View File

@ -358,7 +358,12 @@ private:
}
else
{
if (number * sizeof(base_type) < sizeof(T))
if constexpr (sizeof(T) <= sizeof(base_type))
{
if (!number)
return x;
}
else if (number * sizeof(base_type) < sizeof(T))
return x >> (number * base_bits); // & std::numeric_limits<base_type>::max()
return 0;
}
@ -366,26 +371,32 @@ private:
template <typename T>
constexpr static integer<Bits, Signed>
op_minus(const integer<Bits, Signed> & lhs, T rhs)
minus(const integer<Bits, Signed> & lhs, T rhs)
{
integer<Bits, Signed> res;
constexpr const unsigned rhs_items = (sizeof(T) > sizeof(base_type)) ? (sizeof(T) / sizeof(base_type)) : 1;
constexpr const unsigned op_items = (item_count < rhs_items) ? item_count : rhs_items;
bool is_underflow = false;
for (unsigned i = 0; i < item_count; ++i)
integer<Bits, Signed> res(lhs);
bool underflows[item_count] = {};
for (unsigned i = 0; i < op_items; ++i)
{
base_type lhs_item = lhs.items[little(i)];
base_type rhs_item = get_item(rhs, i);
base_type & res_item = res.items[little(i)];
if (is_underflow)
underflows[i] = res_item < rhs_item;
res_item -= rhs_item;
}
for (unsigned i = 1; i < item_count; ++i)
{
if (underflows[i-1])
{
is_underflow = (lhs_item == 0);
--lhs_item;
base_type & res_item = res.items[little(i)];
if (res_item == 0)
underflows[i] = true;
--res_item;
}
if (lhs_item < rhs_item)
is_underflow = true;
res.items[little(i)] = lhs_item - rhs_item;
}
return res;
@ -393,39 +404,44 @@ private:
template <typename T>
constexpr static integer<Bits, Signed>
op_plus(const integer<Bits, Signed> & lhs, T rhs)
plus(const integer<Bits, Signed> & lhs, T rhs)
{
integer<Bits, Signed> res;
constexpr const unsigned rhs_items = (sizeof(T) > sizeof(base_type)) ? (sizeof(T) / sizeof(base_type)) : 1;
constexpr const unsigned op_items = (item_count < rhs_items) ? item_count : rhs_items;
bool is_overflow = false;
for (unsigned i = 0; i < item_count; ++i)
integer<Bits, Signed> res(lhs);
bool overflows[item_count] = {};
for (unsigned i = 0; i < op_items; ++i)
{
base_type lhs_item = lhs.items[little(i)];
base_type rhs_item = get_item(rhs, i);
if (is_overflow)
{
++lhs_item;
is_overflow = (lhs_item == 0);
}
base_type & res_item = res.items[little(i)];
res_item = lhs_item + rhs_item;
if (res_item < rhs_item)
is_overflow = true;
res_item += rhs_item;
overflows[i] = res_item < rhs_item;
}
for (unsigned i = 1; i < item_count; ++i)
{
if (overflows[i-1])
{
base_type & res_item = res.items[little(i)];
++res_item;
if (res_item == 0)
overflows[i] = true;
}
}
return res;
}
template <typename T>
constexpr static auto op_multiply(const integer<Bits, Signed> & lhs, const T & rhs)
constexpr static auto multiply(const integer<Bits, Signed> & lhs, const T & rhs)
{
integer<Bits, Signed> res{};
#if 1
integer<Bits, Signed> lhs2 = op_plus(lhs, shift_left(lhs, 1));
integer<Bits, Signed> lhs3 = op_plus(lhs2, shift_left(lhs, 2));
integer<Bits, Signed> lhs2 = plus(lhs, shift_left(lhs, 1));
integer<Bits, Signed> lhs3 = plus(lhs2, shift_left(lhs, 2));
#endif
for (unsigned i = 0; i < item_count; ++i)
{
@ -437,7 +453,7 @@ private:
#if 1 /// optimization
if ((rhs_item & 0x7) == 0x7)
{
res = op_plus(res, shift_left(lhs3, pos));
res = plus(res, shift_left(lhs3, pos));
rhs_item >>= 3;
pos += 3;
continue;
@ -445,14 +461,14 @@ private:
if ((rhs_item & 0x3) == 0x3)
{
res = op_plus(res, shift_left(lhs2, pos));
res = plus(res, shift_left(lhs2, pos));
rhs_item >>= 2;
pos += 2;
continue;
}
#endif
if (rhs_item & 1)
res = op_plus(res, shift_left(lhs, pos));
res = plus(res, shift_left(lhs, pos));
rhs_item >>= 1;
++pos;
@ -475,7 +491,7 @@ public:
constexpr static integer<Bits, Signed>
operator_unary_minus(const integer<Bits, Signed> & lhs) noexcept(std::is_same_v<Signed, unsigned>)
{
return op_plus(operator_unary_tilda(lhs), 1);
return plus(operator_unary_tilda(lhs), 1);
}
template <typename T>
@ -484,9 +500,9 @@ public:
if constexpr (should_keep_size<T>())
{
if (is_negative(rhs))
return op_minus(lhs, -rhs);
return minus(lhs, -rhs);
else
return op_plus(lhs, rhs);
return plus(lhs, rhs);
}
else
{
@ -502,9 +518,9 @@ public:
if constexpr (should_keep_size<T>())
{
if (is_negative(rhs))
return op_plus(lhs, -rhs);
return plus(lhs, -rhs);
else
return op_minus(lhs, rhs);
return minus(lhs, rhs);
}
else
{
@ -523,12 +539,12 @@ public:
if constexpr (std::is_signed_v<Signed>)
{
res = op_multiply((is_negative(lhs) ? make_positive(lhs) : lhs),
res = multiply((is_negative(lhs) ? make_positive(lhs) : lhs),
(is_negative(rhs) ? make_positive(rhs) : rhs));
}
else
{
res = op_multiply(lhs, (is_negative(rhs) ? make_positive(rhs) : rhs));
res = multiply(lhs, (is_negative(rhs) ? make_positive(rhs) : rhs));
}
if (std::is_same_v<Signed, signed> && is_negative(lhs) != is_negative(rhs))
@ -775,20 +791,20 @@ public:
{
if (*c >= '0' && *c <= '9')
{
res = op_multiply(res, 16U);
res = op_plus(res, *c - '0');
res = multiply(res, 16U);
res = plus(res, *c - '0');
++c;
}
else if (*c >= 'a' && *c <= 'f')
{
res = op_multiply(res, 16U);
res = op_plus(res, *c - 'a' + 10U);
res = multiply(res, 16U);
res = plus(res, *c - 'a' + 10U);
++c;
}
else if (*c >= 'A' && *c <= 'F')
{ // tolower must be used, but it is not constexpr
res = op_multiply(res, 16U);
res = op_plus(res, *c - 'A' + 10U);
res = multiply(res, 16U);
res = plus(res, *c - 'A' + 10U);
++c;
}
else
@ -802,8 +818,8 @@ public:
if (*c < '0' || *c > '9')
throwError("invalid char from");
res = op_multiply(res, 10U);
res = op_plus(res, *c - '0');
res = multiply(res, 10U);
res = plus(res, *c - '0');
++c;
}
}

View File

@ -14,6 +14,8 @@ TRIES=3
AMD64_BIN_URL="https://clickhouse-builds.s3.yandex.net/0/e29c4c3cc47ab2a6c4516486c1b77d57e7d42643/clickhouse_build_check/gcc-10_relwithdebuginfo_none_bundled_unsplitted_disable_False_binary/clickhouse"
AARCH64_BIN_URL="https://clickhouse-builds.s3.yandex.net/0/e29c4c3cc47ab2a6c4516486c1b77d57e7d42643/clickhouse_special_build_check/clang-10-aarch64_relwithdebuginfo_none_bundled_unsplitted_disable_False_binary/clickhouse"
# Note: on older Ubuntu versions, 'axel' does not support IPv6. If you are using IPv6-only servers on very old Ubuntu, just don't install 'axel'.
FASTER_DOWNLOAD=wget
if command -v axel >/dev/null; then
FASTER_DOWNLOAD=axel
@ -36,14 +38,6 @@ if [[ ! -f clickhouse ]]; then
$FASTER_DOWNLOAD "$AMD64_BIN_URL"
elif [[ $CPU == aarch64 ]]; then
$FASTER_DOWNLOAD "$AARCH64_BIN_URL"
# Download configs. ARM version has no embedded configs.
wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/programs/server/config.xml
wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/programs/server/users.xml
mkdir config.d
wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/programs/server/config.d/path.xml -O config.d/path.xml
wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/programs/server/config.d/access_control.xml -O config.d/access_control.xml
wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/programs/server/config.d/log_to_console.xml -O config.d/log_to_console.xml
else
echo "Unsupported CPU type: $CPU"
exit 1
@ -60,10 +54,12 @@ if [[ ! -d data ]]; then
if [[ ! -f $DATASET ]]; then
$FASTER_DOWNLOAD "https://clickhouse-datasets.s3.yandex.net/hits/partitions/$DATASET"
fi
tar $TAR_PARAMS --strip-components=1 --directory=. -x -v -f $DATASET
fi
uptime
echo "Starting clickhouse-server"
./clickhouse server > server.log 2>&1 &
@ -105,9 +101,12 @@ echo
echo "Benchmark complete. System info:"
echo
echo '----Version and build id--------'
./clickhouse local --query "SELECT version(), buildId()"
echo '----Version, build id-----------'
./clickhouse local --query "SELECT format('Version: {}, build id: {}', version(), buildId())"
./clickhouse local --query "SELECT format('The number of threads is: {}', value) FROM system.settings WHERE name = 'max_threads'" --output-format TSVRaw
./clickhouse local --query "SELECT format('Current time: {}', toString(now(), 'UTC'))"
echo '----CPU-------------------------'
cat /proc/cpuinfo | grep -i -F 'model name' | uniq
lscpu
echo '----Block Devices---------------'
lsblk

View File

@ -1,20 +1,28 @@
# This file configures static analysis tools that can be integrated to the build process
# https://clang.llvm.org/extra/clang-tidy/
option (ENABLE_CLANG_TIDY "Use clang-tidy static analyzer" OFF)
option (ENABLE_CLANG_TIDY "Use 'clang-tidy' static analyzer if present" OFF)
if (ENABLE_CLANG_TIDY)
if (${CMAKE_VERSION} VERSION_LESS "3.6.0")
message(FATAL_ERROR "clang-tidy requires CMake version at least 3.6.")
endif()
find_program (CLANG_TIDY_PATH NAMES "clang-tidy" "clang-tidy-10" "clang-tidy-9" "clang-tidy-8")
if (CLANG_TIDY_PATH)
message(STATUS "Using clang-tidy: ${CLANG_TIDY_PATH}. The checks will be run during build process. See the .clang-tidy file at the root directory to configure the checks.")
set (USE_CLANG_TIDY 1)
message(STATUS
"Using clang-tidy: ${CLANG_TIDY_PATH}.
The checks will be run during build process.
See the .clang-tidy file at the root directory to configure the checks.")
set (USE_CLANG_TIDY ON)
# The variable CMAKE_CXX_CLANG_TIDY will be set inside src and base directories with non third-party code.
# set (CMAKE_CXX_CLANG_TIDY "${CLANG_TIDY_PATH}")
elseif (FAIL_ON_UNSUPPORTED_OPTIONS_COMBINATION)
message(FATAL_ERROR "clang-tidy is not found")
else ()
message(STATUS "clang-tidy is not found. This is normal - the tool is only used for static code analysis and isn't essential for the build.")
message(STATUS
"clang-tidy is not found.
This is normal - the tool is only used for code static analysis and isn't essential for the build.")
endif ()
endif ()

View File

@ -18,7 +18,8 @@ if (NOT CCACHE_FOUND AND NOT DEFINED ENABLE_CCACHE AND NOT COMPILER_MATCHES_CCAC
"Setting it up will significantly reduce compilation time for 2nd and consequent builds")
endif()
option(ENABLE_CCACHE "Speedup re-compilations using ccache" ${ENABLE_CCACHE_BY_DEFAULT})
# https://ccache.dev/
option(ENABLE_CCACHE "Speedup re-compilations using ccache (external tool)" ${ENABLE_CCACHE_BY_DEFAULT})
if (NOT ENABLE_CCACHE)
return()

View File

@ -4,13 +4,16 @@ if (NOT USE_LIBCXX)
if (USE_INTERNAL_LIBCXX_LIBRARY)
message (${RECONFIGURE_MESSAGE_LEVEL} "Cannot use internal libcxx with USE_LIBCXX=OFF")
endif()
target_link_libraries(global-libs INTERFACE -l:libstdc++.a -l:libstdc++fs.a) # Always link these libraries as static
target_link_libraries(global-libs INTERFACE ${EXCEPTION_HANDLING_LIBRARY})
return()
endif()
set(USE_INTERNAL_LIBCXX_LIBRARY_DEFAULT ${NOT_UNBUNDLED})
option (USE_INTERNAL_LIBCXX_LIBRARY "Set to FALSE to use system libcxx and libcxxabi libraries instead of bundled" ${USE_INTERNAL_LIBCXX_LIBRARY_DEFAULT})
option (USE_INTERNAL_LIBCXX_LIBRARY "Disable to use system libcxx and libcxxabi libraries instead of bundled"
${USE_INTERNAL_LIBCXX_LIBRARY_DEFAULT})
if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libcxx/CMakeLists.txt")
if (USE_INTERNAL_LIBCXX_LIBRARY)

View File

@ -1,11 +1,4 @@
option (ENABLE_GTEST_LIBRARY "Enable gtest library" ${ENABLE_LIBRARIES})
if (NOT ENABLE_GTEST_LIBRARY)
if(USE_INTERNAL_GTEST_LIBRARY)
message (${RECONFIGURE_MESSAGE_LEVEL} "Cannot use internal Google Test when ENABLE_GTEST_LIBRARY=OFF")
endif()
return()
endif()
# included only if ENABLE_TESTS=1
option (USE_INTERNAL_GTEST_LIBRARY "Set to FALSE to use system Google Test instead of bundled" ${NOT_UNBUNDLED})
@ -15,6 +8,7 @@ if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/googletest/googletest/CMakeList
message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal gtest")
set (USE_INTERNAL_GTEST_LIBRARY 0)
endif ()
set (MISSING_INTERNAL_GTEST_LIBRARY 1)
endif ()

View File

@ -1,4 +1,5 @@
set (SENTRY_LIBRARY "sentry")
set (SENTRY_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/sentry-native/include")
if (NOT EXISTS "${SENTRY_INCLUDE_DIR}/sentry.h")
message (WARNING "submodule contrib/sentry-native is missing. to fix try run: \n git submodule update --init --recursive")

View File

@ -1,4 +1,4 @@
option(USE_SNAPPY "Enable support of snappy library" ${ENABLE_LIBRARIES})
option(USE_SNAPPY "Enable snappy library" ${ENABLE_LIBRARIES})
if(NOT USE_SNAPPY)
if (USE_INTERNAL_SNAPPY_LIBRARY)

View File

@ -1,11 +1,12 @@
option (FUZZER "Enable fuzzer: libfuzzer")
# see ./CMakeLists.txt for variable declaration
if (FUZZER)
if (FUZZER STREQUAL "libfuzzer")
# NOTE: Eldar Zaitov decided to name it "libfuzzer" instead of "fuzzer" to keep in mind another possible fuzzer backends.
# NOTE: no-link means that all the targets are built with instrumentation for fuzzer, but only some of them (tests) have entry point for fuzzer and it's not checked.
# NOTE: no-link means that all the targets are built with instrumentation for fuzzer, but only some of them
# (tests) have entry point for fuzzer and it's not checked.
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} -fsanitize=fuzzer-no-link")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} -fsanitize=fuzzer-no-link")
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=fuzzer-no-link")
endif()
@ -14,7 +15,6 @@ if (FUZZER)
if (NOT LIB_FUZZING_ENGINE)
set (LIB_FUZZING_ENGINE "-fsanitize=fuzzer")
endif ()
else ()
message (FATAL_ERROR "Unknown fuzzer type: ${FUZZER}")
endif ()

View File

@ -6,26 +6,35 @@
cmake_host_system_information(RESULT AVAILABLE_PHYSICAL_MEMORY QUERY AVAILABLE_PHYSICAL_MEMORY) # Not available under freebsd
cmake_host_system_information(RESULT NUMBER_OF_LOGICAL_CORES QUERY NUMBER_OF_LOGICAL_CORES)
option(PARALLEL_COMPILE_JOBS "Define the maximum number of concurrent compilation jobs" "")
# 1 if not set
option(PARALLEL_COMPILE_JOBS "Maximum number of concurrent compilation jobs" "")
# 1 if not set
option(PARALLEL_LINK_JOBS "Maximum number of concurrent link jobs" "")
if (NOT PARALLEL_COMPILE_JOBS AND AVAILABLE_PHYSICAL_MEMORY AND MAX_COMPILER_MEMORY)
math(EXPR PARALLEL_COMPILE_JOBS ${AVAILABLE_PHYSICAL_MEMORY}/${MAX_COMPILER_MEMORY})
if (NOT PARALLEL_COMPILE_JOBS)
set (PARALLEL_COMPILE_JOBS 1)
endif ()
endif ()
if (PARALLEL_COMPILE_JOBS AND (NOT NUMBER_OF_LOGICAL_CORES OR PARALLEL_COMPILE_JOBS LESS NUMBER_OF_LOGICAL_CORES))
set(CMAKE_JOB_POOL_COMPILE compile_job_pool${CMAKE_CURRENT_SOURCE_DIR})
string (REGEX REPLACE "[^a-zA-Z0-9]+" "_" CMAKE_JOB_POOL_COMPILE ${CMAKE_JOB_POOL_COMPILE})
set_property(GLOBAL APPEND PROPERTY JOB_POOLS ${CMAKE_JOB_POOL_COMPILE}=${PARALLEL_COMPILE_JOBS})
endif ()
option(PARALLEL_LINK_JOBS "Define the maximum number of concurrent link jobs" "")
if (NOT PARALLEL_LINK_JOBS AND AVAILABLE_PHYSICAL_MEMORY AND MAX_LINKER_MEMORY)
math(EXPR PARALLEL_LINK_JOBS ${AVAILABLE_PHYSICAL_MEMORY}/${MAX_LINKER_MEMORY})
if (NOT PARALLEL_LINK_JOBS)
set (PARALLEL_LINK_JOBS 1)
endif ()
endif ()
if (PARALLEL_LINK_JOBS AND (NOT NUMBER_OF_LOGICAL_CORES OR PARALLEL_COMPILE_JOBS LESS NUMBER_OF_LOGICAL_CORES))
set(CMAKE_JOB_POOL_LINK link_job_pool${CMAKE_CURRENT_SOURCE_DIR})
string (REGEX REPLACE "[^a-zA-Z0-9]+" "_" CMAKE_JOB_POOL_LINK ${CMAKE_JOB_POOL_LINK})
@ -33,5 +42,7 @@ if (PARALLEL_LINK_JOBS AND (NOT NUMBER_OF_LOGICAL_CORES OR PARALLEL_COMPILE_JOBS
endif ()
if (PARALLEL_COMPILE_JOBS OR PARALLEL_LINK_JOBS)
message(STATUS "${CMAKE_CURRENT_SOURCE_DIR}: Have ${AVAILABLE_PHYSICAL_MEMORY} megabytes of memory. Limiting concurrent linkers jobs to ${PARALLEL_LINK_JOBS} and compiler jobs to ${PARALLEL_COMPILE_JOBS}")
message(STATUS
"${CMAKE_CURRENT_SOURCE_DIR}: Have ${AVAILABLE_PHYSICAL_MEMORY} megabytes of memory.
Limiting concurrent linkers jobs to ${PARALLEL_LINK_JOBS} and compiler jobs to ${PARALLEL_COMPILE_JOBS}")
endif ()

View File

@ -1,4 +1,5 @@
option (SANITIZE "Enable sanitizer: address, memory, thread, undefined" "")
# Possible values: `address` (ASan), `memory` (MSan), `thread` (TSan), `undefined` (UBSan), and "" (no sanitizing)
option (SANITIZE "Enable one of the code sanitizers" "")
set (SAN_FLAGS "${SAN_FLAGS} -g -fno-omit-frame-pointer -DSANITIZER")

View File

@ -40,7 +40,9 @@ endif ()
STRING(REGEX MATCHALL "[0-9]+" COMPILER_VERSION_LIST ${CMAKE_CXX_COMPILER_VERSION})
LIST(GET COMPILER_VERSION_LIST 0 COMPILER_VERSION_MAJOR)
# Example values: `lld-10`, `gold`.
option (LINKER_NAME "Linker name or full path")
if (COMPILER_GCC AND NOT LINKER_NAME)
find_program (LLD_PATH NAMES "ld.lld")
find_program (GOLD_PATH NAMES "ld.gold")

View File

@ -17,8 +17,9 @@ if (USE_DEBUG_HELPERS)
endif ()
# Add some warnings that are not available even with -Wall -Wextra -Wpedantic.
option (WEVERYTHING "Enables -Weverything option with some exceptions. This is intended for exploration of new compiler warnings that may be found to be useful. Only makes sense for clang." ON)
# Intended for exploration of new compiler warnings that may be found useful.
# Applies to clang only
option (WEVERYTHING "Enable -Weverything option with some exceptions." ON)
# Control maximum size of stack frames. It can be important if the code is run in fibers with small stack size.
# Only in release build because debug has too large stack frames.

2
contrib/protobuf vendored

@ -1 +1 @@
Subproject commit d6a10dd3db55d8f7f9e464db9151874cde1f79ec
Subproject commit 445d1ae73a450b1e94622e7040989aa2048402e3

View File

@ -11,3 +11,7 @@ else ()
endif ()
add_subdirectory("${protobuf_SOURCE_DIR}/cmake" "${protobuf_BINARY_DIR}")
# We don't want to stop compilation on warnings in protobuf's headers.
# The following line overrides the value assigned by the command target_include_directories() in libprotobuf.cmake
set_property(TARGET libprotobuf PROPERTY INTERFACE_SYSTEM_INCLUDE_DIRECTORIES ${protobuf_SOURCE_DIR}/src)

4
debian/rules vendored
View File

@ -36,8 +36,8 @@ endif
CMAKE_FLAGS += -DENABLE_UTILS=0
DEB_CC ?= $(shell which gcc-9 gcc-8 gcc | head -n1)
DEB_CXX ?= $(shell which g++-9 g++-8 g++ | head -n1)
DEB_CC ?= $(shell which gcc-10 gcc-9 gcc | head -n1)
DEB_CXX ?= $(shell which g++-10 g++-9 g++ | head -n1)
ifdef DEB_CXX
DEB_BUILD_GNU_TYPE := $(shell dpkg-architecture -qDEB_BUILD_GNU_TYPE)

View File

@ -89,7 +89,8 @@ EOT
fi
if [ -n "$(ls /docker-entrypoint-initdb.d/)" ] || [ -n "$CLICKHOUSE_DB" ]; then
$gosu /usr/bin/clickhouse-server --config-file=$CLICKHOUSE_CONFIG &
# Listen only on localhost until the initialization is done
$gosu /usr/bin/clickhouse-server --config-file=$CLICKHOUSE_CONFIG -- --listen_host=127.0.0.1 &
pid="$!"
# check if clickhouse is ready to accept connections

View File

@ -97,7 +97,7 @@ ccache --zero-stats ||:
mkdir build
cd build
cmake .. -DCMAKE_INSTALL_PREFIX=/usr -DCMAKE_CXX_COMPILER=clang++-10 -DCMAKE_C_COMPILER=clang-10 "${CMAKE_LIBS_CONFIG[@]}" "${FASTTEST_CMAKE_FLAGS[@]}" | ts '%Y-%m-%d %H:%M:%S' | tee /test_output/cmake_log.txt
ninja clickhouse-bundle | ts '%Y-%m-%d %H:%M:%S' | tee /test_output/build_log.txt
time ninja clickhouse-bundle | ts '%Y-%m-%d %H:%M:%S' | tee /test_output/build_log.txt
ninja install | ts '%Y-%m-%d %H:%M:%S' | tee /test_output/install_log.txt
@ -111,35 +111,10 @@ ln -s /test_output /var/log/clickhouse-server
cp "$CLICKHOUSE_DIR/programs/server/config.xml" /etc/clickhouse-server/
cp "$CLICKHOUSE_DIR/programs/server/users.xml" /etc/clickhouse-server/
mkdir -p /etc/clickhouse-server/dict_examples
ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-server/dict_examples/
ln -s /usr/share/clickhouse-test/config/strings_dictionary.xml /etc/clickhouse-server/dict_examples/
ln -s /usr/share/clickhouse-test/config/decimals_dictionary.xml /etc/clickhouse-server/dict_examples/
ln -s /usr/share/clickhouse-test/config/zookeeper.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/listen.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/part_log.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/text_log.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/metric_log.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/custom_settings_prefixes.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/log_queries.xml /etc/clickhouse-server/users.d/
ln -s /usr/share/clickhouse-test/config/readonly.xml /etc/clickhouse-server/users.d/
ln -s /usr/share/clickhouse-test/config/access_management.xml /etc/clickhouse-server/users.d/
ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-server/
ln -s /usr/share/clickhouse-test/config/strings_dictionary.xml /etc/clickhouse-server/
ln -s /usr/share/clickhouse-test/config/decimals_dictionary.xml /etc/clickhouse-server/
ln -s /usr/share/clickhouse-test/config/executable_dictionary.xml /etc/clickhouse-server/
ln -s /usr/share/clickhouse-test/config/macros.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/disks.xml /etc/clickhouse-server/config.d/
#ln -s /usr/share/clickhouse-test/config/secure_ports.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/clusters.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/graphite.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/server.key /etc/clickhouse-server/
ln -s /usr/share/clickhouse-test/config/server.crt /etc/clickhouse-server/
ln -s /usr/share/clickhouse-test/config/dhparam.pem /etc/clickhouse-server/
ln -sf /usr/share/clickhouse-test/config/client_config.xml /etc/clickhouse-client/config.xml
# Keep original query_masking_rules.xml
ln -s --backup=simple --suffix=_original.xml /usr/share/clickhouse-test/config/query_masking_rules.xml /etc/clickhouse-server/config.d/
# install tests config
$CLICKHOUSE_DIR/tests/config/install.sh
# doesn't support SSL
rm -f /etc/clickhouse-server/config.d/secure_ports.xml
# Kill the server in case we are running locally and not in docker
kill_clickhouse
@ -216,7 +191,7 @@ TESTS_TO_SKIP=(
01460_DistributedFilesToInsert
)
clickhouse-test -j 4 --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee /test_output/test_log.txt
time clickhouse-test -j 8 --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee /test_output/test_log.txt
# substr is to remove semicolon after test name
@ -234,7 +209,7 @@ then
kill_clickhouse
# Clean the data so that there is no interference from the previous test run.
rm -rvf /var/lib/clickhouse ||:
rm -rf /var/lib/clickhouse ||:
mkdir /var/lib/clickhouse
clickhouse-server --config /etc/clickhouse-server/config.xml --daemon

View File

@ -48,7 +48,7 @@ function configure
cp -av "$repo_dir"/programs/server/config* db
cp -av "$repo_dir"/programs/server/user* db
# TODO figure out which ones are needed
cp -av "$repo_dir"/tests/config/listen.xml db/config.d
cp -av "$repo_dir"/tests/config/config.d/listen.xml db/config.d
cp -av "$script_dir"/query-fuzzer-tweaks-users.xml db/users.d
}

View File

@ -726,8 +726,8 @@ create view shortness
create table inconsistent_short_marking_report
engine File(TSV, 'report/unexpected-query-duration.tsv')
as select
multiIf(marked_short and time > 0.1, '"short" queries must run faster than 0.02 s',
not marked_short and time < 0.02, '"normal" queries must run longer than 0.1 s',
multiIf(marked_short and time > 0.1, '\"short\" queries must run faster than 0.02 s',
not marked_short and time < 0.02, '\"normal\" queries must run longer than 0.1 s',
'') problem,
marked_short, time,
test, query_index, query_display_name
@ -1065,7 +1065,7 @@ case "$stage" in
# to collect the logs. Prefer not to restart, because addresses might change
# and we won't be able to process trace_log data. Start in a subshell, so that
# it doesn't interfere with the watchdog through `wait`.
( get_profiles || restart && get_profiles ) ||:
( get_profiles || { restart && get_profiles ; } ) ||:
# Kill the whole process group, because somehow when the subshell is killed,
# the sleep inside remains alive and orphaned.

View File

@ -15,6 +15,7 @@ import sys
import time
import traceback
import xml.etree.ElementTree as et
from threading import Thread
from scipy import stats
def tsv_escape(s):
@ -23,10 +24,11 @@ def tsv_escape(s):
parser = argparse.ArgumentParser(description='Run performance test.')
# Explicitly decode files as UTF-8 because sometimes we have Russian characters in queries, and LANG=C is set.
parser.add_argument('file', metavar='FILE', type=argparse.FileType('r', encoding='utf-8'), nargs=1, help='test description file')
parser.add_argument('--host', nargs='*', default=['localhost'], help="Server hostname(s). Corresponds to '--port' options.")
parser.add_argument('--port', nargs='*', default=[9000], help="Server port(s). Corresponds to '--host' options.")
parser.add_argument('--host', nargs='*', default=['localhost'], help="Space-separated list of server hostname(s). Corresponds to '--port' options.")
parser.add_argument('--port', nargs='*', default=[9000], help="Space-separated list of server port(s). Corresponds to '--host' options.")
parser.add_argument('--runs', type=int, default=1, help='Number of query runs per server.')
parser.add_argument('--max-queries', type=int, default=None, help='Test no more than this number of queries, chosen at random.')
parser.add_argument('--queries-to-run', nargs='*', type=int, default=None, help='Space-separated list of indexes of queries to test.')
parser.add_argument('--long', action='store_true', help='Do not skip the tests tagged as long.')
parser.add_argument('--print-queries', action='store_true', help='Print test queries and exit.')
parser.add_argument('--print-settings', action='store_true', help='Print test settings and exit.')
@ -76,11 +78,16 @@ for e in root.findall('query'):
assert(len(test_queries) == len(is_short))
# If we're given a list of queries to run, check that it makes sense.
for i in args.queries_to_run or []:
if i < 0 or i >= len(test_queries):
print(f'There is no query no. {i} in this test, only [{0}-{len(test_queries) - 1}] are present')
exit(1)
# If we're only asked to print the queries, do that and exit
# If we're only asked to print the queries, do that and exit.
if args.print_queries:
for q in test_queries:
print(q)
for i in args.queries_to_run or range(0, len(test_queries)):
print(test_queries[i])
exit(0)
# Print short queries
@ -157,8 +164,11 @@ for t in tables:
print(f'skipped\t{tsv_escape(skipped_message)}')
sys.exit(0)
# Run create queries
create_query_templates = [q.text for q in root.findall('create_query')]
# Run create and fill queries. We will run them simultaneously for both servers,
# to save time.
# The weird search is to keep the relative order of elements, which matters, and
# etree doesn't support the appropriate xpath query.
create_query_templates = [q.text for q in root.findall('./*') if q.tag in ('create_query', 'fill_query')]
create_queries = substitute_parameters(create_query_templates)
# Disallow temporary tables, because the clickhouse_driver reconnects on errors,
@ -170,23 +180,29 @@ for q in create_queries:
file = sys.stderr)
sys.exit(1)
for conn_index, c in enumerate(all_connections):
for q in create_queries:
c.execute(q)
print(f'create\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}')
def do_create(connection, index, queries):
for q in queries:
connection.execute(q)
print(f'create\t{index}\t{connection.last_query.elapsed}\t{tsv_escape(q)}')
# Run fill queries
fill_query_templates = [q.text for q in root.findall('fill_query')]
fill_queries = substitute_parameters(fill_query_templates)
for conn_index, c in enumerate(all_connections):
for q in fill_queries:
c.execute(q)
print(f'fill\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}')
threads = [Thread(target = do_create, args = (connection, index, create_queries))
for index, connection in enumerate(all_connections)]
# Run the queries in randomized order, but preserve their indexes as specified
# in the test XML. To avoid using too much time, limit the number of queries
# we run per test.
queries_to_run = random.sample(range(0, len(test_queries)), min(len(test_queries), args.max_queries or len(test_queries)))
for t in threads:
t.start()
for t in threads:
t.join()
queries_to_run = range(0, len(test_queries))
if args.max_queries:
# If specified, test a limited number of queries chosen at random.
queries_to_run = random.sample(range(0, len(test_queries)), min(len(test_queries), args.max_queries))
if args.queries_to_run:
# Run the specified queries.
queries_to_run = args.queries_to_run
# Run test queries.
for query_index in queries_to_run:

View File

@ -187,8 +187,10 @@ def td(value, cell_attributes = ''):
cell_attributes = cell_attributes,
value = value)
def th(x):
return '<th>' + str(x) + '</th>'
def th(value, cell_attributes = ''):
return '<th {cell_attributes}>{value}</th>'.format(
cell_attributes = cell_attributes,
value = value)
def tableRow(cell_values, cell_attributes = [], anchor=None):
return tr(
@ -199,8 +201,13 @@ def tableRow(cell_values, cell_attributes = [], anchor=None):
if a is not None and v is not None]),
anchor)
def tableHeader(r):
return tr(''.join([th(f) for f in r]))
def tableHeader(cell_values, cell_attributes = []):
return tr(
''.join([th(v, a)
for v, a in itertools.zip_longest(
cell_values, cell_attributes,
fillvalue = '')
if a is not None and v is not None]))
def tableStart(title):
cls = '-'.join(title.lower().split(' ')[:3]);
@ -377,16 +384,16 @@ if args.report == 'main':
'Ratio of speedup&nbsp;(-) or slowdown&nbsp;(+)', # 2
'Relative difference (new&nbsp;&minus;&nbsp;old) / old', # 3
'p&nbsp;<&nbsp;0.01 threshold', # 4
# Failed # 5
'', # Failed # 5
'Test', # 6
'#', # 7
'Query', # 8
]
text += tableHeader(columns)
attrs = ['' for c in columns]
attrs[5] = None
text += tableHeader(columns, attrs)
for row in rows:
anchor = f'{currentTableAnchor()}.{row[6]}.{row[7]}'
if int(row[5]):
@ -421,17 +428,17 @@ if args.report == 'main':
'New,&nbsp;s', #1
'Relative difference (new&nbsp;-&nbsp;old)/old', #2
'p&nbsp;&lt;&nbsp;0.01 threshold', #3
# Failed #4
'', # Failed #4
'Test', #5
'#', #6
'Query' #7
]
text = tableStart('Unstable Queries')
text += tableHeader(columns)
attrs = ['' for c in columns]
attrs[4] = None
text = tableStart('Unstable Queries')
text += tableHeader(columns, attrs)
for r in unstable_rows:
anchor = f'{currentTableAnchor()}.{r[5]}.{r[6]}'
if int(r[4]):
@ -464,19 +471,19 @@ if args.report == 'main':
'Test', #0
'Wall clock time,&nbsp;s', #1
'Total client time,&nbsp;s', #2
'Total queries', #3
'Total queries', #3
'Longest query<br>(sum for all runs),&nbsp;s', #4
'Avg wall clock time<br>(sum for all runs),&nbsp;s', #5
'Shortest query<br>(sum for all runs),&nbsp;s', #6
# 'Runs' #7
'', # Runs #7
]
attrs = ['' for c in columns]
attrs[7] = None
text = tableStart('Test Times')
text += tableHeader(columns)
text += tableHeader(columns, attrs)
allowed_average_run_time = 3.75 # 60 seconds per test at 7 runs
allowed_average_run_time = 1.6 # 30 seconds per test at 7 runs
for r in rows:
anchor = f'{currentTableAnchor()}.{r[0]}'
total_runs = (int(r[7]) + 1) * 2 # one prewarm run, two servers
@ -581,8 +588,8 @@ elif args.report == 'all-queries':
return
columns = [
# Changed #0
# Unstable #1
'', # Changed #0
'', # Unstable #1
'Old,&nbsp;s', #2
'New,&nbsp;s', #3
'Ratio of speedup&nbsp;(-) or slowdown&nbsp;(+)', #4
@ -592,13 +599,13 @@ elif args.report == 'all-queries':
'#', #8
'Query', #9
]
text = tableStart('All Query Times')
text += tableHeader(columns)
attrs = ['' for c in columns]
attrs[0] = None
attrs[1] = None
text = tableStart('All Query Times')
text += tableHeader(columns, attrs)
for r in rows:
anchor = f'{currentTableAnchor()}.{r[7]}.{r[8]}'
if int(r[1]):

View File

@ -8,26 +8,8 @@ dpkg -i package_folder/clickhouse-server_*.deb
dpkg -i package_folder/clickhouse-client_*.deb
dpkg -i package_folder/clickhouse-test_*.deb
mkdir -p /etc/clickhouse-server/dict_examples
ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-server/dict_examples/
ln -s /usr/share/clickhouse-test/config/strings_dictionary.xml /etc/clickhouse-server/dict_examples/
ln -s /usr/share/clickhouse-test/config/decimals_dictionary.xml /etc/clickhouse-server/dict_examples/
ln -s /usr/share/clickhouse-test/config/zookeeper.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/listen.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/part_log.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/text_log.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/metric_log.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/log_queries.xml /etc/clickhouse-server/users.d/
ln -s /usr/share/clickhouse-test/config/readonly.xml /etc/clickhouse-server/users.d/
ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-server/
ln -s /usr/share/clickhouse-test/config/strings_dictionary.xml /etc/clickhouse-server/
ln -s /usr/share/clickhouse-test/config/decimals_dictionary.xml /etc/clickhouse-server/
ln -s /usr/share/clickhouse-test/config/macros.xml /etc/clickhouse-server/config.d/
if [[ -n "$USE_DATABASE_ATOMIC" ]] && [[ "$USE_DATABASE_ATOMIC" -eq 1 ]]; then
ln -s /usr/share/clickhouse-test/config/database_atomic_configd.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/database_atomic_usersd.xml /etc/clickhouse-server/users.d/
fi
# install test configs
/usr/share/clickhouse-test/config/install.sh
function start()
{

View File

@ -48,28 +48,8 @@ mkdir -p /var/lib/clickhouse
mkdir -p /var/log/clickhouse-server
chmod 777 -R /var/log/clickhouse-server/
# Temorary way to keep CI green while moving dictionaries to separate directory
mkdir -p /etc/clickhouse-server/dict_examples
chmod 777 -R /etc/clickhouse-server/dict_examples
ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-server/dict_examples/; \
ln -s /usr/share/clickhouse-test/config/strings_dictionary.xml /etc/clickhouse-server/dict_examples/; \
ln -s /usr/share/clickhouse-test/config/decimals_dictionary.xml /etc/clickhouse-server/dict_examples/;
ln -s /usr/share/clickhouse-test/config/zookeeper.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/listen.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/part_log.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/text_log.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/metric_log.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/log_queries.xml /etc/clickhouse-server/users.d/
ln -s /usr/share/clickhouse-test/config/readonly.xml /etc/clickhouse-server/users.d/
ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-server/
ln -s /usr/share/clickhouse-test/config/strings_dictionary.xml /etc/clickhouse-server/
ln -s /usr/share/clickhouse-test/config/decimals_dictionary.xml /etc/clickhouse-server/
ln -s /usr/share/clickhouse-test/config/macros.xml /etc/clickhouse-server/config.d/
# Retain any pre-existing config and allow ClickHouse to load those if required
ln -s --backup=simple --suffix=_original.xml \
/usr/share/clickhouse-test/config/query_masking_rules.xml /etc/clickhouse-server/config.d/
# install test configs
/usr/share/clickhouse-test/config/install.sh
function start()
{

View File

@ -21,9 +21,7 @@ RUN apt-get update -y \
telnet \
tree \
unixodbc \
wget \
zookeeper \
zookeeperd
wget
RUN mkdir -p /tmp/clickhouse-odbc-tmp \
&& wget -nv -O - ${odbc_driver_url} | tar --strip-components=1 -xz -C /tmp/clickhouse-odbc-tmp \

View File

@ -8,48 +8,9 @@ dpkg -i package_folder/clickhouse-server_*.deb
dpkg -i package_folder/clickhouse-client_*.deb
dpkg -i package_folder/clickhouse-test_*.deb
mkdir -p /etc/clickhouse-server/dict_examples
ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-server/dict_examples/
ln -s /usr/share/clickhouse-test/config/strings_dictionary.xml /etc/clickhouse-server/dict_examples/
ln -s /usr/share/clickhouse-test/config/decimals_dictionary.xml /etc/clickhouse-server/dict_examples/
ln -s /usr/share/clickhouse-test/config/zookeeper.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/listen.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/part_log.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/text_log.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/metric_log.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/custom_settings_prefixes.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/log_queries.xml /etc/clickhouse-server/users.d/
ln -s /usr/share/clickhouse-test/config/readonly.xml /etc/clickhouse-server/users.d/
ln -s /usr/share/clickhouse-test/config/access_management.xml /etc/clickhouse-server/users.d/
ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-server/
ln -s /usr/share/clickhouse-test/config/strings_dictionary.xml /etc/clickhouse-server/
ln -s /usr/share/clickhouse-test/config/decimals_dictionary.xml /etc/clickhouse-server/
ln -s /usr/share/clickhouse-test/config/executable_dictionary.xml /etc/clickhouse-server/
ln -s /usr/share/clickhouse-test/config/macros.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/disks.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/secure_ports.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/clusters.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/graphite.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/server.key /etc/clickhouse-server/
ln -s /usr/share/clickhouse-test/config/server.crt /etc/clickhouse-server/
ln -s /usr/share/clickhouse-test/config/dhparam.pem /etc/clickhouse-server/
# install test configs
/usr/share/clickhouse-test/config/install.sh
# Retain any pre-existing config and allow ClickHouse to load it if required
ln -s --backup=simple --suffix=_original.xml \
/usr/share/clickhouse-test/config/query_masking_rules.xml /etc/clickhouse-server/config.d/
if [[ -n "$USE_POLYMORPHIC_PARTS" ]] && [[ "$USE_POLYMORPHIC_PARTS" -eq 1 ]]; then
ln -s /usr/share/clickhouse-test/config/polymorphic_parts.xml /etc/clickhouse-server/config.d/
fi
if [[ -n "$USE_DATABASE_ATOMIC" ]] && [[ "$USE_DATABASE_ATOMIC" -eq 1 ]]; then
ln -s /usr/share/clickhouse-test/config/database_atomic_configd.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/database_atomic_usersd.xml /etc/clickhouse-server/users.d/
fi
ln -sf /usr/share/clickhouse-test/config/client_config.xml /etc/clickhouse-client/config.xml
service zookeeper start
sleep 5
service clickhouse-server start && sleep 5
if cat /usr/bin/clickhouse-test | grep -q -- "--use-skip-list"; then

View File

@ -66,9 +66,7 @@ RUN apt-get --allow-unauthenticated update -y \
unixodbc \
unixodbc-dev \
wget \
zlib1g-dev \
zookeeper \
zookeeperd
zlib1g-dev
RUN mkdir -p /tmp/clickhouse-odbc-tmp \
&& wget -nv -O - ${odbc_driver_url} | tar --strip-components=1 -xz -C /tmp/clickhouse-odbc-tmp \

View File

@ -8,48 +8,9 @@ dpkg -i package_folder/clickhouse-server_*.deb
dpkg -i package_folder/clickhouse-client_*.deb
dpkg -i package_folder/clickhouse-test_*.deb
mkdir -p /etc/clickhouse-server/dict_examples
ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-server/dict_examples/
ln -s /usr/share/clickhouse-test/config/strings_dictionary.xml /etc/clickhouse-server/dict_examples/
ln -s /usr/share/clickhouse-test/config/decimals_dictionary.xml /etc/clickhouse-server/dict_examples/
ln -s /usr/share/clickhouse-test/config/zookeeper.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/listen.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/part_log.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/text_log.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/metric_log.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/custom_settings_prefixes.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/log_queries.xml /etc/clickhouse-server/users.d/
ln -s /usr/share/clickhouse-test/config/readonly.xml /etc/clickhouse-server/users.d/
ln -s /usr/share/clickhouse-test/config/access_management.xml /etc/clickhouse-server/users.d/
ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-server/
ln -s /usr/share/clickhouse-test/config/strings_dictionary.xml /etc/clickhouse-server/
ln -s /usr/share/clickhouse-test/config/decimals_dictionary.xml /etc/clickhouse-server/
ln -s /usr/share/clickhouse-test/config/executable_dictionary.xml /etc/clickhouse-server/
ln -s /usr/share/clickhouse-test/config/macros.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/disks.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/secure_ports.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/clusters.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/graphite.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/server.key /etc/clickhouse-server/
ln -s /usr/share/clickhouse-test/config/server.crt /etc/clickhouse-server/
ln -s /usr/share/clickhouse-test/config/dhparam.pem /etc/clickhouse-server/
# install test configs
/usr/share/clickhouse-test/config/install.sh
# Retain any pre-existing config and allow ClickHouse to load it if required
ln -s --backup=simple --suffix=_original.xml \
/usr/share/clickhouse-test/config/query_masking_rules.xml /etc/clickhouse-server/config.d/
if [[ -n "$USE_POLYMORPHIC_PARTS" ]] && [[ "$USE_POLYMORPHIC_PARTS" -eq 1 ]]; then
ln -s /usr/share/clickhouse-test/config/polymorphic_parts.xml /etc/clickhouse-server/config.d/
fi
if [[ -n "$USE_DATABASE_ATOMIC" ]] && [[ "$USE_DATABASE_ATOMIC" -eq 1 ]]; then
ln -s /usr/share/clickhouse-test/config/database_atomic_configd.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/database_atomic_usersd.xml /etc/clickhouse-server/users.d/
fi
ln -sf /usr/share/clickhouse-test/config/client_config.xml /etc/clickhouse-client/config.xml
service zookeeper start
sleep 5
service clickhouse-server start && sleep 5
if cat /usr/bin/clickhouse-test | grep -q -- "--use-skip-list"; then

View File

@ -11,8 +11,6 @@ RUN apt-get update -y \
tzdata \
fakeroot \
debhelper \
zookeeper \
zookeeperd \
expect \
python \
python-lxml \

View File

@ -39,41 +39,8 @@ mkdir -p /var/log/clickhouse-server
chmod 777 -R /var/lib/clickhouse
chmod 777 -R /var/log/clickhouse-server/
# Temorary way to keep CI green while moving dictionaries to separate directory
mkdir -p /etc/clickhouse-server/dict_examples
chmod 777 -R /etc/clickhouse-server/dict_examples
ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-server/dict_examples/; \
ln -s /usr/share/clickhouse-test/config/strings_dictionary.xml /etc/clickhouse-server/dict_examples/; \
ln -s /usr/share/clickhouse-test/config/decimals_dictionary.xml /etc/clickhouse-server/dict_examples/;
ln -s /usr/share/clickhouse-test/config/zookeeper.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/listen.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/part_log.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/text_log.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/metric_log.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/log_queries.xml /etc/clickhouse-server/users.d/
ln -s /usr/share/clickhouse-test/config/readonly.xml /etc/clickhouse-server/users.d/
ln -s /usr/share/clickhouse-test/config/access_management.xml /etc/clickhouse-server/users.d/
ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-server/
ln -s /usr/share/clickhouse-test/config/strings_dictionary.xml /etc/clickhouse-server/
ln -s /usr/share/clickhouse-test/config/decimals_dictionary.xml /etc/clickhouse-server/
ln -s /usr/share/clickhouse-test/config/executable_dictionary.xml /etc/clickhouse-server/
ln -s /usr/share/clickhouse-test/config/macros.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/disks.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/secure_ports.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/clusters.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/graphite.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/server.key /etc/clickhouse-server/
ln -s /usr/share/clickhouse-test/config/server.crt /etc/clickhouse-server/
ln -s /usr/share/clickhouse-test/config/dhparam.pem /etc/clickhouse-server/
ln -sf /usr/share/clickhouse-test/config/client_config.xml /etc/clickhouse-client/config.xml
# Retain any pre-existing config and allow ClickHouse to load it if required
ln -s --backup=simple --suffix=_original.xml \
/usr/share/clickhouse-test/config/query_masking_rules.xml /etc/clickhouse-server/config.d/
service zookeeper start
sleep 5
# install test configs
/usr/share/clickhouse-test/config/install.sh
start_clickhouse

View File

@ -39,9 +39,8 @@ function start()
done
}
ln -s /usr/share/clickhouse-test/config/log_queries.xml /etc/clickhouse-server/users.d/
ln -s /usr/share/clickhouse-test/config/part_log.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/text_log.xml /etc/clickhouse-server/config.d/
# install test configs
/usr/share/clickhouse-test/config/install.sh
echo "ASAN_OPTIONS='malloc_context_size=10 verbosity=1 allocator_release_to_os_interval_ms=10000'" >> /etc/environment

View File

@ -29,7 +29,7 @@ def get_options(i):
if 0 < i:
options += " --order=random"
if i % 2 == 1:
options += " --atomic-db-engine"
options += " --db-engine=Ordinary"
return options

View File

@ -35,7 +35,7 @@ RUN apt-get update \
ENV TZ=Europe/Moscow
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
RUN pip3 install urllib3 testflows==1.6.42 docker-compose docker dicttoxml kazoo tzlocal
RUN pip3 install urllib3 testflows==1.6.48 docker-compose docker dicttoxml kazoo tzlocal
ENV DOCKER_CHANNEL stable
ENV DOCKER_VERSION 17.09.1-ce

View File

@ -0,0 +1,121 @@
## Developer's guide for adding new CMake options
### Don't be obvious. Be informative.
Bad:
```cmake
option (ENABLE_TESTS "Enables testing" OFF)
```
This description is quite useless as is neither gives the viewer any additional information nor explains the option purpose.
Better:
```cmake
option(ENABLE_TESTS "Provide unit_test_dbms target with Google.test unit tests" OFF)
```
If the option's purpose can't be guessed by its name, or the purpose guess may be misleading, or option has some
pre-conditions, leave a comment above the `option()` line and explain what it does.
The best way would be linking the docs page (if it exists).
The comment is parsed into a separate column (see below).
Even better:
```cmake
# implies ${TESTS_ARE_ENABLED}
# see tests/CMakeLists.txt for implementation detail.
option(ENABLE_TESTS "Provide unit_test_dbms target with Google.test unit tests" OFF)
```
### If the option's state could produce unwanted (or unusual) result, explicitly warn the user.
Suppose you have an option that may strip debug symbols from the ClickHouse's part.
This can speed up the linking process, but produces a binary that cannot be debugged.
In that case, prefer explicitly raising a warning telling the developer that he may be doing something wrong.
Also, such options should be disabled if applies.
Bad:
```cmake
option(STRIP_DEBUG_SYMBOLS_FUNCTIONS
"Do not generate debugger info for ClickHouse functions.
${STRIP_DSF_DEFAULT})
if (STRIP_DEBUG_SYMBOLS_FUNCTIONS)
target_compile_options(clickhouse_functions PRIVATE "-g0")
endif()
```
Better:
```cmake
# Provides faster linking and lower binary size.
# Tradeoff is the inability to debug some source files with e.g. gdb
# (empty stack frames and no local variables)."
option(STRIP_DEBUG_SYMBOLS_FUNCTIONS
"Do not generate debugger info for ClickHouse functions."
${STRIP_DSF_DEFAULT})
if (STRIP_DEBUG_SYMBOLS_FUNCTIONS)
message(WARNING "Not generating debugger info for ClickHouse functions")
target_compile_options(clickhouse_functions PRIVATE "-g0")
endif()
```
### In the option's description, explain WHAT the option does rather than WHY it does something.
The WHY explanation should be placed in the comment.
You may find that the option's name is self-descriptive.
Bad:
```cmake
option(ENABLE_THINLTO "Enable Thin LTO. Only applicable for clang. It's also suppressed when building with tests or sanitizers." ON)
```
Better:
```cmake
# Only applicable for clang.
# Turned off when building with tests or sanitizers.
option(ENABLE_THINLTO "Clang-specific link time optimisation" ON).
```
### Don't assume other developers know as much as you do.
In ClickHouse, there are many tools used that an ordinary developer may not know. If you are in doubt, give a link to
the tool's docs. It won't take much of your time.
Bad:
```cmake
option(ENABLE_THINLTO "Enable Thin LTO. Only applicable for clang. It's also suppressed when building with tests or sanitizers." ON)
```
Better (combined with the above hint):
```cmake
# https://clang.llvm.org/docs/ThinLTO.html
# Only applicable for clang.
# Turned off when building with tests or sanitizers.
option(ENABLE_THINLTO "Clang-specific link time optimisation" ON).
```
Other example, bad:
```cmake
option (USE_INCLUDE_WHAT_YOU_USE "Use 'include-what-you-use' tool" OFF)
```
Better:
```cmake
# https://github.com/include-what-you-use/include-what-you-use
option (USE_INCLUDE_WHAT_YOU_USE "Reduce unneeded #include s (external tool)" OFF)
```
### Prefer consistent default values.
CMake allows you to pass a plethora of values representing boolean `true/false`, e.g. `1, ON, YES, ...`.
Prefer the `ON/OFF` values, if possible.

View File

@ -0,0 +1,34 @@
# CMake in ClickHouse
## TL; DR How to make ClickHouse compile and link faster?
Developer only! This command will likely fulfill most of your needs. Run before calling `ninja`.
```cmake
cmake .. \
-DCMAKE_C_COMPILER=/bin/clang-10 \
-DCMAKE_CXX_COMPILER=/bin/clang++-10 \
-DCMAKE_BUILD_TYPE=Debug \
-DENABLE_CLICKHOUSE_ALL=OFF \
-DENABLE_CLICKHOUSE_SERVER=ON \
-DENABLE_CLICKHOUSE_CLIENT=ON \
-DUSE_STATIC_LIBRARIES=OFF \
-DCLICKHOUSE_SPLIT_BINARY=ON \
-DSPLIT_SHARED_LIBRARIES=ON \
-DENABLE_LIBRARIES=OFF \
-DENABLE_UTILS=OFF \
-DENABLE_TESTS=OFF
```
## CMake files types
1. ClickHouse's source CMake files (located in the root directory and in `/src`).
2. Arch-dependent CMake files (located in `/cmake/*os_name*`).
3. Libraries finders (search for contrib libraries, located in `/cmake/find`).
3. Contrib build CMake files (used instead of libraries' own CMake files, located in `/cmake/modules`)
## List of CMake flags
* This list is auto-generated by [this Python script](https://github.com/clickhouse/clickhouse/blob/master/docs/tools/cmake_in_clickhouse_generator.py).
* The flag name is a link to its position in the code.
* If an option's default value is itself an option, it's also a link to its position in this list.

View File

@ -38,7 +38,7 @@ toc_title: Adopters
| <a href="https://db.com" class="favicon">Deutsche Bank</a> | Finance | BI Analytics | — | — | [Slides in English, October 2019](https://bigdatadays.ru/wp-content/uploads/2019/10/D2-H3-3_Yakunin-Goihburg.pdf) |
| <a href="https://www.diva-e.com" class="favicon">Diva-e</a> | Digital consulting | Main Product | — | — | [Slides in English, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup29/ClickHouse-MeetUp-Unusual-Applications-sd-2019-09-17.pdf) |
| <a href="https://www.ecwid.com/" class="favicon">Ecwid</a> | E-commerce SaaS | Metrics, Logging | — | — | [Slides in Russian, April 2019](https://nastachku.ru/var/files/1/presentation/backend/2_Backend_6.pdf) |
| <a href="https://www.ebay.com/" class="favicon">eBay</a> | E-commerce | TBA | — | — | [Webinar, Sep 2020](https://altinity.com/webinarspage/2020/09/08/migrating-from-druid-to-next-gen-olap-on-clickhouse-ebays-experience) |
| <a href="https://www.ebay.com/" class="favicon">eBay</a> | E-commerce | Logs, Metrics and Events | — | — | [Official website, Sep 2020](https://tech.ebayinc.com/engineering/ou-online-analytical-processing/) |
| <a href="https://www.exness.com" class="favicon">Exness</a> | Trading | Metrics, Logging | — | — | [Talk in Russian, May 2019](https://youtu.be/_rpU-TvSfZ8?t=3215) |
| <a href="https://fastnetmon.com/" class="favicon">FastNetMon</a> | DDoS Protection | Main Product | | — | [Official website](https://fastnetmon.com/docs-fnm-advanced/fastnetmon-advanced-traffic-persistency/) |
| <a href="https://www.flipkart.com/" class="favicon">Flipkart</a> | e-Commerce | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=239) |

View File

@ -0,0 +1,10 @@
---
toc_priority: 100
---
# Information support {#information-support}
- Email address: <a class="feedback-email"></a>
- Phone: <a href="tel:+74957806510">+7-495-780-6510</a>
[Original article](https://clickhouse.tech/docs/en/introduction/info/) <!--hide-->

View File

@ -0,0 +1,42 @@
# ClickHouse obfuscator
Simple tool for table data obfuscation.
It reads input table and produces output table, that retain some properties of input, but contains different data.
It allows to publish almost real production data for usage in benchmarks.
It is designed to retain the following properties of data:
- cardinalities of values (number of distinct values) for every column and for every tuple of columns;
- conditional cardinalities: number of distinct values of one column under condition on value of another column;
- probability distributions of absolute value of integers; sign of signed integers; exponent and sign for floats;
- probability distributions of length of strings;
- probability of zero values of numbers; empty strings and arrays, NULLs;
- data compression ratio when compressed with LZ77 and entropy family of codecs;
- continuity (magnitude of difference) of time values across table; continuity of floating point values.
- date component of DateTime values;
- UTF-8 validity of string values;
- string values continue to look somewhat natural.
Most of the properties above are viable for performance testing:
reading data, filtering, aggregation and sorting will work at almost the same speed
as on original data due to saved cardinalities, magnitudes, compression ratios, etc.
It works in deterministic fashion: you define a seed value and transform is totally determined by input data and by seed.
Some transforms are one to one and could be reversed, so you need to have large enough seed and keep it in secret.
It use some cryptographic primitives to transform data, but from the cryptographic point of view,
It doesn't do anything properly and you should never consider the result as secure, unless you have other reasons for it.
It may retain some data you don't want to publish.
It always leave numbers 0, 1, -1 as is. Also it leaves dates, lengths of arrays and null flags exactly as in source data.
For example, you have a column IsMobile in your table with values 0 and 1. In transformed data, it will have the same value.
So, the user will be able to count exact ratio of mobile traffic.
Another example, suppose you have some private data in your table, like user email and you don't want to publish any single email address.
If your table is large enough and contain multiple different emails and there is no email that have very high frequency than all others,
It will perfectly anonymize all data. But if you have small amount of different values in a column, it can possibly reproduce some of them.
And you should take care and look at exact algorithm, how this tool works, and probably fine tune some of it command line parameters.
This tool works fine only with reasonable amount of data (at least 1000s of rows).

View File

@ -9,8 +9,7 @@ toc_title: Working with maps
Collect all the keys and sum corresponding values.
Arguments are tuples of two arrays, where items in the first array represent keys, and the second array
contains values for the each key.
Arguments are tuples of two arrays, where items in the first array represent keys, and the second array contains values for the each key.
All key arrays should have same type, and all value arrays should contain items which are promotable to the one type (Int64, UInt64 or Float64).
The common promoted type is used as a type for the result array.
@ -30,8 +29,7 @@ SELECT mapAdd(([toUInt8(1), 2], [1, 1]), ([toUInt8(1), 2], [1, 1])) as res, toTy
Collect all the keys and subtract corresponding values.
Arguments are tuples of two arrays, where items in the first array represent keys, and the second array
contains values for the each key.
Arguments are tuples of two arrays, where items in the first array represent keys, and the second array contains values for the each key.
All key arrays should have same type, and all value arrays should contain items which are promotable to the one type (Int64, UInt64 or Float64).
The common promoted type is used as a type for the result array.
@ -45,25 +43,24 @@ SELECT mapSubtract(([toUInt8(1), 2], [toInt32(1), 1]), ([toUInt8(1), 2], [toInt3
┌─res────────────┬─type──────────────────────────────┐
│ ([1,2],[-1,0]) │ Tuple(Array(UInt8), Array(Int64)) │
└────────────────┴───────────────────────────────────┘
````
```
## mapPopulateSeries {#function-mappopulateseries}
Syntax: `mapPopulateSeries((keys : Array(<IntegerType>), values : Array(<IntegerType>)[, max : <IntegerType>])`
Generates a map, where keys are a series of numbers, from minimum to maximum keys (or `max` argument if it specified) taken from `keys` array with step size of one,
and corresponding values taken from `values` array. If the value is not specified for the key, then it uses default value in the resulting map.
Generates a map, where keys are a series of numbers, from minimum to maximum keys (or `max` argument if it specified) taken from `keys` array with step size of one, and corresponding values taken from `values` array. If the value is not specified for the key, then it uses default value in the resulting map.
For repeated keys only the first value (in order of appearing) gets associated with the key.
The number of elements in `keys` and `values` must be the same for each row.
Returns a tuple of two arrays: keys in sorted order, and values the corresponding keys.
``` sql
```sql
select mapPopulateSeries([1,2,4], [11,22,44], 5) as res, toTypeName(res) as type;
```
``` text
```text
┌─res──────────────────────────┬─type──────────────────────────────┐
│ ([1,2,3,4,5],[11,22,0,44,0]) │ Tuple(Array(UInt8), Array(UInt8)) │
└──────────────────────────────┴───────────────────────────────────┘

View File

@ -7,6 +7,6 @@ toc_priority: 100
Информационная поддержка ClickHouse осуществляется на всей территории Российской Федерации без ограничений посредством использования телефонной связи и средств электронной почты на русском языке в круглосуточном режиме:
- Адрес электронной почты: <a class="feedback-email"></a>
- Телефон: <a href="tel:88002509639">8-800-250-96-39</a> (звонки бесплатны из всех регионов России)
- Телефон: <a href="tel:+74957806510">+7-495-780-6510</a>
[Оригинальная статья](https://clickhouse.tech/docs/ru/introduction/info/) <!--hide-->

View File

@ -28,6 +28,7 @@ import test
import util
import website
from cmake_in_clickhouse_generator import generate_cmake_flags_files
class ClickHouseMarkdown(markdown.extensions.Extension):
class ClickHousePreprocessor(markdown.util.Processor):
@ -184,6 +185,8 @@ def build(args):
test.test_templates(args.website_dir)
if not args.skip_docs:
generate_cmake_flags_files(os.path.join(os.path.dirname(__file__), '..', '..'))
build_docs(args)
from github import build_releases
build_releases(args, build_docs)
@ -200,6 +203,7 @@ def build(args):
if __name__ == '__main__':
os.chdir(os.path.join(os.path.dirname(__file__), '..'))
website_dir = os.path.join('..', 'website')
arg_parser = argparse.ArgumentParser()
arg_parser.add_argument('--lang', default='en,es,fr,ru,zh,ja,tr,fa')
arg_parser.add_argument('--blog-lang', default='en,ru')

View File

@ -0,0 +1,152 @@
import re
import os
from typing import TextIO, List, Tuple, Optional, Dict
# name, default value, description
Entity = Tuple[str, str, str]
# https://regex101.com/r/R6iogw/12
cmake_option_regex: str = r"^\s*option\s*\(([A-Z_0-9${}]+)\s*(?:\"((?:.|\n)*?)\")?\s*(.*)?\).*$"
ch_master_url: str = "https://github.com/clickhouse/clickhouse/blob/master/"
name_str: str = "<a name=\"{anchor}\"></a>[`{name}`](" + ch_master_url + "{path}#L{line})"
default_anchor_str: str = "[`{name}`](#{anchor})"
comment_var_regex: str = r"\${(.+)}"
comment_var_replace: str = "`\\1`"
table_header: str = """
| Name | Default value | Description | Comment |
|------|---------------|-------------|---------|
"""
# Needed to detect conditional variables (those which are defined twice)
# name -> (path, values)
entities: Dict[str, Tuple[str, str]] = {}
def make_anchor(t: str) -> str:
return "".join(["-" if i == "_" else i.lower() for i in t if i.isalpha() or i == "_"])
def process_comment(comment: str) -> str:
return re.sub(comment_var_regex, comment_var_replace, comment, flags=re.MULTILINE)
def build_entity(path: str, entity: Entity, line_comment: Tuple[int, str]) -> None:
(line, comment) = line_comment
(name, description, default) = entity
if name in entities:
return
# cannot escape the { in macro option description -> invalid AMP html
# Skipping "USE_INTERNAL_${LIB_NAME_UC}_LIBRARY"
if "LIB_NAME_UC" in name:
return
if len(default) == 0:
formatted_default: str = "`OFF`"
elif default[0] == "$":
formatted_default: str = "`{}`".format(default[2:-1])
else:
formatted_default: str = "`" + default + "`"
formatted_name: str = name_str.format(
anchor=make_anchor(name),
name=name,
path=path,
line=line if line > 0 else 1)
formatted_description: str = "".join(description.split("\n"))
formatted_comment: str = process_comment(comment)
formatted_entity: str = "| {} | {} | {} | {} |".format(
formatted_name, formatted_default, formatted_description, formatted_comment)
entities[name] = path, formatted_entity
def process_file(root_path: str, input_name: str) -> None:
with open(os.path.join(root_path, input_name), 'r') as cmake_file:
contents: str = cmake_file.read()
def get_line_and_comment(target: str) -> Tuple[int, str]:
contents_list: List[str] = contents.split("\n")
comment: str = ""
for n, line in enumerate(contents_list):
if line.find(target) == -1:
continue
for maybe_comment_line in contents_list[n - 1::-1]:
if not re.match("\s*#\s*", maybe_comment_line):
break
comment = re.sub("\s*#\s*", "", maybe_comment_line) + " " + comment
return n, comment
matches: Optional[List[Entity]] = re.findall(cmake_option_regex, contents, re.MULTILINE)
if matches:
for entity in matches:
build_entity(os.path.join(root_path[6:], input_name), entity, get_line_and_comment(entity[0]))
def process_folder(root_path:str, name: str) -> None:
for root, _, files in os.walk(os.path.join(root_path, name)):
for f in files:
if f == "CMakeLists.txt" or ".cmake" in f:
process_file(root, f)
def generate_cmake_flags_files(root_path: str) -> None:
output_file_name: str = os.path.join(root_path, "docs/en/development/cmake-in-clickhouse.md")
header_file_name: str = os.path.join(root_path, "docs/_includes/cmake_in_clickhouse_header.md")
footer_file_name: str = os.path.join(root_path, "docs/_includes/cmake_in_clickhouse_footer.md")
process_file(root_path, "CMakeLists.txt")
process_file(root_path, "programs/CMakeLists.txt")
process_folder(root_path, "base")
process_folder(root_path, "cmake")
process_folder(root_path, "src")
with open(output_file_name, "w") as f:
with open(header_file_name, "r") as header:
f.write(header.read())
sorted_keys: List[str] = sorted(entities.keys())
ignored_keys: List[str] = []
f.write("### ClickHouse modes\n" + table_header)
for k in sorted_keys:
if k.startswith("ENABLE_CLICKHOUSE_"):
f.write(entities[k][1] + "\n")
ignored_keys.append(k)
f.write("\n### External libraries\nNote that ClickHouse uses forks of these libraries, see https://github.com/ClickHouse-Extras.\n" +
table_header)
for k in sorted_keys:
if k.startswith("ENABLE_") and ".cmake" in entities[k][0]:
f.write(entities[k][1] + "\n")
ignored_keys.append(k)
f.write("\n### External libraries system/bundled mode\n" + table_header)
for k in sorted_keys:
if k.startswith("USE_INTERNAL_"):
f.write(entities[k][1] + "\n")
ignored_keys.append(k)
f.write("\n### Other flags\n" + table_header)
for k in sorted(set(sorted_keys).difference(set(ignored_keys))):
f.write(entities[k][1] + "\n")
with open(footer_file_name, "r") as footer:
f.write(footer.read())
if __name__ == '__main__':
generate_cmake_flags_files("../../")

View File

@ -49,13 +49,14 @@ def translate_impl(text, target_language=None):
def translate(text, target_language=None):
result = []
for part in re.split(curly_braces_re, text):
if part.startswith('{') and part.endswith('}'):
result.append(part)
else:
result.append(translate_impl(part, target_language=target_language))
return ''.join(result)
return "".join(
[
part
if part.startswith("{") and part.endswith("}")
else translate_impl(part, target_language=target_language)
for part in re.split(curly_braces_re, text)
]
)
def translate_toc(root, lang):

View File

@ -3,7 +3,7 @@ machine_translated: true
machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3
---
# 在运营商 {#select-in-operators}
# IN 操作符 {#select-in-operators}
`IN`, `NOT IN`, `GLOBAL IN`,和 `GLOBAL NOT IN` 运算符是单独复盖的,因为它们的功能相当丰富。

View File

@ -2,31 +2,49 @@ if (USE_CLANG_TIDY)
set (CMAKE_CXX_CLANG_TIDY "${CLANG_TIDY_PATH}")
endif ()
# 'clickhouse' binary is a multi purpose tool,
# that contain multiple execution modes (client, server, etc.)
# each of them is built and linked as a separate library, defined below.
# The `clickhouse` binary is a multi purpose tool that contains multiple execution modes (client, server, etc.),
# each of them may be built and linked as a separate library.
# If you do not know what modes you need, turn this option OFF and enable SERVER and CLIENT only.
option (ENABLE_CLICKHOUSE_ALL "Enable all ClickHouse modes by default" ON)
option (ENABLE_CLICKHOUSE_ALL "Enable all tools" ON)
option (ENABLE_CLICKHOUSE_SERVER "Enable clickhouse-server" ${ENABLE_CLICKHOUSE_ALL})
option (ENABLE_CLICKHOUSE_CLIENT "Enable clickhouse-client" ${ENABLE_CLICKHOUSE_ALL})
option (ENABLE_CLICKHOUSE_LOCAL "Enable clickhouse-local" ${ENABLE_CLICKHOUSE_ALL})
option (ENABLE_CLICKHOUSE_BENCHMARK "Enable clickhouse-benchmark" ${ENABLE_CLICKHOUSE_ALL})
option (ENABLE_CLICKHOUSE_EXTRACT_FROM_CONFIG "Enable clickhouse-extract-from-config" ${ENABLE_CLICKHOUSE_ALL})
option (ENABLE_CLICKHOUSE_COMPRESSOR "Enable clickhouse-compressor" ${ENABLE_CLICKHOUSE_ALL})
option (ENABLE_CLICKHOUSE_COPIER "Enable clickhouse-copier" ${ENABLE_CLICKHOUSE_ALL})
option (ENABLE_CLICKHOUSE_FORMAT "Enable clickhouse-format" ${ENABLE_CLICKHOUSE_ALL})
option (ENABLE_CLICKHOUSE_OBFUSCATOR "Enable clickhouse-obfuscator" ${ENABLE_CLICKHOUSE_ALL})
option (ENABLE_CLICKHOUSE_GIT_IMPORT "Enable clickhouse-git-import" ${ENABLE_CLICKHOUSE_ALL})
option (ENABLE_CLICKHOUSE_ODBC_BRIDGE "Enable clickhouse-odbc-bridge" ${ENABLE_CLICKHOUSE_ALL})
option (ENABLE_CLICKHOUSE_SERVER "Server mode (main mode)" ${ENABLE_CLICKHOUSE_ALL})
option (ENABLE_CLICKHOUSE_CLIENT "Client mode (interactive tui/shell that connects to the server)"
${ENABLE_CLICKHOUSE_ALL})
# https://clickhouse.tech/docs/en/operations/utilities/clickhouse-local/
option (ENABLE_CLICKHOUSE_LOCAL "Local files fast processing mode" ${ENABLE_CLICKHOUSE_ALL})
# https://clickhouse.tech/docs/en/operations/utilities/clickhouse-benchmark/
option (ENABLE_CLICKHOUSE_BENCHMARK "Queries benchmarking mode" ${ENABLE_CLICKHOUSE_ALL})
option (ENABLE_CLICKHOUSE_EXTRACT_FROM_CONFIG "Configs processor (extract values etc.)" ${ENABLE_CLICKHOUSE_ALL})
# https://clickhouse.tech/docs/en/operations/utilities/clickhouse-compressor/
option (ENABLE_CLICKHOUSE_COMPRESSOR "Data compressor and decompressor" ${ENABLE_CLICKHOUSE_ALL})
# https://clickhouse.tech/docs/en/operations/utilities/clickhouse-copier/
option (ENABLE_CLICKHOUSE_COPIER "Inter-cluster data copying mode" ${ENABLE_CLICKHOUSE_ALL})
option (ENABLE_CLICKHOUSE_FORMAT "Queries pretty-printer and formatter with syntax highlighting"
${ENABLE_CLICKHOUSE_ALL})
# https://clickhouse.tech/docs/en/operations/utilities/clickhouse-obfuscator/
option (ENABLE_CLICKHOUSE_OBFUSCATOR "Table data obfuscator (convert real data to benchmark-ready one)"
${ENABLE_CLICKHOUSE_ALL})
# https://clickhouse.tech/docs/en/operations/utilities/odbc-bridge/
option (ENABLE_CLICKHOUSE_ODBC_BRIDGE "HTTP-server working like a proxy to ODBC driver"
${ENABLE_CLICKHOUSE_ALL})
if (CLICKHOUSE_SPLIT_BINARY)
option (ENABLE_CLICKHOUSE_INSTALL "Enable clickhouse-install" OFF)
option(ENABLE_CLICKHOUSE_INSTALL "Install ClickHouse without .deb/.rpm/.tgz packages (having the binary only)" OFF)
else ()
option (ENABLE_CLICKHOUSE_INSTALL "Enable clickhouse-install" ${ENABLE_CLICKHOUSE_ALL})
option(ENABLE_CLICKHOUSE_INSTALL "Install ClickHouse without .deb/.rpm/.tgz packages (having the binary only)"
${ENABLE_CLICKHOUSE_ALL})
endif ()
if(NOT (MAKE_STATIC_LIBRARIES OR SPLIT_SHARED_LIBRARIES))
set(CLICKHOUSE_ONE_SHARED 1)
set(CLICKHOUSE_ONE_SHARED ON)
endif()
configure_file (config_tools.h.in ${ConfigIncludePath}/config_tools.h)

View File

@ -80,7 +80,7 @@ Suggest::Suggest()
"WITH", "TOTALS", "HAVING", "ORDER", "COLLATE", "LIMIT", "UNION", "AND", "OR", "ASC",
"IN", "KILL", "QUERY", "SYNC", "ASYNC", "TEST", "BETWEEN", "TRUNCATE", "USER", "ROLE",
"PROFILE", "QUOTA", "POLICY", "ROW", "GRANT", "REVOKE", "OPTION", "ADMIN", "EXCEPT", "REPLACE",
"IDENTIFIED", "HOST", "NAME", "READONLY", "WRITABLE", "PERMISSIVE", "FOR", "RESTRICTIVE", "FOR", "RANDOMIZED",
"IDENTIFIED", "HOST", "NAME", "READONLY", "WRITABLE", "PERMISSIVE", "FOR", "RESTRICTIVE", "RANDOMIZED",
"INTERVAL", "LIMITS", "ONLY", "TRACKING", "IP", "REGEXP", "ILIKE"};
}

View File

@ -1477,7 +1477,9 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl(
{
auto create_query_push_ast = rewriteCreateQueryStorage(task_shard.current_pull_table_create_query,
task_table.table_push, task_table.engine_push_ast);
create_query_push_ast->as<ASTCreateQuery &>().if_not_exists = true;
auto & create = create_query_push_ast->as<ASTCreateQuery &>();
create.if_not_exists = true;
InterpreterCreateQuery::prepareOnClusterQuery(create, context, task_table.cluster_push_name);
String query = queryToString(create_query_push_ast);
LOG_DEBUG(log, "Create destination tables. Query: {}", query);

View File

@ -215,31 +215,20 @@ Names extractPrimaryKeyColumnNames(const ASTPtr & storage_ast)
return primary_key_columns;
}
String extractReplicatedTableZookeeperPath(const ASTPtr & storage_ast)
bool isReplicatedTableEngine(const ASTPtr & storage_ast)
{
String storage_str = queryToString(storage_ast);
const auto & storage = storage_ast->as<ASTStorage &>();
const auto & engine = storage.engine->as<ASTFunction &>();
if (!endsWith(engine.name, "MergeTree"))
{
String storage_str = queryToString(storage_ast);
throw Exception(
"Unsupported engine was specified in " + storage_str + ", only *MergeTree engines are supported",
ErrorCodes::BAD_ARGUMENTS);
}
if (!startsWith(engine.name, "Replicated"))
{
return "";
}
auto replicated_table_arguments = engine.arguments->children;
auto zk_table_path_ast = replicated_table_arguments[0]->as<ASTLiteral &>();
auto zk_table_path_string = zk_table_path_ast.value.safeGet<String>();
return zk_table_path_string;
return startsWith(engine.name, "Replicated");
}
ShardPriority getReplicasPriority(const Cluster::Addresses & replicas, const std::string & local_hostname, UInt8 random)

View File

@ -200,7 +200,7 @@ ASTPtr extractOrderBy(const ASTPtr & storage_ast);
Names extractPrimaryKeyColumnNames(const ASTPtr & storage_ast);
String extractReplicatedTableZookeeperPath(const ASTPtr & storage_ast);
bool isReplicatedTableEngine(const ASTPtr & storage_ast);
ShardPriority getReplicasPriority(const Cluster::Addresses & replicas, const std::string & local_hostname, UInt8 random);

View File

@ -48,7 +48,7 @@ struct TaskTable
String getCertainPartitionPieceTaskStatusPath(const String & partition_name, const size_t piece_number) const;
bool isReplicatedTable() const { return engine_push_zk_path != ""; }
bool isReplicatedTable() const { return is_replicated_table; }
/// Partitions will be split into number-of-splits pieces.
/// Each piece will be copied independently. (10 by default)
@ -78,6 +78,7 @@ struct TaskTable
/// First argument of Replicated...MergeTree()
String engine_push_zk_path;
bool is_replicated_table;
ASTPtr rewriteReplicatedCreateQueryToPlain();
@ -269,7 +270,7 @@ inline TaskTable::TaskTable(TaskCluster & parent, const Poco::Util::AbstractConf
engine_push_ast = parseQuery(parser_storage, engine_push_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
engine_push_partition_key_ast = extractPartitionKey(engine_push_ast);
primary_key_comma_separated = Nested::createCommaSeparatedStringFrom(extractPrimaryKeyColumnNames(engine_push_ast));
engine_push_zk_path = extractReplicatedTableZookeeperPath(engine_push_ast);
is_replicated_table = isReplicatedTableEngine(engine_push_ast);
}
sharding_key_str = config.getString(table_prefix + "sharding_key");
@ -372,15 +373,18 @@ inline ASTPtr TaskTable::rewriteReplicatedCreateQueryToPlain()
auto & new_storage_ast = prev_engine_push_ast->as<ASTStorage &>();
auto & new_engine_ast = new_storage_ast.engine->as<ASTFunction &>();
auto & replicated_table_arguments = new_engine_ast.arguments->children;
/// Delete first two arguments of Replicated...MergeTree() table.
replicated_table_arguments.erase(replicated_table_arguments.begin());
replicated_table_arguments.erase(replicated_table_arguments.begin());
/// Remove replicated from name
/// Remove "Replicated" from name
new_engine_ast.name = new_engine_ast.name.substr(10);
if (new_engine_ast.arguments)
{
auto & replicated_table_arguments = new_engine_ast.arguments->children;
/// Delete first two arguments of Replicated...MergeTree() table.
replicated_table_arguments.erase(replicated_table_arguments.begin());
replicated_table_arguments.erase(replicated_table_arguments.begin());
}
return new_storage_ast.clone();
}

View File

@ -5,6 +5,9 @@ set (CLICKHOUSE_FORMAT_LINK
boost::program_options
clickhouse_common_io
clickhouse_parsers
clickhouse_functions
clickhouse_aggregate_functions
clickhouse_table_functions
dbms
)

View File

@ -1,13 +1,29 @@
#include <iostream>
#include <string_view>
#include <functional>
#include <boost/program_options.hpp>
#include <IO/ReadBufferFromFileDescriptor.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteBufferFromFileDescriptor.h>
#include <Parsers/ParserQuery.h>
#include <Parsers/parseQuery.h>
#include <Parsers/formatAST.h>
#include <Parsers/obfuscateQueries.h>
#include <Common/TerminalSize.h>
#include <Interpreters/Context.h>
#include <Functions/FunctionFactory.h>
#include <Functions/registerFunctions.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/registerAggregateFunctions.h>
#include <TableFunctions/TableFunctionFactory.h>
#include <TableFunctions/registerTableFunctions.h>
#include <Storages/StorageFactory.h>
#include <Storages/registerStorages.h>
#include <DataTypes/DataTypeFactory.h>
#pragma GCC diagnostic ignored "-Wunused-function"
#pragma GCC diagnostic ignored "-Wmissing-declarations"
@ -22,6 +38,8 @@ int mainEntryClickHouseFormat(int argc, char ** argv)
("oneline", "format in single line")
("quiet,q", "just check syntax, no output on success")
("multiquery,n", "allow multiple queries in the same file")
("obfuscate", "obfuscate instead of formatting")
("seed", po::value<std::string>(), "seed (arbitrary string) that determines the result of obfuscation")
;
boost::program_options::variables_map options;
@ -40,10 +58,17 @@ int mainEntryClickHouseFormat(int argc, char ** argv)
bool oneline = options.count("oneline");
bool quiet = options.count("quiet");
bool multiple = options.count("multiquery");
bool obfuscate = options.count("obfuscate");
if (quiet && (hilite || oneline))
if (quiet && (hilite || oneline || obfuscate))
{
std::cerr << "Options 'hilite' or 'oneline' have no sense in 'quiet' mode." << std::endl;
std::cerr << "Options 'hilite' or 'oneline' or 'obfuscate' have no sense in 'quiet' mode." << std::endl;
return 2;
}
if (obfuscate && (hilite || oneline || quiet))
{
std::cerr << "Options 'hilite' or 'oneline' or 'quiet' have no sense in 'obfuscate' mode." << std::endl;
return 2;
}
@ -51,21 +76,66 @@ int mainEntryClickHouseFormat(int argc, char ** argv)
ReadBufferFromFileDescriptor in(STDIN_FILENO);
readStringUntilEOF(query, in);
const char * pos = query.data();
const char * end = pos + query.size();
ParserQuery parser(end);
do
if (obfuscate)
{
ASTPtr res = parseQueryAndMovePosition(parser, pos, end, "query", multiple, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
if (!quiet)
WordMap obfuscated_words_map;
WordSet used_nouns;
SipHash hash_func;
if (options.count("seed"))
{
formatAST(*res, std::cout, hilite, oneline);
if (multiple)
std::cout << "\n;\n";
std::cout << std::endl;
std::string seed;
hash_func.update(options["seed"].as<std::string>());
}
} while (multiple && pos != end);
SharedContextHolder shared_context = Context::createShared();
Context context = Context::createGlobal(shared_context.get());
context.makeGlobalContext();
registerFunctions();
registerAggregateFunctions();
registerTableFunctions();
registerStorages();
std::unordered_set<std::string> additional_names;
auto all_known_storage_names = StorageFactory::instance().getAllRegisteredNames();
auto all_known_data_type_names = DataTypeFactory::instance().getAllRegisteredNames();
additional_names.insert(all_known_storage_names.begin(), all_known_storage_names.end());
additional_names.insert(all_known_data_type_names.begin(), all_known_data_type_names.end());
KnownIdentifierFunc is_known_identifier = [&](std::string_view name)
{
std::string what(name);
return FunctionFactory::instance().tryGet(what, context) != nullptr
|| AggregateFunctionFactory::instance().isAggregateFunctionName(what)
|| TableFunctionFactory::instance().isTableFunctionName(what)
|| additional_names.count(what);
};
WriteBufferFromFileDescriptor out(STDOUT_FILENO);
obfuscateQueries(query, out, obfuscated_words_map, used_nouns, hash_func, is_known_identifier);
}
else
{
const char * pos = query.data();
const char * end = pos + query.size();
ParserQuery parser(end);
do
{
ASTPtr res = parseQueryAndMovePosition(parser, pos, end, "query", multiple, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
if (!quiet)
{
formatAST(*res, std::cout, hilite, oneline);
if (multiple)
std::cout << "\n;\n";
std::cout << std::endl;
}
} while (multiple && pos != end);
}
}
catch (...)
{

View File

@ -3,7 +3,7 @@ set(CLICKHOUSE_SERVER_SOURCES
Server.cpp
)
if (OS_LINUX AND ARCH_AMD64)
if (OS_LINUX)
set (LINK_CONFIG_LIB INTERFACE "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:clickhouse_server_configs> -Wl,${NO_WHOLE_ARCHIVE}")
endif ()

View File

@ -339,16 +339,23 @@ int Server::main(const std::vector<std::string> & /*args*/)
{
if (hasLinuxCapability(CAP_IPC_LOCK))
{
/// Get the memory area with (current) code segment.
/// It's better to lock only the code segment instead of calling "mlockall",
/// because otherwise debug info will be also locked in memory, and it can be huge.
auto [addr, len] = getMappedArea(reinterpret_cast<void *>(mainEntryClickHouseServer));
try
{
/// Get the memory area with (current) code segment.
/// It's better to lock only the code segment instead of calling "mlockall",
/// because otherwise debug info will be also locked in memory, and it can be huge.
auto [addr, len] = getMappedArea(reinterpret_cast<void *>(mainEntryClickHouseServer));
LOG_TRACE(log, "Will do mlock to prevent executable memory from being paged out. It may take a few seconds.");
if (0 != mlock(addr, len))
LOG_WARNING(log, "Failed mlock: {}", errnoToString(ErrorCodes::SYSTEM_ERROR));
else
LOG_TRACE(log, "The memory map of clickhouse executable has been mlock'ed, total {}", ReadableSize(len));
LOG_TRACE(log, "Will do mlock to prevent executable memory from being paged out. It may take a few seconds.");
if (0 != mlock(addr, len))
LOG_WARNING(log, "Failed mlock: {}", errnoToString(ErrorCodes::SYSTEM_ERROR));
else
LOG_TRACE(log, "The memory map of clickhouse executable has been mlock'ed, total {}", ReadableSize(len));
}
catch (...)
{
LOG_WARNING(log, "Cannot mlock: {}", getCurrentExceptionMessage(false));
}
}
else
{
@ -664,6 +671,10 @@ int Server::main(const std::vector<std::string> & /*args*/)
total_memory_tracker.setDescription("(total)");
total_memory_tracker.setMetric(CurrentMetrics::MemoryTracking);
/// Set current database name before loading tables and databases because
/// system logs may copy global context.
global_context->setCurrentDatabaseNameInGlobalContext(default_database);
LOG_INFO(log, "Loading metadata from {}", path);
try
@ -671,11 +682,14 @@ int Server::main(const std::vector<std::string> & /*args*/)
loadMetadataSystem(*global_context);
/// After attaching system databases we can initialize system log.
global_context->initializeSystemLogs();
auto & database_catalog = DatabaseCatalog::instance();
/// After the system database is created, attach virtual system tables (in addition to query_log and part_log)
attachSystemTablesServer(*DatabaseCatalog::instance().getSystemDatabase(), has_zookeeper);
attachSystemTablesServer(*database_catalog.getSystemDatabase(), has_zookeeper);
/// Then, load remaining databases
loadMetadata(*global_context, default_database);
DatabaseCatalog::instance().loadDatabases();
database_catalog.loadDatabases();
/// After loading validate that default database exists
database_catalog.assertDatabaseExists(default_database);
}
catch (...)
{
@ -738,8 +752,6 @@ int Server::main(const std::vector<std::string> & /*args*/)
LOG_INFO(log, "Query Profiler and TraceCollector are disabled because they require PHDR cache to be created"
" (otherwise the function 'dl_iterate_phdr' is not lock free and not async-signal safe).");
global_context->setCurrentDatabase(default_database);
if (has_zookeeper && config().has("distributed_ddl"))
{
/// DDL worker should be started after all tables were loaded

10
release
View File

@ -95,9 +95,9 @@ then
exit 3
fi
export DEB_CC=${DEB_CC=clang-6.0}
export DEB_CXX=${DEB_CXX=clang++-6.0}
EXTRAPACKAGES="$EXTRAPACKAGES clang-6.0 lld-6.0"
export DEB_CC=${DEB_CC=clang-10}
export DEB_CXX=${DEB_CXX=clang++-10}
EXTRAPACKAGES="$EXTRAPACKAGES clang-10 lld-10"
elif [[ $BUILD_TYPE == 'valgrind' ]]; then
MALLOC_OPTS="-DENABLE_TCMALLOC=0 -DENABLE_JEMALLOC=0"
VERSION_POSTFIX+="+valgrind"
@ -118,8 +118,8 @@ echo -e "\nCurrent version is $VERSION_STRING"
if [ -z "$NO_BUILD" ] ; then
gen_changelog "$VERSION_STRING" "" "$AUTHOR" ""
if [ -z "$USE_PBUILDER" ] ; then
DEB_CC=${DEB_CC:=`which gcc-9 gcc-8 gcc | head -n1`}
DEB_CXX=${DEB_CXX:=`which g++-9 g++-8 g++ | head -n1`}
DEB_CC=${DEB_CC:=`which gcc-10 gcc-9 gcc | head -n1`}
DEB_CXX=${DEB_CXX:=`which gcc-10 g++-9 g++ | head -n1`}
# Build (only binary packages).
debuild --preserve-env -e PATH \
-e DEB_CC=$DEB_CC -e DEB_CXX=$DEB_CXX -e CMAKE_FLAGS="$CMAKE_FLAGS" \

View File

@ -192,7 +192,7 @@ namespace
}
std::vector<AccessEntityPtr> parseUsers(const Poco::Util::AbstractConfiguration & config, Poco::Logger * log)
std::vector<AccessEntityPtr> parseUsers(const Poco::Util::AbstractConfiguration & config)
{
Poco::Util::AbstractConfiguration::Keys user_names;
config.keys("users", user_names);
@ -200,16 +200,8 @@ namespace
std::vector<AccessEntityPtr> users;
users.reserve(user_names.size());
for (const auto & user_name : user_names)
{
try
{
users.push_back(parseUser(config, user_name));
}
catch (...)
{
tryLogCurrentException(log, "Could not parse user " + backQuote(user_name));
}
}
users.push_back(parseUser(config, user_name));
return users;
}
@ -256,12 +248,11 @@ namespace
}
quota->to_roles.add(user_ids);
return quota;
}
std::vector<AccessEntityPtr> parseQuotas(const Poco::Util::AbstractConfiguration & config, Poco::Logger * log)
std::vector<AccessEntityPtr> parseQuotas(const Poco::Util::AbstractConfiguration & config)
{
Poco::Util::AbstractConfiguration::Keys user_names;
config.keys("users", user_names);
@ -278,76 +269,63 @@ namespace
quotas.reserve(quota_names.size());
for (const auto & quota_name : quota_names)
{
try
{
auto it = quota_to_user_ids.find(quota_name);
const std::vector<UUID> & quota_users = (it != quota_to_user_ids.end()) ? std::move(it->second) : std::vector<UUID>{};
quotas.push_back(parseQuota(config, quota_name, quota_users));
}
catch (...)
{
tryLogCurrentException(log, "Could not parse quota " + backQuote(quota_name));
}
auto it = quota_to_user_ids.find(quota_name);
const std::vector<UUID> & quota_users = (it != quota_to_user_ids.end()) ? std::move(it->second) : std::vector<UUID>{};
quotas.push_back(parseQuota(config, quota_name, quota_users));
}
return quotas;
}
std::vector<AccessEntityPtr> parseRowPolicies(const Poco::Util::AbstractConfiguration & config, Poco::Logger * log)
std::vector<AccessEntityPtr> parseRowPolicies(const Poco::Util::AbstractConfiguration & config)
{
std::map<std::pair<String /* database */, String /* table */>, std::unordered_map<String /* user */, String /* filter */>> all_filters_map;
Poco::Util::AbstractConfiguration::Keys user_names;
config.keys("users", user_names);
try
for (const String & user_name : user_names)
{
config.keys("users", user_names);
for (const String & user_name : user_names)
const String databases_config = "users." + user_name + ".databases";
if (config.has(databases_config))
{
const String databases_config = "users." + user_name + ".databases";
if (config.has(databases_config))
Poco::Util::AbstractConfiguration::Keys database_keys;
config.keys(databases_config, database_keys);
/// Read tables within databases
for (const String & database_key : database_keys)
{
Poco::Util::AbstractConfiguration::Keys database_keys;
config.keys(databases_config, database_keys);
const String database_config = databases_config + "." + database_key;
/// Read tables within databases
for (const String & database_key : database_keys)
String database_name;
if (((database_key == "database") || (database_key.starts_with("database["))) && config.has(database_config + "[@name]"))
database_name = config.getString(database_config + "[@name]");
else if (size_t bracket_pos = database_key.find('['); bracket_pos != std::string::npos)
database_name = database_key.substr(0, bracket_pos);
else
database_name = database_key;
Poco::Util::AbstractConfiguration::Keys table_keys;
config.keys(database_config, table_keys);
/// Read table properties
for (const String & table_key : table_keys)
{
const String database_config = databases_config + "." + database_key;
String database_name;
if (((database_key == "database") || (database_key.starts_with("database["))) && config.has(database_config + "[@name]"))
database_name = config.getString(database_config + "[@name]");
else if (size_t bracket_pos = database_key.find('['); bracket_pos != std::string::npos)
database_name = database_key.substr(0, bracket_pos);
String table_config = database_config + "." + table_key;
String table_name;
if (((table_key == "table") || (table_key.starts_with("table["))) && config.has(table_config + "[@name]"))
table_name = config.getString(table_config + "[@name]");
else if (size_t bracket_pos = table_key.find('['); bracket_pos != std::string::npos)
table_name = table_key.substr(0, bracket_pos);
else
database_name = database_key;
table_name = table_key;
Poco::Util::AbstractConfiguration::Keys table_keys;
config.keys(database_config, table_keys);
/// Read table properties
for (const String & table_key : table_keys)
{
String table_config = database_config + "." + table_key;
String table_name;
if (((table_key == "table") || (table_key.starts_with("table["))) && config.has(table_config + "[@name]"))
table_name = config.getString(table_config + "[@name]");
else if (size_t bracket_pos = table_key.find('['); bracket_pos != std::string::npos)
table_name = table_key.substr(0, bracket_pos);
else
table_name = table_key;
String filter_config = table_config + ".filter";
all_filters_map[{database_name, table_name}][user_name] = config.getString(filter_config);
}
String filter_config = table_config + ".filter";
all_filters_map[{database_name, table_name}][user_name] = config.getString(filter_config);
}
}
}
}
catch (...)
{
tryLogCurrentException(log, "Could not parse row policies");
}
std::vector<AccessEntityPtr> policies;
for (auto & [database_and_table_name, user_to_filters] : all_filters_map)
@ -450,23 +428,14 @@ namespace
std::vector<AccessEntityPtr> parseSettingsProfiles(
const Poco::Util::AbstractConfiguration & config,
const std::function<void(const std::string_view &)> & check_setting_name_function,
Poco::Logger * log)
const std::function<void(const std::string_view &)> & check_setting_name_function)
{
std::vector<AccessEntityPtr> profiles;
Poco::Util::AbstractConfiguration::Keys profile_names;
config.keys("profiles", profile_names);
for (const auto & profile_name : profile_names)
{
try
{
profiles.push_back(parseSettingsProfile(config, profile_name, check_setting_name_function));
}
catch (...)
{
tryLogCurrentException(log, "Could not parse profile " + backQuote(profile_name));
}
}
profiles.push_back(parseSettingsProfile(config, profile_name, check_setting_name_function));
return profiles;
}
}
@ -520,13 +489,13 @@ void UsersConfigAccessStorage::setConfig(const Poco::Util::AbstractConfiguration
void UsersConfigAccessStorage::parseFromConfig(const Poco::Util::AbstractConfiguration & config)
{
std::vector<std::pair<UUID, AccessEntityPtr>> all_entities;
for (const auto & entity : parseUsers(config, getLogger()))
for (const auto & entity : parseUsers(config))
all_entities.emplace_back(generateID(*entity), entity);
for (const auto & entity : parseQuotas(config, getLogger()))
for (const auto & entity : parseQuotas(config))
all_entities.emplace_back(generateID(*entity), entity);
for (const auto & entity : parseRowPolicies(config, getLogger()))
for (const auto & entity : parseRowPolicies(config))
all_entities.emplace_back(generateID(*entity), entity);
for (const auto & entity : parseSettingsProfiles(config, check_setting_name_function, getLogger()))
for (const auto & entity : parseSettingsProfiles(config, check_setting_name_function))
all_entities.emplace_back(generateID(*entity), entity);
memory_storage.setAll(all_entities);
}

View File

@ -1,22 +1,12 @@
#pragma once
#include <cstddef>
#include <cstdint>
#include <cassert>
#include <type_traits>
#include <common/defines.h>
/** Returns log2 of number, rounded down.
* Compiles to single 'bsr' instruction on x86.
* For zero argument, result is unspecified.
*/
inline unsigned int bitScanReverse(unsigned int x)
{
assert(x != 0);
return sizeof(unsigned int) * 8 - 1 - __builtin_clz(x);
}
/** For zero argument, result is zero.
* For arguments with most significand bit set, result is n.
* For other arguments, returns value, rounded up to power of two.
@ -41,10 +31,9 @@ inline size_t roundUpToPowerOfTwoOrZero(size_t n)
template <typename T>
inline size_t getLeadingZeroBits(T x)
inline size_t getLeadingZeroBitsUnsafe(T x)
{
if (!x)
return sizeof(x) * 8;
assert(x != 0);
if constexpr (sizeof(T) <= sizeof(unsigned int))
{
@ -60,10 +49,32 @@ inline size_t getLeadingZeroBits(T x)
}
}
template <typename T>
inline size_t getLeadingZeroBits(T x)
{
if (!x)
return sizeof(x) * 8;
return getLeadingZeroBitsUnsafe(x);
}
/** Returns log2 of number, rounded down.
* Compiles to single 'bsr' instruction on x86.
* For zero argument, result is unspecified.
*/
template <typename T>
inline uint32_t bitScanReverse(T x)
{
return (std::max<size_t>(sizeof(T), sizeof(unsigned int))) * 8 - 1 - getLeadingZeroBitsUnsafe(x);
}
// Unsafe since __builtin_ctz()-family explicitly state that result is undefined on x == 0
template <typename T>
inline size_t getTrailingZeroBitsUnsafe(T x)
{
assert(x != 0);
if constexpr (sizeof(T) <= sizeof(unsigned int))
{
return __builtin_ctz(x);
@ -88,8 +99,8 @@ inline size_t getTrailingZeroBits(T x)
}
/** Returns a mask that has '1' for `bits` LSB set:
* maskLowBits<UInt8>(3) => 00000111
*/
* maskLowBits<UInt8>(3) => 00000111
*/
template <typename T>
inline T maskLowBits(unsigned char bits)
{

View File

@ -138,6 +138,7 @@ void ConfigReloader::reloadIfNewer(bool force, bool throw_on_error, bool fallbac
if (throw_on_error)
throw;
tryLogCurrentException(log, "Error updating configuration from '" + path + "' config.");
return;
}
LOG_DEBUG(log, "Loaded config '{}', performed update on configuration", path);

View File

@ -23,18 +23,15 @@ Macros::Macros(const Poco::Util::AbstractConfiguration & config, const String &
}
String Macros::expand(const String & s,
size_t level,
const String & database_name,
const String & table_name,
const UUID & uuid) const
MacroExpansionInfo & info) const
{
if (s.find('{') == String::npos)
return s;
if (level && s.size() > 65536)
if (info.level && s.size() > 65536)
throw Exception("Too long string while expanding macros", ErrorCodes::SYNTAX_ERROR);
if (level >= 10)
if (info.level >= 10)
throw Exception("Too deep recursion while expanding macros: '" + s + "'", ErrorCodes::SYNTAX_ERROR);
String res;
@ -64,17 +61,28 @@ String Macros::expand(const String & s,
/// Prefer explicit macros over implicit.
if (it != macros.end())
res += it->second;
else if (macro_name == "database" && !database_name.empty())
res += database_name;
else if (macro_name == "table" && !table_name.empty())
res += table_name;
else if (macro_name == "database" && !info.database_name.empty())
res += info.database_name;
else if (macro_name == "table" && !info.table_name.empty())
res += info.table_name;
else if (macro_name == "uuid")
{
if (uuid == UUIDHelpers::Nil)
if (info.uuid == UUIDHelpers::Nil)
throw Exception("Macro 'uuid' and empty arguments of ReplicatedMergeTree "
"are supported only for ON CLUSTER queries with Atomic database engine",
ErrorCodes::SYNTAX_ERROR);
res += toString(uuid);
/// For ON CLUSTER queries we don't want to require all macros definitions in initiator's config.
/// However, initiator must check that for cross-replication cluster zookeeper_path does not contain {uuid} macro.
/// It becomes impossible to check if {uuid} is contained inside some unknown macro.
if (info.level)
throw Exception("Macro 'uuid' should not be inside another macro", ErrorCodes::SYNTAX_ERROR);
res += toString(info.uuid);
info.expanded_uuid = true;
}
else if (info.ignore_unknown)
{
res += macro_name;
info.has_unknown = true;
}
else
throw Exception("No macro '" + macro_name +
@ -84,7 +92,8 @@ String Macros::expand(const String & s,
pos = end + 1;
}
return expand(res, level + 1, database_name, table_name);
++info.level;
return expand(res, info);
}
String Macros::getValue(const String & key) const
@ -94,9 +103,20 @@ String Macros::getValue(const String & key) const
throw Exception("No macro " + key + " in config", ErrorCodes::SYNTAX_ERROR);
}
String Macros::expand(const String & s) const
{
MacroExpansionInfo info;
return expand(s, info);
}
String Macros::expand(const String & s, const StorageID & table_id, bool allow_uuid) const
{
return expand(s, 0, table_id.database_name, table_id.table_name, allow_uuid ? table_id.uuid : UUIDHelpers::Nil);
MacroExpansionInfo info;
info.database_name = table_id.database_name;
info.table_name = table_id.table_name;
info.uuid = allow_uuid ? table_id.uuid : UUIDHelpers::Nil;
return expand(s, info);
}
Names Macros::expand(const Names & source_names, size_t level) const
@ -104,8 +124,12 @@ Names Macros::expand(const Names & source_names, size_t level) const
Names result_names;
result_names.reserve(source_names.size());
MacroExpansionInfo info;
for (const String & name : source_names)
result_names.push_back(expand(name, level));
{
info.level = level;
result_names.push_back(expand(name, info));
}
return result_names;
}

View File

@ -27,15 +27,28 @@ public:
Macros() = default;
Macros(const Poco::Util::AbstractConfiguration & config, const String & key);
struct MacroExpansionInfo
{
/// Settings
String database_name;
String table_name;
UUID uuid = UUIDHelpers::Nil;
bool ignore_unknown = false;
/// Information about macro expansion
size_t level = 0;
bool expanded_uuid = false;
bool has_unknown = false;
};
/** Replace the substring of the form {macro_name} with the value for macro_name, obtained from the config file.
* If {database} and {table} macros aren`t defined explicitly, expand them as database_name and table_name respectively.
* level - the level of recursion.
*/
String expand(const String & s,
size_t level = 0,
const String & database_name = "",
const String & table_name = "",
const UUID & uuid = UUIDHelpers::Nil) const;
MacroExpansionInfo & info) const;
String expand(const String & s) const;
String expand(const String & s, const StorageID & table_id, bool allow_uuid) const;

View File

@ -67,10 +67,19 @@ inline bool isASCII(char c)
return static_cast<unsigned char>(c) < 0x80;
}
inline bool isLowerAlphaASCII(char c)
{
return (c >= 'a' && c <= 'z');
}
inline bool isUpperAlphaASCII(char c)
{
return (c >= 'A' && c <= 'Z');
}
inline bool isAlphaASCII(char c)
{
return (c >= 'a' && c <= 'z')
|| (c >= 'A' && c <= 'Z');
return isLowerAlphaASCII(c) || isUpperAlphaASCII(c);
}
inline bool isNumericASCII(char c)
@ -122,6 +131,16 @@ inline bool isPrintableASCII(char c)
return uc >= 32 && uc <= 126; /// 127 is ASCII DEL.
}
inline bool isPunctuationASCII(char c)
{
uint8_t uc = c;
return (uc >= 33 && uc <= 47)
|| (uc >= 58 && uc <= 64)
|| (uc >= 91 && uc <= 96)
|| (uc >= 123 && uc <= 125);
}
inline bool isValidIdentifier(const std::string_view & str)
{
return !str.empty() && isValidIdentifierBegin(str[0]) && std::all_of(str.begin() + 1, str.end(), isWordCharASCII);

View File

@ -4,6 +4,7 @@
#include <Common/Exception.h>
#include <Common/randomSeed.h>
#include <Common/SipHash.h>
#include <common/getThreadId.h>
#include <common/types.h>
@ -19,7 +20,7 @@ namespace DB
DB::UInt64 randomSeed()
{
struct timespec times;
if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &times))
if (clock_gettime(CLOCK_MONOTONIC, &times))
DB::throwFromErrno("Cannot clock_gettime.", DB::ErrorCodes::CANNOT_CLOCK_GETTIME);
/// Not cryptographically secure as time, pid and stack address can be predictable.
@ -27,7 +28,7 @@ DB::UInt64 randomSeed()
SipHash hash;
hash.update(times.tv_nsec);
hash.update(times.tv_sec);
hash.update(getpid());
hash.update(getThreadId());
hash.update(&times);
return hash.get64();
}

View File

@ -77,7 +77,7 @@ ColumnDefinition::ColumnDefinition(
size_t ColumnDefinition::getPayloadSize() const
{
return 13 + getLengthEncodedStringSize("def") + getLengthEncodedStringSize(schema) + getLengthEncodedStringSize(table) + getLengthEncodedStringSize(org_table) + \
return 12 + getLengthEncodedStringSize("def") + getLengthEncodedStringSize(schema) + getLengthEncodedStringSize(table) + getLengthEncodedStringSize(org_table) + \
getLengthEncodedStringSize(name) + getLengthEncodedStringSize(org_name) + getLengthEncodedNumberSize(next_length);
}
@ -96,7 +96,7 @@ void ColumnDefinition::readPayloadImpl(ReadBuffer & payload)
payload.readStrict(reinterpret_cast<char *>(&column_length), 4);
payload.readStrict(reinterpret_cast<char *>(&column_type), 1);
payload.readStrict(reinterpret_cast<char *>(&flags), 2);
payload.readStrict(reinterpret_cast<char *>(&decimals), 2);
payload.readStrict(reinterpret_cast<char *>(&decimals), 1);
payload.ignore(2);
}
@ -113,7 +113,7 @@ void ColumnDefinition::writePayloadImpl(WriteBuffer & buffer) const
buffer.write(reinterpret_cast<const char *>(&column_length), 4);
buffer.write(reinterpret_cast<const char *>(&column_type), 1);
buffer.write(reinterpret_cast<const char *>(&flags), 2);
buffer.write(reinterpret_cast<const char *>(&decimals), 2);
buffer.write(reinterpret_cast<const char *>(&decimals), 1);
writeChar(0x0, 2, buffer);
}

View File

@ -351,7 +351,7 @@ class IColumn;
M(UInt64, max_live_view_insert_blocks_before_refresh, 64, "Limit maximum number of inserted blocks after which mergeable blocks are dropped and query is re-executed.", 0) \
M(UInt64, min_free_disk_space_for_temporary_data, 0, "The minimum disk space to keep while writing temporary data used in external sorting and aggregation.", 0) \
\
M(DefaultDatabaseEngine, default_database_engine, DefaultDatabaseEngine::Ordinary, "Default database engine.", 0) \
M(DefaultDatabaseEngine, default_database_engine, DefaultDatabaseEngine::Atomic, "Default database engine.", 0) \
M(Bool, show_table_uuid_in_table_create_query_if_not_nil, false, "For tables in databases with Engine=Atomic show UUID of the table in its CREATE query.", 0) \
M(Bool, enable_scalar_subquery_optimization, true, "If it is set to true, prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once.", 0) \
M(Bool, optimize_trivial_count_query, true, "Process trivial 'SELECT count() FROM table' query from metadata.", 0) \

View File

@ -926,7 +926,7 @@ void CacheDictionary::update(UpdateUnitPtr & update_unit_ptr) const
else
cell.setExpiresAt(std::chrono::time_point<std::chrono::system_clock>::max());
update_unit_ptr->getPresentIdHandler()(id, cell_idx);
update_unit_ptr->callPresentIdHandler(id, cell_idx);
/// mark corresponding id as found
remaining_ids[id] = 1;
}
@ -988,9 +988,9 @@ void CacheDictionary::update(UpdateUnitPtr & update_unit_ptr) const
if (was_default)
cell.setDefault();
if (was_default)
update_unit_ptr->getAbsentIdHandler()(id, cell_idx);
update_unit_ptr->callAbsentIdHandler(id, cell_idx);
else
update_unit_ptr->getPresentIdHandler()(id, cell_idx);
update_unit_ptr->callPresentIdHandler(id, cell_idx);
continue;
}
/// We don't have expired data for that `id` so all we can do is to rethrow `last_exception`.
@ -1022,7 +1022,7 @@ void CacheDictionary::update(UpdateUnitPtr & update_unit_ptr) const
setDefaultAttributeValue(attribute, cell_idx);
/// inform caller that the cell has not been found
update_unit_ptr->getAbsentIdHandler()(id, cell_idx);
update_unit_ptr->callAbsentIdHandler(id, cell_idx);
}
ProfileEvents::increment(ProfileEvents::DictCacheKeysRequestedMiss, not_found_num);

View File

@ -399,16 +399,18 @@ private:
absent_id_handler([](Key, size_t){}){}
PresentIdHandler getPresentIdHandler()
void callPresentIdHandler(Key key, size_t cell_idx)
{
std::lock_guard lock(callback_mutex);
return can_use_callback ? present_id_handler : PresentIdHandler{};
if (can_use_callback)
present_id_handler(key, cell_idx);
}
AbsentIdHandler getAbsentIdHandler()
void callAbsentIdHandler(Key key, size_t cell_idx)
{
std::lock_guard lock(callback_mutex);
return can_use_callback ? absent_id_handler : AbsentIdHandler{};
if (can_use_callback)
absent_id_handler(key, cell_idx);
}
std::vector<Key> requested_ids;

View File

@ -148,7 +148,9 @@ void CacheDictionary::getItemsNumberImpl(
std::begin(cache_expired_ids), std::end(cache_expired_ids),
std::back_inserter(required_ids), [](auto & pair) { return pair.first; });
auto on_cell_updated = [&] (const auto id, const auto cell_idx)
auto on_cell_updated =
[&attribute_array, &cache_not_found_ids, &cache_expired_ids, &out]
(const auto id, const auto cell_idx)
{
const auto attribute_value = attribute_array[cell_idx];

View File

@ -331,6 +331,7 @@ void registerFileSegmentationEngineCSV(FormatFactory & factory);
void registerFileSegmentationEngineJSONEachRow(FormatFactory & factory);
void registerFileSegmentationEngineRegexp(FormatFactory & factory);
void registerFileSegmentationEngineJSONAsString(FormatFactory & factory);
void registerFileSegmentationEngineLineAsString(FormatFactory & factory);
/// Formats for both input/output.
@ -400,6 +401,7 @@ FormatFactory::FormatFactory()
registerFileSegmentationEngineJSONEachRow(*this);
registerFileSegmentationEngineRegexp(*this);
registerFileSegmentationEngineJSONAsString(*this);
registerFileSegmentationEngineLineAsString(*this);
registerInputFormatNative(*this);
registerOutputFormatNative(*this);

View File

@ -23,13 +23,27 @@ namespace ErrorCodes
extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH;
}
MySQLBlockInputStream::Connection::Connection(
const mysqlxx::PoolWithFailover::Entry & entry_,
const std::string & query_str)
: entry(entry_)
, query{entry->query(query_str)}
, result{query.use()}
{
}
MySQLBlockInputStream::MySQLBlockInputStream(
const mysqlxx::PoolWithFailover::Entry & entry_, const std::string & query_str, const Block & sample_block, const UInt64 max_block_size_, const bool auto_close_)
: entry{entry_}, query{this->entry->query(query_str)}, result{query.use()}, max_block_size{max_block_size_}, auto_close{auto_close_}
const mysqlxx::PoolWithFailover::Entry & entry,
const std::string & query_str,
const Block & sample_block,
const UInt64 max_block_size_,
const bool auto_close_)
: connection{std::make_unique<Connection>(entry, query_str)}
, max_block_size{max_block_size_}
, auto_close{auto_close_}
{
if (sample_block.columns() != result.getNumFields())
throw Exception{"mysqlxx::UseQueryResult contains " + toString(result.getNumFields()) + " columns while "
if (sample_block.columns() != connection->result.getNumFields())
throw Exception{"mysqlxx::UseQueryResult contains " + toString(connection->result.getNumFields()) + " columns while "
+ toString(sample_block.columns()) + " expected",
ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH};
@ -106,11 +120,11 @@ namespace
Block MySQLBlockInputStream::readImpl()
{
auto row = result.fetch();
auto row = connection->result.fetch();
if (!row)
{
if (auto_close)
entry.disconnect();
connection->entry.disconnect();
return {};
}
@ -145,11 +159,42 @@ Block MySQLBlockInputStream::readImpl()
if (num_rows == max_block_size)
break;
row = result.fetch();
row = connection->result.fetch();
}
return description.sample_block.cloneWithColumns(std::move(columns));
}
MySQLBlockInputStream::MySQLBlockInputStream(
const Block & sample_block_,
UInt64 max_block_size_,
bool auto_close_)
: max_block_size(max_block_size_)
, auto_close(auto_close_)
{
description.init(sample_block_);
}
MySQLLazyBlockInputStream::MySQLLazyBlockInputStream(
mysqlxx::Pool & pool_,
const std::string & query_str_,
const Block & sample_block_,
const UInt64 max_block_size_,
const bool auto_close_)
: MySQLBlockInputStream(sample_block_, max_block_size_, auto_close_)
, pool(pool_)
, query_str(query_str_)
{
}
void MySQLLazyBlockInputStream::readPrefix()
{
connection = std::make_unique<Connection>(pool.get(), query_str);
if (description.sample_block.columns() != connection->result.getNumFields())
throw Exception{"mysqlxx::UseQueryResult contains " + toString(connection->result.getNumFields()) + " columns while "
+ toString(description.sample_block.columns()) + " expected",
ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH};
}
}
#endif

View File

@ -10,12 +10,13 @@
namespace DB
{
/// Allows processing results of a MySQL query as a sequence of Blocks, simplifies chaining
class MySQLBlockInputStream final : public IBlockInputStream
class MySQLBlockInputStream : public IBlockInputStream
{
public:
MySQLBlockInputStream(
const mysqlxx::PoolWithFailover::Entry & entry_,
const mysqlxx::PoolWithFailover::Entry & entry,
const std::string & query_str,
const Block & sample_block,
const UInt64 max_block_size_,
@ -25,15 +26,43 @@ public:
Block getHeader() const override { return description.sample_block.cloneEmpty(); }
private:
protected:
MySQLBlockInputStream(const Block & sample_block_, UInt64 max_block_size_, bool auto_close_);
Block readImpl() override;
mysqlxx::PoolWithFailover::Entry entry;
mysqlxx::Query query;
mysqlxx::UseQueryResult result;
struct Connection
{
Connection(const mysqlxx::PoolWithFailover::Entry & entry_, const std::string & query_str);
mysqlxx::PoolWithFailover::Entry entry;
mysqlxx::Query query;
mysqlxx::UseQueryResult result;
};
std::unique_ptr<Connection> connection;
const UInt64 max_block_size;
const bool auto_close;
ExternalResultDescription description;
};
/// Like MySQLBlockInputStream, but allocates connection only when reading is starting.
/// It allows to create a lot of stream objects without occupation of all connection pool.
class MySQLLazyBlockInputStream final : public MySQLBlockInputStream
{
public:
MySQLLazyBlockInputStream(
mysqlxx::Pool & pool_,
const std::string & query_str_,
const Block & sample_block_,
const UInt64 max_block_size_,
const bool auto_close_ = false);
private:
void readPrefix() override;
mysqlxx::Pool & pool;
std::string query_str;
};
}

View File

@ -62,12 +62,10 @@ else()
endif()
option(STRIP_DEBUG_SYMBOLS_FUNCTIONS
"Do not generate debugger info for ClickHouse functions.
Provides faster linking and lower binary size.
Tradeoff is the inability to debug some source files with e.g. gdb
(empty stack frames and no local variables)."
${STRIP_DSF_DEFAULT})
# Provides faster linking and lower binary size.
# Tradeoff is the inability to debug some source files with e.g. gdb
# (empty stack frames and no local variables)."
option(STRIP_DEBUG_SYMBOLS_FUNCTIONS "Do not generate debugger info for ClickHouse functions" ${STRIP_DSF_DEFAULT})
if (STRIP_DEBUG_SYMBOLS_FUNCTIONS)
message(WARNING "Not generating debugger info for ClickHouse functions")
@ -115,7 +113,11 @@ if(USE_RAPIDJSON)
target_include_directories(clickhouse_functions SYSTEM PRIVATE ${RAPIDJSON_INCLUDE_DIR})
endif()
option(ENABLE_MULTITARGET_CODE "" ON)
# ClickHouse developers may use platform-dependent code under some macro (e.g. `#ifdef ENABLE_MULTITARGET`).
# If turned ON, this option defines such macro.
# See `src/Functions/TargetSpecific.h`
option(ENABLE_MULTITARGET_CODE "Enable platform-dependent code" ON)
if (ENABLE_MULTITARGET_CODE)
add_definitions(-DENABLE_MULTITARGET_CODE=1)
else()

View File

@ -80,7 +80,7 @@ public:
DataTypePtr getReturnType(const ColumnsWithTypeAndName &) const override { return {}; } // Not used
bool useDefaultImplementationForNulls() const override { return false; }
bool useDefaultImplementationForLowCardinalityColumns() const override { return true; }
bool useDefaultImplementationForLowCardinalityColumns() const override { return false; }
bool isVariadic() const override { return true; }
size_t getNumberOfArguments() const override { return 0; }

View File

@ -1213,7 +1213,7 @@ public:
const bool left_is_string = isStringOrFixedString(which_left);
const bool right_is_string = isStringOrFixedString(which_right);
bool date_and_datetime = (left_type != right_type) &&
bool date_and_datetime = (which_left.idx != which_right.idx) &&
which_left.isDateOrDateTime() && which_right.isDateOrDateTime();
if (left_is_num && right_is_num && !date_and_datetime)

View File

@ -11,7 +11,7 @@
*
* If compiler is not gcc/clang or target isn't x86_64 or ENABLE_MULTITARGET_CODE
* was set to OFF in cmake, all code inside these macros will be removed and
* USE_MUTLITARGE_CODE will be set to 0. Use #if USE_MUTLITARGE_CODE whenever you
* USE_MULTITARGET_CODE will be set to 0. Use #if USE_MULTITARGET_CODE whenever you
* use anything from this namespaces.
*
* For similarities there is a macros DECLARE_DEFAULT_CODE, which wraps code

View File

@ -235,11 +235,7 @@ static Block createBlockFromAST(const ASTPtr & node, const DataTypes & types, co
return header.cloneWithColumns(std::move(columns));
}
/** Create a block for set from literal.
* 'set_element_types' - types of what are on the left hand side of IN.
* 'right_arg' - Literal - Tuple or Array.
*/
static Block createBlockForSet(
Block createBlockForSet(
const DataTypePtr & left_arg_type,
const ASTPtr & right_arg,
const DataTypes & set_element_types,
@ -280,14 +276,7 @@ static Block createBlockForSet(
return block;
}
/** Create a block for set from expression.
* 'set_element_types' - types of what are on the left hand side of IN.
* 'right_arg' - list of values: 1, 2, 3 or list of tuples: (1, 2), (3, 4), (5, 6).
*
* We need special implementation for ASTFunction, because in case, when we interpret
* large tuple or array as function, `evaluateConstantExpression` works extremely slow.
*/
static Block createBlockForSet(
Block createBlockForSet(
const DataTypePtr & left_arg_type,
const std::shared_ptr<ASTFunction> & right_arg,
const DataTypes & set_element_types,

View File

@ -16,11 +16,37 @@ struct ExpressionAction;
class ExpressionActions;
using ExpressionActionsPtr = std::shared_ptr<ExpressionActions>;
/// The case of an explicit enumeration of values.
/// The case of an explicit enumeration of values.
SetPtr makeExplicitSet(
const ASTFunction * node, const Block & sample_block, bool create_ordered_set,
const Context & context, const SizeLimits & limits, PreparedSets & prepared_sets);
/** Create a block for set from expression.
* 'set_element_types' - types of what are on the left hand side of IN.
* 'right_arg' - list of values: 1, 2, 3 or list of tuples: (1, 2), (3, 4), (5, 6).
*
* We need special implementation for ASTFunction, because in case, when we interpret
* large tuple or array as function, `evaluateConstantExpression` works extremely slow.
*
* Note: this and following functions are used in third-party applications in Arcadia, so
* they should be declared in header file.
*
*/
Block createBlockForSet(
const DataTypePtr & left_arg_type,
const std::shared_ptr<ASTFunction> & right_arg,
const DataTypes & set_element_types,
const Context & context);
/** Create a block for set from literal.
* 'set_element_types' - types of what are on the left hand side of IN.
* 'right_arg' - Literal - Tuple or Array.
*/
Block createBlockForSet(
const DataTypePtr & left_arg_type,
const ASTPtr & right_arg,
const DataTypes & set_element_types,
const Context & context);
/** For ActionsVisitor
* A stack of ExpressionActions corresponding to nested lambda expressions.

View File

@ -623,4 +623,21 @@ const std::string & Cluster::ShardInfo::pathForInsert(bool prefer_localhost_repl
return dir_name_for_internal_replication_with_local;
}
bool Cluster::maybeCrossReplication() const
{
/// Cluster can be used for cross-replication if some replicas have different default database names,
/// so one clickhouse-server instance can contain multiple replicas.
if (addresses_with_failover.empty())
return false;
const String & database_name = addresses_with_failover.front().front().default_database;
for (const auto & shard : addresses_with_failover)
for (const auto & replica : shard)
if (replica.default_database != database_name)
return true;
return false;
}
}

View File

@ -193,6 +193,10 @@ public:
/// Get a new Cluster that contains all servers (all shards with all replicas) from existing cluster as independent shards.
std::unique_ptr<Cluster> getClusterWithReplicasAsShards(const Settings & settings) const;
/// Returns false if cluster configuration doesn't allow to use it for cross-replication.
/// NOTE: true does not mean, that it's actually a cross-replication cluster.
bool maybeCrossReplication() const;
private:
using SlotToShard = std::vector<UInt64>;
SlotToShard slot_to_shard;

View File

@ -968,6 +968,7 @@ StoragePtr Context::getViewSource()
Settings Context::getSettings() const
{
auto lock = getLock();
return settings;
}
@ -1088,6 +1089,18 @@ String Context::getInitialQueryId() const
}
void Context::setCurrentDatabaseNameInGlobalContext(const String & name)
{
if (global_context != this)
throw Exception("Cannot set current database for non global context, this method should be used during server initialization", ErrorCodes::LOGICAL_ERROR);
auto lock = getLock();
if (!current_database.empty())
throw Exception("Default database name cannot be changed in global context without server restart", ErrorCodes::LOGICAL_ERROR);
current_database = name;
}
void Context::setCurrentDatabase(const String & name)
{
DatabaseCatalog::instance().assertDatabaseExists(name);

View File

@ -360,6 +360,9 @@ public:
String getInitialQueryId() const;
void setCurrentDatabase(const String & name);
/// Set current_database for global context. We don't validate that database
/// exists because it should be set before databases loading.
void setCurrentDatabaseNameInGlobalContext(const String & name);
void setCurrentQueryId(const String & query_id);
void killCurrentQuery();

View File

@ -525,7 +525,7 @@ static bool allowDictJoin(StoragePtr joined_storage, const Context & context, St
if (!dict)
return false;
dict_name = dict->dictionaryName();
dict_name = dict->resolvedDictionaryName();
auto dictionary = context.getExternalDictionariesLoader().getDictionary(dict_name);
if (!dictionary)
return false;

View File

@ -5,6 +5,7 @@
#include <Common/StringUtils/StringUtils.h>
#include <Common/escapeForFileName.h>
#include <Common/typeid_cast.h>
#include <Common/Macros.h>
#include <Core/Defines.h>
#include <Core/Settings.h>
@ -853,17 +854,60 @@ BlockIO InterpreterCreateQuery::createDictionary(ASTCreateQuery & create)
return {};
}
void InterpreterCreateQuery::prepareOnClusterQuery(ASTCreateQuery & create, const Context & context, const String & cluster_name)
{
if (create.attach)
return;
/// For CREATE query generate UUID on initiator, so it will be the same on all hosts.
/// It will be ignored if database does not support UUIDs.
if (create.uuid == UUIDHelpers::Nil)
create.uuid = UUIDHelpers::generateV4();
/// For cross-replication cluster we cannot use UUID in replica path.
String cluster_name_expanded = context.getMacros()->expand(cluster_name);
ClusterPtr cluster = context.getCluster(cluster_name_expanded);
if (cluster->maybeCrossReplication())
{
/// Check that {uuid} macro is not used in zookeeper_path for ReplicatedMergeTree.
/// Otherwise replicas will generate different paths.
if (!create.storage)
return;
if (!create.storage->engine)
return;
if (!startsWith(create.storage->engine->name, "Replicated"))
return;
bool has_explicit_zk_path_arg = create.storage->engine->arguments &&
create.storage->engine->arguments->children.size() >= 2 &&
create.storage->engine->arguments->children[0]->as<ASTLiteral>() &&
create.storage->engine->arguments->children[0]->as<ASTLiteral>()->value.getType() == Field::Types::String;
if (has_explicit_zk_path_arg)
{
String zk_path = create.storage->engine->arguments->children[0]->as<ASTLiteral>()->value.get<String>();
Macros::MacroExpansionInfo info;
info.uuid = create.uuid;
info.ignore_unknown = true;
context.getMacros()->expand(zk_path, info);
if (!info.expanded_uuid)
return;
}
throw Exception("Seems like cluster is configured for cross-replication, "
"but zookeeper_path for ReplicatedMergeTree is not specified or contains {uuid} macro. "
"It's not supported for cross replication, because tables must have different UUIDs. "
"Please specify unique zookeeper_path explicitly.", ErrorCodes::INCORRECT_QUERY);
}
}
BlockIO InterpreterCreateQuery::execute()
{
auto & create = query_ptr->as<ASTCreateQuery &>();
if (!create.cluster.empty())
{
/// Allows to execute ON CLUSTER queries during version upgrade
bool force_backward_compatibility = !context.getSettingsRef().show_table_uuid_in_table_create_query_if_not_nil;
/// For CREATE query generate UUID on initiator, so it will be the same on all hosts.
/// It will be ignored if database does not support UUIDs.
if (!force_backward_compatibility && !create.attach && create.uuid == UUIDHelpers::Nil)
create.uuid = UUIDHelpers::generateV4();
prepareOnClusterQuery(create, context, create.cluster);
return executeDDLQueryOnCluster(query_ptr, context, getRequiredAccess());
}

View File

@ -55,6 +55,8 @@ public:
static ColumnsDescription getColumnsDescription(const ASTExpressionList & columns, const Context & context, bool sanity_check_compression_codecs);
static ConstraintsDescription getConstraintsDescription(const ASTExpressionList * constraints);
static void prepareOnClusterQuery(ASTCreateQuery & create, const Context & context, const String & cluster_name);
private:
struct TableProperties
{

View File

@ -119,13 +119,17 @@ struct QueryPlanSettings
{
QueryPlan::ExplainPlanOptions query_plan_options;
/// Apply query plan optimisations.
bool optimize = true;
constexpr static char name[] = "PLAN";
std::unordered_map<std::string, std::reference_wrapper<bool>> boolean_settings =
{
{"header", query_plan_options.header},
{"description", query_plan_options.description},
{"actions", query_plan_options.actions}
{"actions", query_plan_options.actions},
{"optimize", optimize},
};
};
@ -248,7 +252,8 @@ BlockInputStreamPtr InterpreterExplainQuery::executeImpl()
InterpreterSelectWithUnionQuery interpreter(ast.getExplainedQuery(), context, SelectQueryOptions());
interpreter.buildQueryPlan(plan);
plan.optimize();
if (settings.optimize)
plan.optimize();
WriteBufferFromOStream buffer(ss);
plan.explainPlan(buffer, settings.query_plan_options);

View File

@ -401,7 +401,7 @@ void ProcessList::killAllQueries()
QueryStatusInfo QueryStatus::getInfo(bool get_thread_list, bool get_profile_events, bool get_settings) const
{
QueryStatusInfo res;
QueryStatusInfo res{};
res.query = query;
res.client_info = client_info;
@ -431,7 +431,7 @@ QueryStatusInfo QueryStatus::getInfo(bool get_thread_list, bool get_profile_even
}
if (get_settings && query_context)
res.query_settings = std::make_shared<Settings>(query_context->getSettingsRef());
res.query_settings = std::make_shared<Settings>(query_context->getSettings());
return res;
}

View File

@ -441,7 +441,7 @@ void SystemLog<LogElement>::flushImpl(const std::vector<LogElement> & to_flush,
ASTPtr query_ptr(insert.release());
// we need query context to do inserts to target table with MV containing subqueries or joins
auto insert_context = Context(context);
Context insert_context(context);
insert_context.makeQueryContext();
InterpreterInsertQuery interpreter(query_ptr, insert_context);

View File

@ -53,7 +53,7 @@ public:
ASTPtr clone() const override
{
auto replacement = std::make_shared<Replacement>(*this);
replacement->name = name;
replacement->children.clear();
replacement->expr = expr->clone();
replacement->children.push_back(replacement->expr);
return replacement;

View File

@ -0,0 +1,937 @@
#include <cassert>
#include <Parsers/obfuscateQueries.h>
#include <Parsers/Lexer.h>
#include <Poco/String.h>
#include <Common/Exception.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/BitHelpers.h>
#include <IO/WriteHelpers.h>
#include <IO/WriteBuffer.h>
#include <IO/ReadHelpers.h>
#include <IO/ReadBufferFromMemory.h>
namespace DB
{
namespace ErrorCodes
{
extern const int TOO_MANY_TEMPORARY_COLUMNS;
}
namespace
{
const std::unordered_set<std::string_view> keywords
{
"CREATE", "DATABASE", "IF", "NOT", "EXISTS", "TEMPORARY", "TABLE", "ON", "CLUSTER", "DEFAULT",
"MATERIALIZED", "ALIAS", "ENGINE", "AS", "VIEW", "POPULATE", "SETTINGS", "ATTACH", "DETACH", "DROP",
"RENAME", "TO", "ALTER", "ADD", "MODIFY", "CLEAR", "COLUMN", "AFTER", "COPY", "PROJECT",
"PRIMARY", "KEY", "CHECK", "PARTITION", "PART", "FREEZE", "FETCH", "FROM", "SHOW", "INTO",
"OUTFILE", "FORMAT", "TABLES", "DATABASES", "LIKE", "PROCESSLIST", "CASE", "WHEN", "THEN", "ELSE",
"END", "DESCRIBE", "DESC", "USE", "SET", "OPTIMIZE", "FINAL", "DEDUPLICATE", "INSERT", "VALUES",
"SELECT", "DISTINCT", "SAMPLE", "ARRAY", "JOIN", "GLOBAL", "LOCAL", "ANY", "ALL", "INNER",
"LEFT", "RIGHT", "FULL", "OUTER", "CROSS", "USING", "PREWHERE", "WHERE", "GROUP", "BY",
"WITH", "TOTALS", "HAVING", "ORDER", "COLLATE", "LIMIT", "UNION", "AND", "OR", "ASC",
"IN", "KILL", "QUERY", "SYNC", "ASYNC", "TEST", "BETWEEN", "TRUNCATE", "USER", "ROLE",
"PROFILE", "QUOTA", "POLICY", "ROW", "GRANT", "REVOKE", "OPTION", "ADMIN", "EXCEPT", "REPLACE",
"IDENTIFIED", "HOST", "NAME", "READONLY", "WRITABLE", "PERMISSIVE", "FOR", "RESTRICTIVE", "RANDOMIZED",
"INTERVAL", "LIMITS", "ONLY", "TRACKING", "IP", "REGEXP", "ILIKE", "DICTIONARY"
};
const std::unordered_set<std::string_view> keep_words
{
"id", "name", "value", "num",
"Id", "Name", "Value", "Num",
"ID", "NAME", "VALUE", "NUM",
};
/// The list of nouns collected from here: http://www.desiquintans.com/nounlist, Public domain.
std::initializer_list<std::string_view> nouns
{
"aardvark", "abacus", "abbey", "abbreviation", "abdomen", "ability", "abnormality", "abolishment", "abortion",
"abrogation", "absence", "abundance", "abuse", "academics", "academy", "accelerant", "accelerator", "accent", "acceptance", "access",
"accessory", "accident", "accommodation", "accompanist", "accomplishment", "accord", "accordance", "accordion", "account", "accountability",
"accountant", "accounting", "accuracy", "accusation", "acetate", "achievement", "achiever", "acid", "acknowledgment", "acorn", "acoustics",
"acquaintance", "acquisition", "acre", "acrylic", "act", "action", "activation", "activist", "activity", "actor", "actress", "acupuncture",
"ad", "adaptation", "adapter", "addiction", "addition", "address", "adjective", "adjustment", "admin", "administration", "administrator",
"admire", "admission", "adobe", "adoption", "adrenalin", "adrenaline", "adult", "adulthood", "advance", "advancement", "advantage", "advent",
"adverb", "advertisement", "advertising", "advice", "adviser", "advocacy", "advocate", "affair", "affect", "affidavit", "affiliate",
"affinity", "afoul", "afterlife", "aftermath", "afternoon", "aftershave", "aftershock", "afterthought", "age", "agency", "agenda", "agent",
"aggradation", "aggression", "aglet", "agony", "agreement", "agriculture", "aid", "aide", "aim", "air", "airbag", "airbus", "aircraft",
"airfare", "airfield", "airforce", "airline", "airmail", "airman", "airplane", "airport", "airship", "airspace", "alarm", "alb", "albatross",
"album", "alcohol", "alcove", "alder", "ale", "alert", "alfalfa", "algebra", "algorithm", "alias", "alibi", "alien", "allegation", "allergist",
"alley", "alliance", "alligator", "allocation", "allowance", "alloy", "alluvium", "almanac", "almighty", "almond", "alpaca", "alpenglow",
"alpenhorn", "alpha", "alphabet", "altar", "alteration", "alternative", "altitude", "alto", "aluminium", "aluminum", "amazement", "amazon",
"ambassador", "amber", "ambience", "ambiguity", "ambition", "ambulance", "amendment", "amenity", "ammunition", "amnesty", "amount", "amusement",
"anagram", "analgesia", "analog", "analogue", "analogy", "analysis", "analyst", "analytics", "anarchist", "anarchy", "anatomy", "ancestor",
"anchovy", "android", "anesthesiologist", "anesthesiology", "angel", "anger", "angina", "angiosperm", "angle", "angora", "angstrom",
"anguish", "animal", "anime", "anise", "ankle", "anklet", "anniversary", "announcement", "annual", "anorak", "answer", "ant", "anteater",
"antecedent", "antechamber", "antelope", "antennae", "anterior", "anthropology", "antibody", "anticipation", "anticodon", "antigen",
"antique", "antiquity", "antler", "antling", "anxiety", "anybody", "anyone", "anything", "anywhere", "apartment", "ape", "aperitif",
"apology", "app", "apparatus", "apparel", "appeal", "appearance", "appellation", "appendix", "appetiser", "appetite", "appetizer", "applause",
"apple", "applewood", "appliance", "application", "appointment", "appreciation", "apprehension", "approach", "appropriation", "approval",
"apricot", "apron", "apse", "aquarium", "aquifer", "arcade", "arch", "archaeologist", "archaeology", "archeology", "archer",
"architect", "architecture", "archives", "area", "arena", "argument", "arithmetic", "ark", "arm", "armadillo", "armament",
"armchair", "armoire", "armor", "armour", "armpit", "armrest", "army", "arrangement", "array", "arrest", "arrival", "arrogance", "arrow",
"art", "artery", "arthur", "artichoke", "article", "artifact", "artificer", "artist", "ascend", "ascent", "ascot", "ash", "ashram", "ashtray",
"aside", "asparagus", "aspect", "asphalt", "aspic", "assassination", "assault", "assembly", "assertion", "assessment", "asset",
"assignment", "assist", "assistance", "assistant", "associate", "association", "assumption", "assurance", "asterisk", "astrakhan", "astrolabe",
"astrologer", "astrology", "astronomy", "asymmetry", "atelier", "atheist", "athlete", "athletics", "atmosphere", "atom", "atrium", "attachment",
"attack", "attacker", "attainment", "attempt", "attendance", "attendant", "attention", "attenuation", "attic", "attitude", "attorney",
"attraction", "attribute", "auction", "audience", "audit", "auditorium", "aunt", "authentication", "authenticity", "author", "authorisation",
"authority", "authorization", "auto", "autoimmunity", "automation", "automaton", "autumn", "availability", "avalanche", "avenue", "average",
"avocado", "award", "awareness", "awe", "axis", "azimuth", "babe", "baboon", "babushka", "baby", "bachelor", "back", "backbone",
"backburn", "backdrop", "background", "backpack", "backup", "backyard", "bacon", "bacterium", "badge", "badger", "bafflement", "bag",
"bagel", "baggage", "baggie", "baggy", "bagpipe", "bail", "bait", "bake", "baker", "bakery", "bakeware", "balaclava", "balalaika", "balance",
"balcony", "ball", "ballet", "balloon", "balloonist", "ballot", "ballpark", "bamboo", "ban", "banana", "band", "bandana", "bandanna",
"bandolier", "bandwidth", "bangle", "banjo", "bank", "bankbook", "banker", "banking", "bankruptcy", "banner", "banquette", "banyan",
"baobab", "bar", "barbecue", "barbeque", "barber", "barbiturate", "bargain", "barge", "baritone", "barium", "bark", "barley", "barn",
"barometer", "barracks", "barrage", "barrel", "barrier", "barstool", "bartender", "base", "baseball", "baseboard", "baseline", "basement",
"basics", "basil", "basin", "basis", "basket", "basketball", "bass", "bassinet", "bassoon", "bat", "bath", "bather", "bathhouse", "bathrobe",
"bathroom", "bathtub", "battalion", "batter", "battery", "batting", "battle", "battleship", "bay", "bayou", "beach", "bead", "beak",
"beam", "bean", "beancurd", "beanie", "beanstalk", "bear", "beard", "beast", "beastie", "beat", "beating", "beauty", "beaver", "beck",
"bed", "bedrock", "bedroom", "bee", "beech", "beef", "beer", "beet", "beetle", "beggar", "beginner", "beginning", "begonia", "behalf",
"behavior", "behaviour", "beheading", "behest", "behold", "being", "belfry", "belief", "believer", "bell", "belligerency", "bellows",
"belly", "belt", "bench", "bend", "beneficiary", "benefit", "beret", "berry", "bestseller", "bet", "beverage", "beyond",
"bias", "bibliography", "bicycle", "bid", "bidder", "bidding", "bidet", "bifocals", "bijou", "bike", "bikini", "bill", "billboard", "billing",
"billion", "bin", "binoculars", "biology", "biopsy", "biosphere", "biplane", "birch", "bird", "birdbath", "birdcage",
"birdhouse", "birth", "birthday", "biscuit", "bit", "bite", "bitten", "bitter", "black", "blackberry", "blackbird", "blackboard", "blackfish",
"blackness", "bladder", "blade", "blame", "blank", "blanket", "blast", "blazer", "blend", "blessing", "blight", "blind", "blinker", "blister",
"blizzard", "block", "blocker", "blog", "blogger", "blood", "bloodflow", "bloom", "bloomer", "blossom", "blouse", "blow", "blowgun",
"blowhole", "blue", "blueberry", "blush", "boar", "board", "boat", "boatload", "boatyard", "bob", "bobcat", "body", "bog", "bolero",
"bolt", "bomb", "bomber", "bombing", "bond", "bonding", "bondsman", "bone", "bonfire", "bongo", "bonnet", "bonsai", "bonus", "boogeyman",
"book", "bookcase", "bookend", "booking", "booklet", "bookmark", "boolean", "boom", "boon", "boost", "booster", "boot", "bootee", "bootie",
"booty", "border", "bore", "borrower", "borrowing", "bosom", "boss", "botany", "bother", "bottle", "bottling", "bottom",
"boudoir", "bough", "boulder", "boulevard", "boundary", "bouquet", "bourgeoisie", "bout", "boutique", "bow", "bower", "bowl", "bowler",
"bowling", "bowtie", "box", "boxer", "boxspring", "boy", "boycott", "boyfriend", "boyhood", "boysenberry", "bra", "brace", "bracelet",
"bracket", "brain", "brake", "bran", "branch", "brand", "brandy", "brass", "brassiere", "bratwurst", "bread", "breadcrumb", "breadfruit",
"break", "breakdown", "breakfast", "breakpoint", "breakthrough", "breast", "breastplate", "breath", "breeze", "brewer", "bribery", "brick",
"bricklaying", "bride", "bridge", "brief", "briefing", "briefly", "briefs", "brilliant", "brink", "brisket", "broad", "broadcast", "broccoli",
"brochure", "brocolli", "broiler", "broker", "bronchitis", "bronco", "bronze", "brooch", "brood", "brook", "broom", "brother",
"brow", "brown", "brownie", "browser", "browsing", "brunch", "brush", "brushfire", "brushing", "bubble", "buck", "bucket", "buckle",
"buckwheat", "bud", "buddy", "budget", "buffalo", "buffer", "buffet", "bug", "buggy", "bugle", "builder", "building", "bulb", "bulk",
"bull", "bulldozer", "bullet", "bump", "bumper", "bun", "bunch", "bungalow", "bunghole", "bunkhouse", "burden", "bureau",
"burglar", "burial", "burlesque", "burn", "burning", "burrito", "burro", "burrow", "burst", "bus", "bush", "business", "businessman",
"bust", "bustle", "butane", "butcher", "butler", "butter", "butterfly", "button", "buy", "buyer", "buying", "buzz", "buzzard",
"cabana", "cabbage", "cabin", "cabinet", "cable", "caboose", "cacao", "cactus", "caddy", "cadet", "cafe", "caffeine", "caftan", "cage",
"cake", "calcification", "calculation", "calculator", "calculus", "calendar", "calf", "caliber", "calibre", "calico", "call", "calm",
"calorie", "camel", "cameo", "camera", "camp", "campaign", "campaigning", "campanile", "camper", "campus", "can", "canal", "cancer",
"candelabra", "candidacy", "candidate", "candle", "candy", "cane", "cannibal", "cannon", "canoe", "canon", "canopy", "cantaloupe", "canteen",
"canvas", "cap", "capability", "capacity", "cape", "caper", "capital", "capitalism", "capitulation", "capon", "cappelletti", "cappuccino",
"captain", "caption", "captor", "car", "carabao", "caramel", "caravan", "carbohydrate", "carbon", "carboxyl", "card", "cardboard", "cardigan",
"care", "career", "cargo", "caribou", "carload", "carnation", "carnival", "carol", "carotene", "carp", "carpenter", "carpet", "carpeting",
"carport", "carriage", "carrier", "carrot", "carry", "cart", "cartel", "carter", "cartilage", "cartload", "cartoon", "cartridge", "carving",
"cascade", "case", "casement", "cash", "cashew", "cashier", "casino", "casket", "cassava", "casserole", "cassock", "cast", "castanet",
"castle", "casualty", "cat", "catacomb", "catalogue", "catalysis", "catalyst", "catamaran", "catastrophe", "catch", "catcher", "category",
"caterpillar", "cathedral", "cation", "catsup", "cattle", "cauliflower", "causal", "cause", "causeway", "caution", "cave", "caviar",
"cayenne", "ceiling", "celebration", "celebrity", "celeriac", "celery", "cell", "cellar", "cello", "celsius", "cement", "cemetery", "cenotaph",
"census", "cent", "center", "centimeter", "centre", "centurion", "century", "cephalopod", "ceramic", "ceramics", "cereal", "ceremony",
"certainty", "certificate", "certification", "cesspool", "chafe", "chain", "chainstay", "chair", "chairlift", "chairman", "chairperson",
"chaise", "chalet", "chalice", "chalk", "challenge", "chamber", "champagne", "champion", "championship", "chance", "chandelier", "change",
"channel", "chaos", "chap", "chapel", "chaplain", "chapter", "character", "characteristic", "characterization", "chard", "charge", "charger",
"charity", "charlatan", "charm", "charset", "chart", "charter", "chasm", "chassis", "chastity", "chasuble", "chateau", "chatter", "chauffeur",
"chauvinist", "check", "checkbook", "checking", "checkout", "checkroom", "cheddar", "cheek", "cheer", "cheese", "cheesecake", "cheetah",
"chef", "chem", "chemical", "chemistry", "chemotaxis", "cheque", "cherry", "chess", "chest", "chestnut", "chick", "chicken", "chicory",
"chief", "chiffonier", "child", "childbirth", "childhood", "chili", "chill", "chime", "chimpanzee", "chin", "chinchilla", "chino", "chip",
"chipmunk", "chivalry", "chive", "chives", "chocolate", "choice", "choir", "choker", "cholesterol", "choosing", "chop",
"chops", "chopstick", "chopsticks", "chord", "chorus", "chow", "chowder", "chrome", "chromolithograph", "chronicle", "chronograph", "chronometer",
"chrysalis", "chub", "chuck", "chug", "church", "churn", "chutney", "cicada", "cigarette", "cilantro", "cinder", "cinema", "cinnamon",
"circadian", "circle", "circuit", "circulation", "circumference", "circumstance", "cirrhosis", "cirrus", "citizen", "citizenship", "citron",
"citrus", "city", "civilian", "civilisation", "civilization", "claim", "clam", "clamp", "clan", "clank", "clapboard", "clarification",
"clarinet", "clarity", "clasp", "class", "classic", "classification", "classmate", "classroom", "clause", "clave", "clavicle", "clavier",
"claw", "clay", "cleaner", "clearance", "clearing", "cleat", "cleavage", "clef", "cleft", "clergyman", "cleric", "clerk", "click", "client",
"cliff", "climate", "climb", "clinic", "clip", "clipboard", "clipper", "cloak", "cloakroom", "clock", "clockwork", "clogs", "cloister",
"clone", "close", "closet", "closing", "closure", "cloth", "clothes", "clothing", "cloud", "cloudburst", "clove", "clover", "cloves",
"club", "clue", "cluster", "clutch", "coach", "coal", "coalition", "coast", "coaster", "coat", "cob", "cobbler", "cobweb",
"cock", "cockpit", "cockroach", "cocktail", "cocoa", "coconut", "cod", "code", "codepage", "codling", "codon", "codpiece", "coevolution",
"cofactor", "coffee", "coffin", "cohesion", "cohort", "coil", "coin", "coincidence", "coinsurance", "coke", "cold", "coleslaw", "coliseum",
"collaboration", "collagen", "collapse", "collar", "collard", "collateral", "colleague", "collection", "collectivisation", "collectivization",
"collector", "college", "collision", "colloquy", "colon", "colonial", "colonialism", "colonisation", "colonization", "colony", "color",
"colorlessness", "colt", "column", "columnist", "comb", "combat", "combination", "combine", "comeback", "comedy", "comestible", "comfort",
"comfortable", "comic", "comics", "comma", "command", "commander", "commandment", "comment", "commerce", "commercial", "commission",
"commitment", "committee", "commodity", "common", "commonsense", "commotion", "communicant", "communication", "communion", "communist",
"community", "commuter", "company", "comparison", "compass", "compassion", "compassionate", "compensation", "competence", "competition",
"competitor", "complaint", "complement", "completion", "complex", "complexity", "compliance", "complication", "complicity", "compliment",
"component", "comportment", "composer", "composite", "composition", "compost", "comprehension", "compress", "compromise", "comptroller",
"compulsion", "computer", "comradeship", "con", "concentrate", "concentration", "concept", "conception", "concern", "concert", "conclusion",
"concrete", "condition", "conditioner", "condominium", "condor", "conduct", "conductor", "cone", "confectionery", "conference", "confidence",
"confidentiality", "configuration", "confirmation", "conflict", "conformation", "confusion", "conga", "congo", "congregation", "congress",
"congressman", "congressperson", "conifer", "connection", "connotation", "conscience", "consciousness", "consensus", "consent", "consequence",
"conservation", "conservative", "consideration", "consignment", "consist", "consistency", "console", "consonant", "conspiracy", "conspirator",
"constant", "constellation", "constitution", "constraint", "construction", "consul", "consulate", "consulting", "consumer", "consumption",
"contact", "contact lens", "contagion", "container", "content", "contention", "contest", "context", "continent", "contingency", "continuity",
"contour", "contract", "contractor", "contrail", "contrary", "contrast", "contribution", "contributor", "control", "controller", "controversy",
"convection", "convenience", "convention", "conversation", "conversion", "convert", "convertible", "conviction", "cook", "cookbook",
"cookie", "cooking", "coonskin", "cooperation", "coordination", "coordinator", "cop", "cope", "copper", "copy", "copying",
"copyright", "copywriter", "coral", "cord", "corduroy", "core", "cork", "cormorant", "corn", "corner", "cornerstone", "cornet", "cornflakes",
"cornmeal", "corporal", "corporation", "corporatism", "corps", "corral", "correspondence", "correspondent", "corridor", "corruption",
"corsage", "cosset", "cost", "costume", "cot", "cottage", "cotton", "couch", "cougar", "cough", "council", "councilman", "councilor",
"councilperson", "counsel", "counseling", "counselling", "counsellor", "counselor", "count", "counter", "counterpart",
"counterterrorism", "countess", "country", "countryside", "county", "couple", "coupon", "courage", "course", "court", "courthouse", "courtroom",
"cousin", "covariate", "cover", "coverage", "coverall", "cow", "cowbell", "cowboy", "coyote", "crab", "crack", "cracker", "crackers",
"cradle", "craft", "craftsman", "cranberry", "crane", "cranky", "crash", "crate", "cravat", "craw", "crawdad", "crayfish", "crayon",
"crazy", "cream", "creation", "creationism", "creationist", "creative", "creativity", "creator", "creature", "creche", "credential",
"credenza", "credibility", "credit", "creditor", "creek", "creme brulee", "crepe", "crest", "crew", "crewman", "crewmate", "crewmember",
"crewmen", "cria", "crib", "cribbage", "cricket", "cricketer", "crime", "criminal", "crinoline", "crisis", "crisp", "criteria", "criterion",
"critic", "criticism", "crocodile", "crocus", "croissant", "crook", "crop", "cross", "crotch",
"croup", "crow", "crowd", "crown", "crucifixion", "crude", "cruelty", "cruise", "crumb", "crunch", "crusader", "crush", "crust", "cry",
"crystal", "crystallography", "cub", "cube", "cuckoo", "cucumber", "cue", "cuisine", "cultivar", "cultivator", "culture",
"culvert", "cummerbund", "cup", "cupboard", "cupcake", "cupola", "curd", "cure", "curio", "curiosity", "curl", "curler", "currant", "currency",
"current", "curriculum", "curry", "curse", "cursor", "curtailment", "curtain", "curve", "cushion", "custard", "custody", "custom", "customer",
"cut", "cuticle", "cutlet", "cutover", "cutting", "cyclamen", "cycle", "cyclone", "cyclooxygenase", "cygnet", "cylinder", "cymbal", "cynic",
"cyst", "cytokine", "cytoplasm", "dad", "daddy", "daffodil", "dagger", "dahlia", "daikon", "daily", "dairy", "daisy", "dam", "damage",
"dame", "dance", "dancer", "dancing", "dandelion", "danger", "dare", "dark", "darkness", "darn", "dart", "dash", "dashboard",
"data", "database", "date", "daughter", "dawn", "day", "daybed", "daylight", "dead", "deadline", "deal", "dealer", "dealing", "dearest",
"death", "deathwatch", "debate", "debris", "debt", "debtor", "decade", "decadence", "decency", "decimal", "decision",
"deck", "declaration", "declination", "decline", "decoder", "decongestant", "decoration", "decrease", "decryption", "dedication", "deduce",
"deduction", "deed", "deep", "deer", "default", "defeat", "defendant", "defender", "defense", "deficit", "definition", "deformation",
"degradation", "degree", "delay", "deliberation", "delight", "delivery", "demand", "democracy", "democrat", "demon", "demur", "den",
"denim", "denominator", "density", "dentist", "deodorant", "department", "departure", "dependency", "dependent", "deployment", "deposit",
"deposition", "depot", "depression", "depressive", "depth", "deputy", "derby", "derivation", "derivative", "derrick", "descendant", "descent",
"description", "desert", "design", "designation", "designer", "desire", "desk", "desktop", "dessert", "destination", "destiny", "destroyer",
"destruction", "detail", "detainee", "detainment", "detection", "detective", "detector", "detention", "determination", "detour", "devastation",
"developer", "developing", "development", "developmental", "deviance", "deviation", "device", "devil", "dew", "dhow", "diabetes", "diadem",
"diagnosis", "diagram", "dial", "dialect", "dialogue", "diam", "diamond", "diaper", "diaphragm", "diarist", "diary", "dibble", "dickey", "dictaphone", "dictator", "diction", "dictionary", "die", "diesel", "diet", "difference", "differential", "difficulty", "diffuse",
"dig", "digestion", "digestive", "digger", "digging", "digit", "dignity", "dilapidation", "dill", "dilution", "dime", "dimension", "dimple",
"diner", "dinghy", "dining", "dinner", "dinosaur", "dioxide", "dip", "diploma", "diplomacy", "dipstick", "direction", "directive", "director",
"directory", "dirndl", "dirt", "disability", "disadvantage", "disagreement", "disappointment", "disarmament", "disaster", "discharge",
"discipline", "disclaimer", "disclosure", "disco", "disconnection", "discount", "discourse", "discovery", "discrepancy", "discretion",
"discrimination", "discussion", "disdain", "disease", "disembodiment", "disengagement", "disguise", "disgust", "dish", "dishwasher",
"disk", "disparity", "dispatch", "displacement", "display", "disposal", "disposer", "disposition", "dispute", "disregard", "disruption",
"dissemination", "dissonance", "distance", "distinction", "distortion", "distribution", "distributor", "district", "divalent", "divan",
"diver", "diversity", "divide", "dividend", "divider", "divine", "diving", "division", "divorce", "doc", "dock", "doctor", "doctorate",
"doctrine", "document", "documentary", "documentation", "doe", "dog", "doggie", "dogsled", "dogwood", "doing", "doll", "dollar", "dollop",
"dolman", "dolor", "dolphin", "domain", "dome", "domination", "donation", "donkey", "donor", "donut", "door", "doorbell", "doorknob",
"doorpost", "doorway", "dory", "dose", "dot", "double", "doubling", "doubt", "doubter", "dough", "doughnut", "down", "downfall", "downforce",
"downgrade", "download", "downstairs", "downtown", "downturn", "dozen", "draft", "drag", "dragon", "dragonfly", "dragonfruit", "dragster",
"drain", "drainage", "drake", "drama", "dramaturge", "drapes", "draw", "drawbridge", "drawer", "drawing", "dream", "dreamer", "dredger",
"dress", "dresser", "dressing", "drill", "drink", "drinking", "drive", "driver", "driveway", "driving", "drizzle", "dromedary", "drop",
"drudgery", "drug", "drum", "drummer", "drunk", "dryer", "duck", "duckling", "dud", "dude", "due", "duel", "dueling", "duffel", "dugout",
"dulcimer", "dumbwaiter", "dump", "dump truck", "dune", "dune buggy", "dungarees", "dungeon", "duplexer", "duration", "durian", "dusk",
"dust", "dust storm", "duster", "duty", "dwarf", "dwell", "dwelling", "dynamics", "dynamite", "dynamo", "dynasty", "dysfunction",
"eagle", "eaglet", "ear", "eardrum", "earmuffs", "earnings", "earplug", "earring", "earrings", "earth", "earthquake",
"earthworm", "ease", "easel", "east", "eating", "eaves", "eavesdropper", "ecclesia", "echidna", "eclipse", "ecliptic", "ecology", "economics",
"economy", "ecosystem", "ectoderm", "ectodermal", "ecumenist", "eddy", "edge", "edger", "edible", "editing", "edition", "editor", "editorial",
"education", "eel", "effacement", "effect", "effective", "effectiveness", "effector", "efficacy", "efficiency", "effort", "egg", "egghead",
"eggnog", "eggplant", "ego", "eicosanoid", "ejector", "elbow", "elderberry", "election", "electricity", "electrocardiogram", "electronics",
"element", "elephant", "elevation", "elevator", "eleventh", "elf", "elicit", "eligibility", "elimination", "elite", "elixir", "elk",
"ellipse", "elm", "elongation", "elver", "email", "emanate", "embarrassment", "embassy", "embellishment", "embossing", "embryo", "emerald",
"emergence", "emergency", "emergent", "emery", "emission", "emitter", "emotion", "emphasis", "empire", "employ", "employee", "employer",
"employment", "empowerment", "emu", "enactment", "encirclement", "enclave", "enclosure", "encounter", "encouragement", "encyclopedia",
"end", "endive", "endoderm", "endorsement", "endothelium", "endpoint", "enemy", "energy", "enforcement", "engagement", "engine", "engineer",
"engineering", "enigma", "enjoyment", "enquiry", "enrollment", "enterprise", "entertainment", "enthusiasm", "entirety", "entity", "entrance",
"entree", "entrepreneur", "entry", "envelope", "environment", "envy", "enzyme", "epauliere", "epee", "ephemera", "ephemeris", "ephyra",
"epic", "episode", "epithelium", "epoch", "eponym", "epoxy", "equal", "equality", "equation", "equinox", "equipment", "equity", "equivalent",
"era", "eraser", "erection", "erosion", "error", "escalator", "escape", "escort", "espadrille", "espalier", "essay", "essence", "essential",
"establishment", "estate", "estimate", "estrogen", "estuary", "eternity", "ethernet", "ethics", "ethnicity", "ethyl", "euphonium", "eurocentrism",
"evaluation", "evaluator", "evaporation", "eve", "evening", "event", "everybody", "everyone", "everything", "eviction",
"evidence", "evil", "evocation", "evolution", "exaggeration", "exam", "examination", "examiner", "example",
"exasperation", "excellence", "exception", "excerpt", "excess", "exchange", "excitement", "exclamation", "excursion", "excuse", "execution",
"executive", "executor", "exercise", "exhaust", "exhaustion", "exhibit", "exhibition", "exile", "existence", "exit", "exocrine", "expansion",
"expansionism", "expectancy", "expectation", "expedition", "expense", "experience", "experiment", "experimentation", "expert", "expertise",
"explanation", "exploration", "explorer", "explosion", "export", "expose", "exposition", "exposure", "expression", "extension", "extent",
"exterior", "external", "extinction", "extreme", "extremist", "eye", "eyeball", "eyebrow", "eyebrows", "eyeglasses", "eyelash", "eyelashes",
"eyelid", "eyelids", "eyeliner", "eyestrain", "eyrie", "fabric", "face", "facelift", "facet", "facility", "facsimile", "fact", "factor",
"factory", "faculty", "fahrenheit", "fail", "failure", "fairness", "fairy", "faith", "faithful", "fall", "fallacy", "fame",
"familiar", "familiarity", "family", "fan", "fang", "fanlight", "fanny", "fantasy", "farm", "farmer", "farming", "farmland",
"farrow", "fascia", "fashion", "fat", "fate", "father", "fatigue", "fatigues", "faucet", "fault", "fav", "fava", "favor",
"favorite", "fawn", "fax", "fear", "feast", "feather", "feature", "fedelini", "federation", "fedora", "fee", "feed", "feedback", "feeding",
"feel", "feeling", "fellow", "felony", "female", "fen", "fence", "fencing", "fender", "feng", "fennel", "ferret", "ferry", "ferryboat",
"fertilizer", "festival", "fetus", "few", "fiber", "fiberglass", "fibre", "fibroblast", "fibrosis", "ficlet", "fiction", "fiddle", "field",
"fiery", "fiesta", "fifth", "fig", "fight", "fighter", "figure", "figurine", "file", "filing", "fill", "fillet", "filly", "film", "filter",
"filth", "final", "finance", "financing", "finding", "fine", "finer", "finger", "fingerling", "fingernail", "finish", "finisher", "fir",
"fire", "fireman", "fireplace", "firewall", "firm", "first", "fish", "fishbone", "fisherman", "fishery", "fishing", "fishmonger", "fishnet",
"fisting", "fit", "fitness", "fix", "fixture", "flag", "flair", "flame", "flan", "flanker", "flare", "flash", "flat", "flatboat", "flavor",
"flax", "fleck", "fledgling", "fleece", "flesh", "flexibility", "flick", "flicker", "flight", "flint", "flintlock", "flock",
"flood", "floodplain", "floor", "floozie", "flour", "flow", "flower", "flu", "flugelhorn", "fluke", "flume", "flung", "flute", "fly",
"flytrap", "foal", "foam", "fob", "focus", "fog", "fold", "folder", "folk", "folklore", "follower", "following", "fondue", "font", "food",
"foodstuffs", "fool", "foot", "footage", "football", "footnote", "footprint", "footrest", "footstep", "footstool", "footwear", "forage",
"forager", "foray", "force", "ford", "forearm", "forebear", "forecast", "forehead", "foreigner", "forelimb", "forest", "forestry", "forever",
"forgery", "fork", "form", "formal", "formamide", "format", "formation", "former", "formicarium", "formula", "fort", "forte", "fortnight",
"fortress", "fortune", "forum", "foundation", "founder", "founding", "fountain", "fourths", "fowl", "fox", "foxglove", "fraction", "fragrance",
"frame", "framework", "fratricide", "fraud", "fraudster", "freak", "freckle", "freedom", "freelance", "freezer", "freezing", "freight",
"freighter", "frenzy", "freon", "frequency", "fresco", "friction", "fridge", "friend", "friendship", "fries", "frigate", "fright", "fringe",
"fritter", "frock", "frog", "front", "frontier", "frost", "frosting", "frown", "fruit", "frustration", "fry", "fuel", "fugato",
"fulfillment", "full", "fun", "function", "functionality", "fund", "funding", "fundraising", "funeral", "fur", "furnace", "furniture",
"furry", "fusarium", "futon", "future", "gadget", "gaffe", "gaffer", "gain", "gaiters", "gale", "gallery", "galley",
"gallon", "galoshes", "gambling", "game", "gamebird", "gaming", "gander", "gang", "gap", "garage", "garb", "garbage", "garden",
"garlic", "garment", "garter", "gas", "gasket", "gasoline", "gasp", "gastronomy", "gastropod", "gate", "gateway", "gather", "gathering",
"gator", "gauge", "gauntlet", "gavel", "gazebo", "gazelle", "gear", "gearshift", "geek", "gel", "gelatin", "gelding", "gem", "gemsbok",
"gender", "gene", "general", "generation", "generator", "generosity", "genetics", "genie", "genius", "genocide", "genre", "gentleman",
"geography", "geology", "geometry", "geranium", "gerbil", "gesture", "geyser", "gherkin", "ghost", "giant", "gift", "gig", "gigantism",
"giggle", "ginger", "gingerbread", "ginseng", "giraffe", "girdle", "girl", "girlfriend", "git", "glacier", "gladiolus", "glance", "gland",
"glass", "glasses", "glee", "glen", "glider", "gliding", "glimpse", "globe", "glockenspiel", "gloom", "glory", "glove", "glow", "glucose",
"glue", "glut", "glutamate", "gnat", "gnu", "goal", "goat", "gobbler", "god", "goddess", "godfather", "godmother", "godparent",
"goggles", "going", "gold", "goldfish", "golf", "gondola", "gong", "good", "goodbye", "goodie", "goodness", "goodnight",
"goodwill", "goose", "gopher", "gorilla", "gosling", "gossip", "governance", "government", "governor", "gown", "grace", "grade",
"gradient", "graduate", "graduation", "graffiti", "graft", "grain", "gram", "grammar", "gran", "grand", "grandchild", "granddaughter",
"grandfather", "grandma", "grandmom", "grandmother", "grandpa", "grandparent", "grandson", "granny", "granola", "grant", "grape", "grapefruit",
"graph", "graphic", "grasp", "grass", "grasshopper", "grassland", "gratitude", "gravel", "gravitas", "gravity", "gravy", "gray", "grease",
"greatness", "greed", "green", "greenhouse", "greens", "grenade", "grey", "grid", "grief",
"grill", "grin", "grip", "gripper", "grit", "grocery", "ground", "group", "grouper", "grouse", "grove", "growth", "grub", "guacamole",
"guarantee", "guard", "guava", "guerrilla", "guess", "guest", "guestbook", "guidance", "guide", "guideline", "guilder", "guilt", "guilty",
"guinea", "guitar", "guitarist", "gum", "gumshoe", "gun", "gunpowder", "gutter", "guy", "gym", "gymnast", "gymnastics", "gynaecology",
"gyro", "habit", "habitat", "hacienda", "hacksaw", "hackwork", "hail", "hair", "haircut", "hake", "half",
"halibut", "hall", "halloween", "hallway", "halt", "ham", "hamburger", "hammer", "hammock", "hamster", "hand", "handball",
"handful", "handgun", "handicap", "handle", "handlebar", "handmaiden", "handover", "handrail", "handsaw", "hanger", "happening", "happiness",
"harald", "harbor", "harbour", "hardboard", "hardcover", "hardening", "hardhat", "hardship", "hardware", "hare", "harm",
"harmonica", "harmonise", "harmonize", "harmony", "harp", "harpooner", "harpsichord", "harvest", "harvester", "hash", "hashtag", "hassock",
"haste", "hat", "hatbox", "hatchet", "hatchling", "hate", "hatred", "haunt", "haven", "haversack", "havoc", "hawk", "hay", "haze", "hazel",
"hazelnut", "head", "headache", "headlight", "headline", "headphones", "headquarters", "headrest", "health", "hearing",
"hearsay", "heart", "heartache", "heartbeat", "hearth", "hearthside", "heartwood", "heat", "heater", "heating", "heaven",
"heavy", "hectare", "hedge", "hedgehog", "heel", "heifer", "height", "heir", "heirloom", "helicopter", "helium", "hell", "hellcat", "hello",
"helmet", "helo", "help", "hemisphere", "hemp", "hen", "hepatitis", "herb", "herbs", "heritage", "hermit", "hero", "heroine", "heron",
"herring", "hesitation", "hexagon", "heyday", "hiccups", "hide", "hierarchy", "high", "highland", "highlight",
"highway", "hike", "hiking", "hill", "hint", "hip", "hippodrome", "hippopotamus", "hire", "hiring", "historian", "history", "hit", "hive",
"hobbit", "hobby", "hockey", "hoe", "hog", "hold", "holder", "hole", "holiday", "home", "homeland", "homeownership", "hometown", "homework",
"homicide", "homogenate", "homonym", "honesty", "honey", "honeybee", "honeydew", "honor", "honoree", "hood",
"hoof", "hook", "hop", "hope", "hops", "horde", "horizon", "hormone", "horn", "hornet", "horror", "horse", "horseradish", "horst", "hose",
"hosiery", "hospice", "hospital", "hospitalisation", "hospitality", "hospitalization", "host", "hostel", "hostess", "hotdog", "hotel",
"hound", "hour", "hourglass", "house", "houseboat", "household", "housewife", "housework", "housing", "hovel", "hovercraft", "howard",
"howitzer", "hub", "hubcap", "hubris", "hug", "hugger", "hull", "human", "humanity", "humidity", "hummus", "humor", "humour", "hunchback",
"hundred", "hunger", "hunt", "hunter", "hunting", "hurdle", "hurdler", "hurricane", "hurry", "hurt", "husband", "hut", "hutch", "hyacinth",
"hybridisation", "hybridization", "hydrant", "hydraulics", "hydrocarb", "hydrocarbon", "hydrofoil", "hydrogen", "hydrolyse", "hydrolysis",
"hydrolyze", "hydroxyl", "hyena", "hygienic", "hype", "hyphenation", "hypochondria", "hypothermia", "hypothesis", "ice",
"iceberg", "icebreaker", "icecream", "icicle", "icing", "icon", "icy", "id", "idea", "ideal", "identification", "identity", "ideology",
"idiom", "idiot", "igloo", "ignorance", "ignorant", "ikebana", "illegal", "illiteracy", "illness", "illusion", "illustration", "image",
"imagination", "imbalance", "imitation", "immigrant", "immigration", "immortal", "impact", "impairment", "impala", "impediment", "implement",
"implementation", "implication", "import", "importance", "impostor", "impress", "impression", "imprisonment", "impropriety", "improvement",
"impudence", "impulse", "inability", "inauguration", "inbox", "incandescence", "incarnation", "incense", "incentive",
"inch", "incidence", "incident", "incision", "inclusion", "income", "incompetence", "inconvenience", "increase", "incubation", "independence",
"independent", "index", "indication", "indicator", "indigence", "individual", "industrialisation", "industrialization", "industry", "inequality",
"inevitable", "infancy", "infant", "infarction", "infection", "infiltration", "infinite", "infix", "inflammation", "inflation", "influence",
"influx", "info", "information", "infrastructure", "infusion", "inglenook", "ingrate", "ingredient", "inhabitant", "inheritance", "inhibition",
"inhibitor", "initial", "initialise", "initialize", "initiative", "injunction", "injury", "injustice", "ink", "inlay", "inn", "innervation",
"innocence", "innocent", "innovation", "input", "inquiry", "inscription", "insect", "insectarium", "insert", "inside", "insight", "insolence",
"insomnia", "inspection", "inspector", "inspiration", "installation", "instance", "instant", "instinct", "institute", "institution",
"instruction", "instructor", "instrument", "instrumentalist", "instrumentation", "insulation", "insurance", "insurgence", "insurrection",
"integer", "integral", "integration", "integrity", "intellect", "intelligence", "intensity", "intent", "intention", "intentionality",
"interaction", "interchange", "interconnection", "intercourse", "interest", "interface", "interferometer", "interior", "interject", "interloper",
"internet", "interpretation", "interpreter", "interval", "intervenor", "intervention", "interview", "interviewer", "intestine", "introduction",
"intuition", "invader", "invasion", "invention", "inventor", "inventory", "inverse", "inversion", "investigation", "investigator", "investment",
"investor", "invitation", "invite", "invoice", "involvement", "iridescence", "iris", "iron", "ironclad", "irony", "irrigation", "ischemia",
"island", "isogloss", "isolation", "issue", "item", "itinerary", "ivory", "jack", "jackal", "jacket", "jackfruit", "jade", "jaguar",
"jail", "jailhouse", "jalapeño", "jam", "jar", "jasmine", "jaw", "jazz", "jealousy", "jeans", "jeep", "jelly", "jellybeans", "jellyfish",
"jerk", "jet", "jewel", "jeweller", "jewellery", "jewelry", "jicama", "jiffy", "job", "jockey", "jodhpurs", "joey", "jogging", "joint",
"joke", "jot", "journal", "journalism", "journalist", "journey", "joy", "judge", "judgment", "judo", "jug", "juggernaut", "juice", "julienne",
"jumbo", "jump", "jumper", "jumpsuit", "jungle", "junior", "junk", "junker", "junket", "jury", "justice", "justification", "jute", "kale",
"kamikaze", "kangaroo", "karate", "kayak", "kazoo", "kebab", "keep", "keeper", "kendo", "kennel", "ketch", "ketchup", "kettle", "kettledrum",
"key", "keyboard", "keyboarding", "keystone", "kick", "kid", "kidney", "kielbasa", "kill", "killer", "killing", "kilogram",
"kilometer", "kilt", "kimono", "kinase", "kind", "kindness", "king", "kingdom", "kingfish", "kiosk", "kiss", "kit", "kitchen", "kite",
"kitsch", "kitten", "kitty", "kiwi", "knee", "kneejerk", "knickers", "knife", "knight", "knitting", "knock", "knot",
"knowledge", "knuckle", "koala", "kohlrabi", "kumquat", "lab", "label", "labor", "laboratory", "laborer", "labour", "labourer", "lace",
"lack", "lacquerware", "lad", "ladder", "ladle", "lady", "ladybug", "lag", "lake", "lamb", "lambkin", "lament", "lamp", "lanai", "land",
"landform", "landing", "landmine", "landscape", "lane", "language", "lantern", "lap", "laparoscope", "lapdog", "laptop", "larch", "lard",
"larder", "lark", "larva", "laryngitis", "lasagna", "lashes", "last", "latency", "latex", "lathe", "latitude", "latte", "latter", "laugh",
"laughter", "laundry", "lava", "law", "lawmaker", "lawn", "lawsuit", "lawyer", "lay", "layer", "layout", "lead", "leader", "leadership",
"leading", "leaf", "league", "leaker", "leap", "learning", "leash", "leather", "leave", "leaver", "lecture", "leek", "leeway", "left",
"leg", "legacy", "legal", "legend", "legging", "legislation", "legislator", "legislature", "legitimacy", "legume", "leisure", "lemon",
"lemonade", "lemur", "lender", "lending", "length", "lens", "lentil", "leopard", "leprosy", "leptocephalus", "lesson", "letter",
"lettuce", "level", "lever", "leverage", "leveret", "liability", "liar", "liberty", "libido", "library", "licence", "license", "licensing",
"licorice", "lid", "lie", "lieu", "lieutenant", "life", "lifestyle", "lifetime", "lift", "ligand", "light", "lighting", "lightning",
"lightscreen", "ligula", "likelihood", "likeness", "lilac", "lily", "limb", "lime", "limestone", "limit", "limitation", "limo", "line",
"linen", "liner", "linguist", "linguistics", "lining", "link", "linkage", "linseed", "lion", "lip", "lipid", "lipoprotein", "lipstick",
"liquid", "liquidity", "liquor", "list", "listening", "listing", "literate", "literature", "litigation", "litmus", "litter", "littleneck",
"liver", "livestock", "living", "lizard", "llama", "load", "loading", "loaf", "loafer", "loan", "lobby", "lobotomy", "lobster", "local",
"locality", "location", "lock", "locker", "locket", "locomotive", "locust", "lode", "loft", "log", "loggia", "logic", "login", "logistics",
"logo", "loincloth", "lollipop", "loneliness", "longboat", "longitude", "look", "lookout", "loop", "loophole", "loquat", "lord", "loss",
"lot", "lotion", "lottery", "lounge", "louse", "lout", "love", "lover", "lox", "loyalty", "luck", "luggage", "lumber", "lumberman", "lunch",
"luncheonette", "lunchmeat", "lunchroom", "lung", "lunge", "lust", "lute", "luxury", "lychee", "lycra", "lye", "lymphocyte", "lynx",
"lyocell", "lyre", "lyrics", "lysine", "mRNA", "macadamia", "macaroni", "macaroon", "macaw", "machine", "machinery", "macrame", "macro",
"macrofauna", "madam", "maelstrom", "maestro", "magazine", "maggot", "magic", "magnet", "magnitude", "maid", "maiden", "mail", "mailbox",
"mailer", "mailing", "mailman", "main", "mainland", "mainstream", "maintainer", "maintenance", "maize", "major", "majority",
"makeover", "maker", "makeup", "making", "male", "malice", "mall", "mallard", "mallet", "malnutrition", "mama", "mambo", "mammoth", "man",
"manacle", "management", "manager", "manatee", "mandarin", "mandate", "mandolin", "mangle", "mango", "mangrove", "manhunt", "maniac",
"manicure", "manifestation", "manipulation", "mankind", "manner", "manor", "mansard", "manservant", "mansion", "mantel", "mantle", "mantua",
"manufacturer", "manufacturing", "many", "map", "maple", "mapping", "maracas", "marathon", "marble", "march", "mare", "margarine", "margin",
"mariachi", "marimba", "marines", "marionberry", "mark", "marker", "market", "marketer", "marketing", "marketplace", "marksman", "markup",
"marmalade", "marriage", "marsh", "marshland", "marshmallow", "marten", "marxism", "mascara", "mask", "masonry", "mass", "massage", "mast",
"master", "masterpiece", "mastication", "mastoid", "mat", "match", "matchmaker", "mate", "material", "maternity", "math", "mathematics",
"matrix", "matter", "mattock", "mattress", "max", "maximum", "maybe", "mayonnaise", "mayor", "meadow", "meal", "mean", "meander", "meaning",
"means", "meantime", "measles", "measure", "measurement", "meat", "meatball", "meatloaf", "mecca", "mechanic", "mechanism", "med", "medal",
"media", "median", "medication", "medicine", "medium", "meet", "meeting", "melatonin", "melody", "melon", "member", "membership", "membrane",
"meme", "memo", "memorial", "memory", "men", "menopause", "menorah", "mention", "mentor", "menu", "merchandise", "merchant", "mercury",
"meridian", "meringue", "merit", "mesenchyme", "mess", "message", "messenger", "messy", "metabolite", "metal", "metallurgist", "metaphor",
"meteor", "meteorology", "meter", "methane", "method", "methodology", "metric", "metro", "metronome", "mezzanine", "microlending", "micronutrient",
"microphone", "microwave", "midden", "middle", "middleman", "midline", "midnight", "midwife", "might", "migrant", "migration",
"mile", "mileage", "milepost", "milestone", "military", "milk", "milkshake", "mill", "millennium", "millet", "millimeter", "million",
"millisecond", "millstone", "mime", "mimosa", "min", "mincemeat", "mind", "mine", "mineral", "mineshaft", "mini", "minibus",
"minimalism", "minimum", "mining", "minion", "minister", "mink", "minnow", "minor", "minority", "mint", "minute", "miracle",
"mirror", "miscarriage", "miscommunication", "misfit", "misnomer", "misogyny", "misplacement", "misreading", "misrepresentation", "miss",
"missile", "mission", "missionary", "mist", "mistake", "mister", "misunderstand", "miter", "mitten", "mix", "mixer", "mixture", "moai",
"moat", "mob", "mobile", "mobility", "mobster", "moccasins", "mocha", "mochi", "mode", "model", "modeling", "modem", "modernist", "modernity",
"modification", "molar", "molasses", "molding", "mole", "molecule", "mom", "moment", "monastery", "monasticism", "money", "monger", "monitor",
"monitoring", "monk", "monkey", "monocle", "monopoly", "monotheism", "monsoon", "monster", "month", "monument", "mood", "moody", "moon",
"moonlight", "moonscape", "moonshine", "moose", "mop", "morale", "morbid", "morbidity", "morning", "moron", "morphology", "morsel", "mortal",
"mortality", "mortgage", "mortise", "mosque", "mosquito", "most", "motel", "moth", "mother", "motion", "motivation",
"motive", "motor", "motorboat", "motorcar", "motorcycle", "mound", "mountain", "mouse", "mouser", "mousse", "moustache", "mouth", "mouton",
"movement", "mover", "movie", "mower", "mozzarella", "mud", "muffin", "mug", "mukluk", "mule", "multimedia", "murder", "muscat", "muscatel",
"muscle", "musculature", "museum", "mushroom", "music", "musician", "muskrat", "mussel", "mustache", "mustard",
"mutation", "mutt", "mutton", "mycoplasma", "mystery", "myth", "mythology", "nail", "name", "naming", "nanoparticle", "napkin", "narrative",
"nasal", "nation", "nationality", "native", "naturalisation", "nature", "navigation", "necessity", "neck", "necklace", "necktie", "nectar",
"nectarine", "need", "needle", "neglect", "negligee", "negotiation", "neighbor", "neighborhood", "neighbour", "neighbourhood", "neologism",
"neon", "neonate", "nephew", "nerve", "nest", "nestling", "nestmate", "net", "netball", "netbook", "netsuke", "network", "networking",
"neurobiologist", "neuron", "neuropathologist", "neuropsychiatry", "news", "newsletter", "newspaper", "newsprint", "newsstand", "nexus",
"nibble", "nicety", "niche", "nick", "nickel", "nickname", "niece", "night", "nightclub", "nightgown", "nightingale", "nightlife", "nightlight",
"nightmare", "ninja", "nit", "nitrogen", "nobody", "nod", "node", "noir", "noise", "nonbeliever", "nonconformist", "nondisclosure", "nonsense",
"noodle", "noodles", "noon", "norm", "normal", "normalisation", "normalization", "north", "nose", "notation", "note", "notebook", "notepad",
"nothing", "notice", "notion", "notoriety", "nougat", "noun", "nourishment", "novel", "nucleotidase", "nucleotide", "nudge", "nuke",
"number", "numeracy", "numeric", "numismatist", "nun", "nurse", "nursery", "nursing", "nurture", "nut", "nutmeg", "nutrient", "nutrition",
"nylon", "nymph", "oak", "oar", "oasis", "oat", "oatmeal", "oats", "obedience", "obesity", "obi", "object", "objection", "objective",
"obligation", "oboe", "observation", "observatory", "obsession", "obsidian", "obstacle", "occasion", "occupation", "occurrence", "ocean",
"ocelot", "octagon", "octave", "octavo", "octet", "octopus", "odometer", "odyssey", "oeuvre", "offence", "offense", "offer",
"offering", "office", "officer", "official", "offset", "oil", "okra", "oldie", "oleo", "olive", "omega", "omelet", "omission", "omnivore",
"oncology", "onion", "online", "onset", "opening", "opera", "operating", "operation", "operator", "ophthalmologist", "opinion", "opium",
"opossum", "opponent", "opportunist", "opportunity", "opposite", "opposition", "optimal", "optimisation", "optimist", "optimization",
"option", "orange", "orangutan", "orator", "orchard", "orchestra", "orchid", "order", "ordinary", "ordination", "ore", "oregano", "organ",
"organisation", "organising", "organization", "organizing", "orient", "orientation", "origin", "original", "originality", "ornament",
"osmosis", "osprey", "ostrich", "other", "otter", "ottoman", "ounce", "outback", "outcome", "outfielder", "outfit", "outhouse", "outlaw",
"outlay", "outlet", "outline", "outlook", "output", "outrage", "outrigger", "outrun", "outset", "outside", "oval", "ovary", "oven", "overcharge",
"overclocking", "overcoat", "overexertion", "overflight", "overhead", "overheard", "overload", "overnighter", "overshoot", "oversight",
"overview", "overweight", "owl", "owner", "ownership", "ox", "oxford", "oxygen", "oyster", "ozone", "pace", "pacemaker", "pack", "package",
"packaging", "packet", "pad", "paddle", "paddock", "pagan", "page", "pagoda", "pail", "pain", "paint", "painter", "painting", "paintwork",
"pair", "pajamas", "palace", "palate", "palm", "pamphlet", "pan", "pancake", "pancreas", "panda", "panel", "panic", "pannier", "panpipe",
"pansy", "panther", "panties", "pantologist", "pantology", "pantry", "pants", "pantsuit", "panty", "pantyhose", "papa", "papaya", "paper",
"paperback", "paperwork", "parable", "parachute", "parade", "paradise", "paragraph", "parallelogram", "paramecium", "paramedic", "parameter",
"paranoia", "parcel", "parchment", "pard", "pardon", "parent", "parenthesis", "parenting", "park", "parka", "parking", "parliament",
"parole", "parrot", "parser", "parsley", "parsnip", "part", "participant", "participation", "particle", "particular", "partner", "partnership",
"partridge", "party", "pass", "passage", "passbook", "passenger", "passing", "passion", "passive", "passport", "password", "past", "pasta",
"paste", "pastor", "pastoralist", "pastry", "pasture", "pat", "patch", "pate", "patent", "patentee", "path", "pathogenesis", "pathology",
"pathway", "patience", "patient", "patina", "patio", "patriarch", "patrimony", "patriot", "patrol", "patroller", "patrolling", "patron",
"pattern", "patty", "pattypan", "pause", "pavement", "pavilion", "paw", "pawnshop", "pay", "payee", "payment", "payoff", "pea", "peace",
"peach", "peacoat", "peacock", "peak", "peanut", "pear", "pearl", "peasant", "pecan", "pecker", "pedal", "peek", "peen", "peer",
"pegboard", "pelican", "pelt", "pen", "penalty", "pence", "pencil", "pendant", "pendulum", "penguin", "penicillin", "peninsula", "penis",
"pennant", "penny", "pension", "pentagon", "peony", "people", "pepper", "pepperoni", "percent", "percentage", "perception", "perch",
"perennial", "perfection", "performance", "perfume", "period", "periodical", "peripheral", "permafrost", "permission", "permit", "perp",
"perpendicular", "persimmon", "person", "personal", "personality", "personnel", "perspective", "pest", "pet", "petal", "petition", "petitioner",
"petticoat", "pew", "pharmacist", "pharmacopoeia", "phase", "pheasant", "phenomenon", "phenotype", "pheromone", "philanthropy", "philosopher",
"philosophy", "phone", "phosphate", "photo", "photodiode", "photograph", "photographer", "photography", "photoreceptor", "phrase", "phrasing",
"physical", "physics", "physiology", "pianist", "piano", "piccolo", "pick", "pickax", "pickaxe", "picket", "pickle", "pickup", "picnic",
"picture", "picturesque", "pie", "piece", "pier", "piety", "pig", "pigeon", "piglet", "pigpen", "pigsty", "pike", "pilaf", "pile", "pilgrim",
"pilgrimage", "pill", "pillar", "pillbox", "pillow", "pilot", "pimp", "pimple", "pin", "pinafore", "pine", "pineapple",
"pinecone", "ping", "pink", "pinkie", "pinot", "pinstripe", "pint", "pinto", "pinworm", "pioneer", "pipe", "pipeline", "piracy", "pirate",
"pistol", "pit", "pita", "pitch", "pitcher", "pitching", "pith", "pizza", "place", "placebo", "placement", "placode", "plagiarism",
"plain", "plaintiff", "plan", "plane", "planet", "planning", "plant", "plantation", "planter", "planula", "plaster", "plasterboard",
"plastic", "plate", "platelet", "platform", "platinum", "platter", "platypus", "play", "player", "playground", "playroom", "playwright",
"plea", "pleasure", "pleat", "pledge", "plenty", "plier", "pliers", "plight", "plot", "plough", "plover", "plow", "plowman", "plug",
"plugin", "plum", "plumber", "plume", "plunger", "plywood", "pneumonia", "pocket", "pocketbook", "pod", "podcast", "poem",
"poet", "poetry", "poignance", "point", "poison", "poisoning", "poker", "polarisation", "polarization", "pole", "polenta", "police",
"policeman", "policy", "polish", "politician", "politics", "poll", "polliwog", "pollutant", "pollution", "polo", "polyester", "polyp",
"pomegranate", "pomelo", "pompom", "poncho", "pond", "pony", "pool", "poor", "pop", "popcorn", "poppy", "popsicle", "popularity", "population",
"populist", "porcelain", "porch", "porcupine", "pork", "porpoise", "port", "porter", "portfolio", "porthole", "portion", "portrait",
"position", "possession", "possibility", "possible", "post", "postage", "postbox", "poster", "posterior", "postfix", "pot", "potato",
"potential", "pottery", "potty", "pouch", "poultry", "pound", "pounding", "poverty", "powder", "power", "practice", "practitioner", "prairie",
"praise", "pray", "prayer", "precedence", "precedent", "precipitation", "precision", "predecessor", "preface", "preference", "prefix",
"pregnancy", "prejudice", "prelude", "premeditation", "premier", "premise", "premium", "preoccupation", "preparation", "prescription",
"presence", "present", "presentation", "preservation", "preserves", "presidency", "president", "press", "pressroom", "pressure", "pressurisation",
"pressurization", "prestige", "presume", "pretzel", "prevalence", "prevention", "prey", "price", "pricing", "pride", "priest", "priesthood",
"primary", "primate", "prince", "princess", "principal", "principle", "print", "printer", "printing", "prior", "priority", "prison",
"prisoner", "privacy", "private", "privilege", "prize", "prizefight", "probability", "probation", "probe", "problem", "procedure", "proceedings",
"process", "processing", "processor", "proctor", "procurement", "produce", "producer", "product", "production", "productivity", "profession",
"professional", "professor", "profile", "profit", "progenitor", "program", "programme", "programming", "progress", "progression", "prohibition",
"project", "proliferation", "promenade", "promise", "promotion", "prompt", "pronoun", "pronunciation", "proof", "propaganda",
"propane", "property", "prophet", "proponent", "proportion", "proposal", "proposition", "proprietor", "prose", "prosecution", "prosecutor",
"prospect", "prosperity", "prostacyclin", "prostanoid", "prostrate", "protection", "protein", "protest", "protocol", "providence", "provider",
"province", "provision", "prow", "proximal", "proximity", "prune", "pruner", "pseudocode", "pseudoscience", "psychiatrist", "psychoanalyst",
"psychologist", "psychology", "ptarmigan", "pub", "public", "publication", "publicity", "publisher", "publishing", "pudding", "puddle",
"puffin", "pug", "puggle", "pulley", "pulse", "puma", "pump", "pumpernickel", "pumpkin", "pumpkinseed", "pun", "punch", "punctuation",
"punishment", "pup", "pupa", "pupil", "puppet", "puppy", "purchase", "puritan", "purity", "purple", "purpose", "purr", "purse", "pursuit",
"push", "pusher", "put", "puzzle", "pyramid", "pyridine", "quadrant", "quail", "qualification", "quality", "quantity", "quart", "quarter",
"quartet", "quartz", "queen", "query", "quest", "question", "questioner", "questionnaire", "quiche", "quicksand", "quiet", "quill", "quilt",
"quince", "quinoa", "quit", "quiver", "quota", "quotation", "quote", "rabbi", "rabbit", "raccoon", "race", "racer", "racing", "racism",
"racist", "rack", "radar", "radiator", "radio", "radiosonde", "radish", "raffle", "raft", "rag", "rage", "raid", "rail", "railing", "railroad",
"railway", "raiment", "rain", "rainbow", "raincoat", "rainmaker", "rainstorm", "rainy", "raise", "raisin", "rake", "rally", "ram", "rambler",
"ramen", "ramie", "ranch", "rancher", "randomisation", "randomization", "range", "ranger", "rank", "rap", "rape", "raspberry", "rat",
"rate", "ratepayer", "rating", "ratio", "rationale", "rations", "raven", "ravioli", "rawhide", "ray", "rayon", "razor", "reach", "reactant",
"reaction", "read", "reader", "readiness", "reading", "real", "reality", "realization", "realm", "reamer", "rear", "reason", "reasoning",
"rebel", "rebellion", "reboot", "recall", "recapitulation", "receipt", "receiver", "reception", "receptor", "recess", "recession", "recipe",
"recipient", "reciprocity", "reclamation", "recliner", "recognition", "recollection", "recommendation", "reconsideration", "record",
"recorder", "recording", "recovery", "recreation", "recruit", "rectangle", "red", "redesign", "redhead", "redirect", "rediscovery", "reduction",
"reef", "refectory", "reference", "referendum", "reflection", "reform", "refreshments", "refrigerator", "refuge", "refund", "refusal",
"refuse", "regard", "regime", "region", "regionalism", "register", "registration", "registry", "regret", "regulation", "regulator",
"rehospitalization", "reindeer", "reinscription", "reject", "relation", "relationship", "relative", "relaxation", "relay", "release",
"reliability", "relief", "religion", "relish", "reluctance", "remains", "remark", "reminder", "remnant", "remote", "removal", "renaissance",
"rent", "reorganisation", "reorganization", "repair", "reparation", "repayment", "repeat", "replacement", "replica", "replication", "reply",
"report", "reporter", "reporting", "repository", "representation", "representative", "reprocessing", "republic", "republican", "reputation",
"request", "requirement", "resale", "rescue", "research", "researcher", "resemblance", "reservation", "reserve", "reservoir", "reset",
"residence", "resident", "residue", "resist", "resistance", "resolution", "resolve", "resort", "resource", "respect", "respite", "response",
"responsibility", "rest", "restaurant", "restoration", "restriction", "restroom", "restructuring", "result", "resume", "retailer", "retention",
"rethinking", "retina", "retirement", "retouching", "retreat", "retrospect", "retrospective", "retrospectivity", "return", "reunion",
"revascularisation", "revascularization", "reveal", "revelation", "revenant", "revenge", "revenue", "reversal", "reverse", "review",
"revitalisation", "revitalization", "revival", "revolution", "revolver", "reward", "rhetoric", "rheumatism", "rhinoceros", "rhubarb",
"rhyme", "rhythm", "rib", "ribbon", "rice", "riddle", "ride", "rider", "ridge", "riding", "rifle", "right", "rim", "ring", "ringworm",
"riot", "rip", "ripple", "rise", "riser", "risk", "rite", "ritual", "river", "riverbed", "rivulet", "road", "roadway", "roar", "roast",
"robe", "robin", "robot", "robotics", "rock", "rocker", "rocket", "rod", "role", "roll", "roller", "romaine", "romance",
"roof", "room", "roommate", "rooster", "root", "rope", "rose", "rosemary", "roster", "rostrum", "rotation", "round", "roundabout", "route",
"router", "routine", "row", "rowboat", "rowing", "rubber", "rubric", "ruby", "ruckus", "rudiment", "ruffle", "rug", "rugby",
"ruin", "rule", "ruler", "ruling", "rum", "rumor", "run", "runaway", "runner", "running", "runway", "rush", "rust", "rutabaga", "rye",
"sabre", "sac", "sack", "saddle", "sadness", "safari", "safe", "safeguard", "safety", "saffron", "sage", "sail", "sailboat", "sailing",
"sailor", "saint", "sake", "salad", "salami", "salary", "sale", "salesman", "salmon", "salon", "saloon", "salsa", "salt", "salute", "samovar",
"sampan", "sample", "samurai", "sanction", "sanctity", "sanctuary", "sand", "sandal", "sandbar", "sandpaper", "sandwich", "sanity", "sardine",
"sari", "sarong", "sash", "satellite", "satin", "satire", "satisfaction", "sauce", "saucer", "sauerkraut", "sausage", "savage", "savannah",
"saving", "savings", "savior", "saviour", "savory", "saw", "saxophone", "scaffold", "scale", "scallion", "scallops", "scalp", "scam",
"scanner", "scarecrow", "scarf", "scarification", "scenario", "scene", "scenery", "scent", "schedule", "scheduling", "schema", "scheme",
"schizophrenic", "schnitzel", "scholar", "scholarship", "school", "schoolhouse", "schooner", "science", "scientist", "scimitar", "scissors",
"scooter", "scope", "score", "scorn", "scorpion", "scotch", "scout", "scow", "scrambled", "scrap", "scraper", "scratch", "screamer",
"screen", "screening", "screenwriting", "screw", "screwdriver", "scrim", "scrip", "script", "scripture", "scrutiny", "sculpting",
"sculptural", "sculpture", "sea", "seabass", "seafood", "seagull", "seal", "seaplane", "search", "seashore", "seaside", "season", "seat",
"seaweed", "second", "secrecy", "secret", "secretariat", "secretary", "secretion", "section", "sectional", "sector", "security", "sediment",
"seed", "seeder", "seeker", "seep", "segment", "seizure", "selection", "self", "seller",
"selling", "semantics", "semester", "semicircle", "semicolon", "semiconductor", "seminar", "senate", "senator", "sender", "senior", "sense",
"sensibility", "sensitive", "sensitivity", "sensor", "sentence", "sentencing", "sentiment", "sepal", "separation", "septicaemia", "sequel",
"sequence", "serial", "series", "sermon", "serum", "serval", "servant", "server", "service", "servitude", "sesame", "session", "set",
"setback", "setting", "settlement", "settler", "severity", "sewer", "sex", "sexuality", "shack", "shackle", "shade", "shadow", "shadowbox",
"shakedown", "shaker", "shallot", "shallows", "shame", "shampoo", "shanty", "shape", "share", "shareholder", "shark", "shaw", "shawl",
"shear", "shearling", "sheath", "shed", "sheep", "sheet", "shelf", "shell", "shelter", "sherbet", "sherry", "shield", "shift", "shin",
"shine", "shingle", "ship", "shipper", "shipping", "shipyard", "shirt", "shirtdress", "shoat", "shock", "shoe",
"shoehorn", "shoelace", "shoemaker", "shoes", "shoestring", "shofar", "shoot", "shootdown", "shop", "shopper", "shopping", "shore", "shoreline",
"short", "shortage", "shorts", "shortwave", "shot", "shoulder", "shout", "shovel", "show", "shower", "shred", "shrimp",
"shrine", "shutdown", "sibling", "sick", "sickness", "side", "sideboard", "sideburns", "sidecar", "sidestream", "sidewalk", "siding",
"siege", "sigh", "sight", "sightseeing", "sign", "signal", "signature", "signet", "significance", "signify", "signup", "silence", "silica",
"silicon", "silk", "silkworm", "sill", "silly", "silo", "silver", "similarity", "simple", "simplicity", "simplification", "simvastatin",
"sin", "singer", "singing", "singular", "sink", "sinuosity", "sip", "sir", "sister", "sitar", "site", "situation", "size",
"skate", "skating", "skean", "skeleton", "ski", "skiing", "skill", "skin", "skirt", "skull", "skullcap", "skullduggery", "skunk", "sky",
"skylight", "skyline", "skyscraper", "skywalk", "slang", "slapstick", "slash", "slate", "slavery", "slaw", "sled", "sledge",
"sleep", "sleepiness", "sleeping", "sleet", "sleuth", "slice", "slide", "slider", "slime", "slip", "slipper", "slippers", "slope", "slot",
"sloth", "slump", "smell", "smelting", "smile", "smith", "smock", "smog", "smoke", "smoking", "smolt", "smuggling", "snack", "snail",
"snake", "snakebite", "snap", "snarl", "sneaker", "sneakers", "sneeze", "sniffle", "snob", "snorer", "snow", "snowboarding", "snowflake",
"snowman", "snowmobiling", "snowplow", "snowstorm", "snowsuit", "snuck", "snug", "snuggle", "soap", "soccer", "socialism", "socialist",
"society", "sociology", "sock", "socks", "soda", "sofa", "softball", "softdrink", "softening", "software", "soil", "soldier", "sole",
"solicitation", "solicitor", "solidarity", "solidity", "soliloquy", "solitaire", "solution", "solvency", "sombrero", "somebody", "someone",
"someplace", "somersault", "something", "somewhere", "son", "sonar", "sonata", "song", "songbird", "sonnet", "soot", "sophomore", "soprano",
"sorbet", "sorghum", "sorrel", "sorrow", "sort", "soul", "soulmate", "sound", "soundness", "soup", "source", "sourwood", "sousaphone",
"south", "southeast", "souvenir", "sovereignty", "sow", "soy", "soybean", "space", "spacing", "spade", "spaghetti", "span", "spandex",
"spank", "sparerib", "spark", "sparrow", "spasm", "spat", "spatula", "spawn", "speaker", "speakerphone", "speaking", "spear", "spec",
"special", "specialist", "specialty", "species", "specification", "spectacle", "spectacles", "spectrograph", "spectrum", "speculation",
"speech", "speed", "speedboat", "spell", "spelling", "spelt", "spending", "sphere", "sphynx", "spice", "spider", "spiderling", "spike",
"spill", "spinach", "spine", "spiral", "spirit", "spiritual", "spirituality", "spit", "spite", "spleen", "splendor", "split", "spokesman",
"spokeswoman", "sponge", "sponsor", "sponsorship", "spool", "spoon", "spork", "sport", "sportsman", "spot", "spotlight", "spouse", "sprag",
"sprat", "spray", "spread", "spreadsheet", "spree", "spring", "sprinkles", "sprinter", "sprout", "spruce", "spud", "spume", "spur", "spy",
"spyglass", "square", "squash", "squatter", "squeegee", "squid", "squirrel", "stab", "stability", "stable", "stack", "stacking", "stadium",
"staff", "stag", "stage", "stain", "stair", "staircase", "stake", "stalk", "stall", "stallion", "stamen", "stamina", "stamp", "stance",
"stand", "standard", "standardisation", "standardization", "standing", "standoff", "standpoint", "star", "starboard", "start", "starter",
"state", "statement", "statin", "station", "statistic", "statistics", "statue", "status", "statute", "stay", "steak",
"stealth", "steam", "steamroller", "steel", "steeple", "stem", "stench", "stencil", "step",
"stepdaughter", "stepmother",
"stepson", "stereo", "stew", "steward", "stick", "sticker", "stiletto", "still", "stimulation", "stimulus", "sting",
"stinger", "stitch", "stitcher", "stock", "stockings", "stole", "stomach", "stone", "stonework", "stool",
"stop", "stopsign", "stopwatch", "storage", "store", "storey", "storm", "story", "storyboard", "stot", "stove", "strait",
"strand", "stranger", "strap", "strategy", "straw", "strawberry", "strawman", "stream", "street", "streetcar", "strength", "stress",
"stretch", "strife", "strike", "string", "strip", "stripe", "strobe", "stroke", "structure", "strudel", "struggle", "stucco", "stud",
"student", "studio", "study", "stuff", "stumbling", "stump", "stupidity", "sturgeon", "sty", "style", "styling", "stylus", "sub", "subcomponent",
"subconscious", "subcontractor", "subexpression", "subgroup", "subject", "submarine", "submitter", "subprime", "subroutine", "subscription",
"subsection", "subset", "subsidence", "subsidiary", "subsidy", "substance", "substitution", "subtitle", "suburb", "subway", "success",
"succotash", "suck", "sucker", "suede", "suet", "suffocation", "sugar", "suggestion", "suicide", "suit", "suitcase", "suite", "sulfur",
"sultan", "sum", "summary", "summer", "summit", "sun", "sunbeam", "sunbonnet", "sundae", "sunday", "sundial", "sunflower", "sunglasses",
"sunlamp", "sunlight", "sunrise", "sunroom", "sunset", "sunshine", "superiority", "supermarket", "supernatural", "supervision", "supervisor",
"supper", "supplement", "supplier", "supply", "support", "supporter", "suppression", "supreme", "surface", "surfboard", "surge", "surgeon",
"surgery", "surname", "surplus", "surprise", "surround", "surroundings", "surrounds", "survey", "survival", "survivor", "sushi", "suspect",
"suspenders", "suspension", "sustainment", "sustenance", "swallow", "swamp", "swan", "swanling", "swath", "sweat", "sweater", "sweatshirt",
"sweatshop", "sweatsuit", "sweets", "swell", "swim", "swimming", "swimsuit", "swine", "swing", "switch", "switchboard", "switching",
"swivel", "sword", "swordfight", "swordfish", "sycamore", "symbol", "symmetry", "sympathy", "symptom", "syndicate", "syndrome", "synergy",
"synod", "synonym", "synthesis", "syrup", "system", "tab", "tabby", "tabernacle", "table", "tablecloth", "tablet", "tabletop",
"tachometer", "tackle", "taco", "tactics", "tactile", "tadpole", "tag", "tail", "tailbud", "tailor", "tailspin", "takeover",
"tale", "talent", "talk", "talking", "tamale", "tambour", "tambourine", "tan", "tandem", "tangerine", "tank",
"tanker", "tankful", "tap", "tape", "tapioca", "target", "taro", "tarragon", "tart", "task", "tassel", "taste", "tatami", "tattler",
"tattoo", "tavern", "tax", "taxi", "taxicab", "taxpayer", "tea", "teacher", "teaching", "team", "teammate", "teapot", "tear", "tech",
"technician", "technique", "technologist", "technology", "tectonics", "teen", "teenager", "teepee", "telephone", "telescreen", "teletype",
"television", "tell", "teller", "temp", "temper", "temperature", "temple", "tempo", "temporariness", "temporary", "temptation", "temptress",
"tenant", "tendency", "tender", "tenement", "tenet", "tennis", "tenor", "tension", "tensor", "tent", "tentacle", "tenth", "tepee", "teriyaki",
"term", "terminal", "termination", "terminology", "termite", "terrace", "terracotta", "terrapin", "terrarium", "territory", "terror",
"terrorism", "terrorist", "test", "testament", "testimonial", "testimony", "testing", "text", "textbook", "textual", "texture", "thanks",
"thaw", "theater", "theft", "theism", "theme", "theology", "theory", "therapist", "therapy", "thermals", "thermometer", "thermostat",
"thesis", "thickness", "thief", "thigh", "thing", "thinking", "thirst", "thistle", "thong", "thongs", "thorn", "thought", "thousand",
"thread", "threat", "threshold", "thrift", "thrill", "throat", "throne", "thrush", "thrust", "thug", "thumb", "thump", "thunder", "thunderbolt",
"thunderhead", "thunderstorm", "thyme", "tiara", "tic", "tick", "ticket", "tide", "tie", "tiger", "tights", "tile", "till", "tilt", "timbale",
"timber", "time", "timeline", "timeout", "timer", "timetable", "timing", "timpani", "tin", "tinderbox", "tinkle", "tintype", "tip", "tire",
"tissue", "titanium", "title", "toad", "toast", "toaster", "tobacco", "today", "toe", "toenail", "toffee", "tofu", "tog", "toga", "toilet",
"tolerance", "tolerant", "toll", "tomatillo", "tomato", "tomb", "tomography", "tomorrow", "ton", "tonality", "tone", "tongue",
"tonic", "tonight", "tool", "toot", "tooth", "toothbrush", "toothpaste", "toothpick", "top", "topic", "topsail", "toque",
"toreador", "tornado", "torso", "torte", "tortellini", "tortilla", "tortoise", "tosser", "total", "tote", "touch", "tour",
"tourism", "tourist", "tournament", "towel", "tower", "town", "townhouse", "township", "toy", "trace", "trachoma", "track",
"tracking", "tracksuit", "tract", "tractor", "trade", "trader", "trading", "tradition", "traditionalism", "traffic", "trafficker", "tragedy",
"trail", "trailer", "trailpatrol", "train", "trainer", "training", "trait", "tram", "tramp", "trance", "transaction", "transcript", "transfer",
"transformation", "transit", "transition", "translation", "transmission", "transom", "transparency", "transplantation", "transport",
"transportation", "trap", "trapdoor", "trapezium", "trapezoid", "trash", "travel", "traveler", "tray", "treasure", "treasury", "treat",
"treatment", "treaty", "tree", "trek", "trellis", "tremor", "trench", "trend", "triad", "trial", "triangle", "tribe", "tributary", "trick",
"trigger", "trigonometry", "trillion", "trim", "trinket", "trip", "tripod", "tritone", "triumph", "trolley", "trombone", "troop", "trooper",
"trophy", "trouble", "trousers", "trout", "trove", "trowel", "truck", "trumpet", "trunk", "trust", "trustee", "truth", "try", "tsunami",
"tub", "tuba", "tube", "tuber", "tug", "tugboat", "tuition", "tulip", "tumbler", "tummy", "tuna", "tune", "tunic", "tunnel",
"turban", "turf", "turkey", "turmeric", "turn", "turning", "turnip", "turnover", "turnstile", "turret", "turtle", "tusk", "tussle", "tutu",
"tuxedo", "tweet", "tweezers", "twig", "twilight", "twine", "twins", "twist", "twister", "twitter", "type", "typeface", "typewriter",
"typhoon", "ukulele", "ultimatum", "umbrella", "unblinking", "uncertainty", "uncle", "underclothes", "underestimate", "underground",
"underneath", "underpants", "underpass", "undershirt", "understanding", "understatement", "undertaker", "underwear", "underweight", "underwire",
"underwriting", "unemployment", "unibody", "uniform", "uniformity", "union", "unique", "unit", "unity", "universe", "university", "update",
"upgrade", "uplift", "upper", "upstairs", "upward", "urge", "urgency", "urn", "usage", "use", "user", "usher", "usual", "utensil", "utilisation",
"utility", "utilization", "vacation", "vaccine", "vacuum", "vagrant", "valance", "valentine", "validate", "validity", "valley", "valuable",
"value", "vampire", "van", "vanadyl", "vane", "vanilla", "vanity", "variability", "variable", "variant", "variation", "variety", "vascular",
"vase", "vault", "vaulting", "veal", "vector", "vegetable", "vegetarian", "vegetarianism", "vegetation", "vehicle", "veil", "vein", "veldt",
"vellum", "velocity", "velodrome", "velvet", "vendor", "veneer", "vengeance", "venison", "venom", "venti", "venture", "venue", "veranda",
"verb", "verdict", "verification", "vermicelli", "vernacular", "verse", "version", "vertigo", "verve", "vessel", "vest", "vestment",
"vet", "veteran", "veterinarian", "veto", "viability", "vibe", "vibraphone", "vibration", "vibrissae", "vice", "vicinity", "victim",
"victory", "video", "view", "viewer", "vignette", "villa", "village", "vine", "vinegar", "vineyard", "vintage", "vintner", "vinyl", "viola",
"violation", "violence", "violet", "violin", "virginal", "virtue", "virus", "visa", "viscose", "vise", "vision", "visit", "visitor",
"visor", "vista", "visual", "vitality", "vitamin", "vitro", "vivo", "vixen", "vodka", "vogue", "voice", "void", "vol", "volatility",
"volcano", "volleyball", "volume", "volunteer", "volunteering", "vomit", "vote", "voter", "voting", "voyage", "vulture", "wad", "wafer",
"waffle", "wage", "wagon", "waist", "waistband", "wait", "waiter", "waiting", "waitress", "waiver", "wake", "walk", "walker", "walking",
"walkway", "wall", "wallaby", "wallet", "walnut", "walrus", "wampum", "wannabe", "want", "war", "warden", "wardrobe", "warfare", "warlock",
"warlord", "warming", "warmth", "warning", "warrant", "warren", "warrior", "wasabi", "wash", "washbasin", "washcloth", "washer",
"washtub", "wasp", "waste", "wastebasket", "wasting", "watch", "watcher", "watchmaker", "water", "waterbed", "watercress", "waterfall",
"waterfront", "watermelon", "waterskiing", "waterspout", "waterwheel", "wave", "waveform", "wax", "way", "weakness", "wealth", "weapon",
"wear", "weasel", "weather", "web", "webinar", "webmail", "webpage", "website", "wedding", "wedge", "weed", "weeder", "weedkiller", "week",
"weekend", "weekender", "weight", "weird", "welcome", "welfare", "well", "west", "western", "wetland", "wetsuit",
"whack", "whale", "wharf", "wheat", "wheel", "whelp", "whey", "whip", "whirlpool", "whirlwind", "whisker", "whiskey", "whisper", "whistle",
"white", "whole", "wholesale", "wholesaler", "whorl", "wick", "widget", "widow", "width", "wife", "wifi", "wild", "wildebeest", "wilderness",
"wildlife", "will", "willingness", "willow", "win", "wind", "windage", "window", "windscreen", "windshield", "wine", "winery",
"wing", "wingman", "wingtip", "wink", "winner", "winter", "wire", "wiretap", "wiring", "wisdom", "wiseguy", "wish", "wisteria", "wit",
"witch", "withdrawal", "witness", "wok", "wolf", "woman", "wombat", "wonder", "wont", "wood", "woodchuck", "woodland",
"woodshed", "woodwind", "wool", "woolens", "word", "wording", "work", "workbench", "worker", "workforce", "workhorse", "working", "workout",
"workplace", "workshop", "world", "worm", "worry", "worship", "worshiper", "worth", "wound", "wrap", "wraparound", "wrapper", "wrapping",
"wreck", "wrecker", "wren", "wrench", "wrestler", "wriggler", "wrinkle", "wrist", "writer", "writing", "wrong", "xylophone", "yacht",
"yahoo", "yak", "yam", "yang", "yard", "yarmulke", "yarn", "yawl", "year", "yeast", "yellow", "yellowjacket", "yesterday", "yew", "yin",
"yoga", "yogurt", "yoke", "yolk", "young", "youngster", "yourself", "youth", "yoyo", "yurt", "zampone", "zebra", "zebrafish", "zen",
"zephyr", "zero", "ziggurat", "zinc", "zipper", "zither", "zombie", "zone", "zoo", "zoologist", "zoology", "zucchini"
};
std::string_view obfuscateWord(std::string_view src, WordMap & obfuscate_map, WordSet & used_nouns, SipHash hash_func)
{
/// Prevent using too many nouns
if (obfuscate_map.size() * 2 > nouns.size())
throw Exception("Too many unique identifiers in queries", ErrorCodes::TOO_MANY_TEMPORARY_COLUMNS);
std::string_view & mapped = obfuscate_map[src];
if (!mapped.empty())
return mapped;
hash_func.update(src.data(), src.size());
std::string_view noun = nouns.begin()[hash_func.get64() % nouns.size()];
/// Prevent collisions
while (!used_nouns.insert(noun).second)
{
hash_func.update('\0');
noun = nouns.begin()[hash_func.get64() % nouns.size()];
}
mapped = noun;
return mapped;
}
void obfuscateIdentifier(std::string_view src, WriteBuffer & result, WordMap & obfuscate_map, WordSet & used_nouns, SipHash hash_func)
{
/// Find words in form 'snake_case', 'CamelCase' or 'ALL_CAPS'.
const char * src_pos = src.data();
const char * src_end = src_pos + src.size();
const char * word_begin = src_pos;
bool word_has_alphanumerics = false;
auto append_word = [&]
{
std::string_view word(word_begin, src_pos - word_begin);
if (keep_words.count(word))
{
result.write(word.data(), word.size());
}
else
{
std::string_view obfuscated_word = obfuscateWord(word, obfuscate_map, used_nouns, hash_func);
/// Match the style of source word.
bool first_caps = !word.empty() && isUpperAlphaASCII(word[0]);
bool all_caps = first_caps && word.size() >= 2 && isUpperAlphaASCII(word[1]);
for (size_t i = 0, size = obfuscated_word.size(); i < size; ++i)
{
if (all_caps || (i == 0 && first_caps))
result.write(toUpperIfAlphaASCII(obfuscated_word[i]));
else
result.write(obfuscated_word[i]);
}
}
word_begin = src_pos;
word_has_alphanumerics = false;
};
while (src_pos < src_end)
{
if (isAlphaNumericASCII(src_pos[0]))
word_has_alphanumerics = true;
if (word_has_alphanumerics && src_pos[0] == '_')
{
append_word();
result.write('_');
++word_begin;
}
else if (word_has_alphanumerics && isUpperAlphaASCII(src_pos[0]) && isLowerAlphaASCII(src_pos[-1])) /// xX
{
append_word();
}
++src_pos;
}
if (word_begin < src_pos)
append_word();
}
void obfuscateLiteral(std::string_view src, WriteBuffer & result, SipHash hash_func)
{
const char * src_pos = src.data();
const char * src_end = src_pos + src.size();
while (src_pos < src_end)
{
/// Date
if (src_pos + strlen("0000-00-00") <= src_end
&& isNumericASCII(src_pos[0])
&& isNumericASCII(src_pos[1])
&& isNumericASCII(src_pos[2])
&& isNumericASCII(src_pos[3])
&& src_pos[4] == '-'
&& isNumericASCII(src_pos[5])
&& isNumericASCII(src_pos[6])
&& src_pos[7] == '-'
&& isNumericASCII(src_pos[8])
&& isNumericASCII(src_pos[9]))
{
DayNum date;
ReadBufferFromMemory in(src_pos, strlen("0000-00-00"));
readDateText(date, in);
SipHash hash_func_date = hash_func;
if (date != 0)
{
date += hash_func_date.get64() % 256;
}
writeDateText(date, result);
src_pos += strlen("0000-00-00");
/// DateTime
if (src_pos + strlen(" 00:00:00") <= src_end
&& isNumericASCII(src_pos[1])
&& isNumericASCII(src_pos[2])
&& src_pos[3] == ':'
&& isNumericASCII(src_pos[4])
&& isNumericASCII(src_pos[5])
&& src_pos[6] == ':'
&& isNumericASCII(src_pos[7])
&& isNumericASCII(src_pos[8]))
{
result.write(src_pos[0]);
hash_func_date.update(src_pos + 1, strlen("00:00:00"));
uint64_t hash_value = hash_func_date.get64();
uint32_t new_hour = hash_value % 24;
hash_value /= 24;
uint32_t new_minute = hash_value % 60;
hash_value /= 60;
uint32_t new_second = hash_value % 60;
result.write('0' + (new_hour / 10));
result.write('0' + (new_hour % 10));
result.write(':');
result.write('0' + (new_minute / 10));
result.write('0' + (new_minute % 10));
result.write(':');
result.write('0' + (new_second / 10));
result.write('0' + (new_second % 10));
src_pos += strlen(" 00:00:00");
}
}
else if (isNumericASCII(src_pos[0]))
{
/// Number
if (src_pos[0] == '0' || src_pos[0] == '1')
{
/// Keep zero and one as is.
result.write(src_pos[0]);
++src_pos;
}
else
{
ReadBufferFromMemory in(src_pos, src_end - src_pos);
uint64_t num;
readIntText(num, in);
SipHash hash_func_num = hash_func;
hash_func_num.update(src_pos, in.count());
src_pos += in.count();
/// Obfuscate number but keep it within same power of two range.
uint64_t obfuscated = hash_func_num.get64();
uint64_t log2 = bitScanReverse(num);
obfuscated = (1ULL << log2) + obfuscated % (1ULL << log2);
writeIntText(obfuscated, result);
}
}
else if (src_pos + 1 < src_end
&& (src_pos[0] == 'e' || src_pos[0] == 'E')
&& (isNumericASCII(src_pos[1]) || (src_pos[1] == '-' && src_pos + 2 < src_end && isNumericASCII(src_pos[2]))))
{
/// Something like an exponent of floating point number. Keep it as is.
/// But if it looks like a large number, overflow it into 16 bit.
result.write(src_pos[0]);
++src_pos;
ReadBufferFromMemory in(src_pos, src_end - src_pos);
int16_t num;
readIntText(num, in);
writeIntText(num, result);
src_pos += in.count();
}
else if (isAlphaASCII(src_pos[0]))
{
/// Alphabetial characters
const char * alpha_end = src_pos + 1;
while (alpha_end < src_end && isAlphaASCII(*alpha_end))
++alpha_end;
hash_func.update(src_pos, alpha_end - src_pos);
pcg64 rng(hash_func.get64());
while (src_pos < alpha_end)
{
auto random = rng();
if (isLowerAlphaASCII(*src_pos))
result.write('a' + random % 26);
else
result.write('A' + random % 26);
++src_pos;
}
}
else if (isASCII(src_pos[0]))
{
/// Punctuation, whitespace and control characters - keep as is.
result.write(src_pos[0]);
++src_pos;
}
else if (src_pos[0] <= '\xBF')
{
/// Continuation of UTF-8 sequence.
hash_func.update(src_pos[0]);
uint64_t hash = hash_func.get64();
char c = 0x80 + hash % (0xC0 - 0x80);
result.write(c);
++src_pos;
}
else
{
/// Start of UTF-8 sequence.
hash_func.update(src_pos[0]);
uint64_t hash = hash_func.get64();
if (src_pos[0] < '\xE0')
{
char c = 0xC0 + hash % 32;
result.write(c);
}
else if (src_pos[0] < '\xF0')
{
char c = 0xE0 + hash % 16;
result.write(c);
}
else
{
char c = 0xF0 + hash % 8;
result.write(c);
}
++src_pos;
}
}
}
}
void obfuscateQueries(
std::string_view src,
WriteBuffer & result,
WordMap & obfuscate_map,
WordSet & used_nouns,
SipHash hash_func,
KnownIdentifierFunc known_identifier_func)
{
Lexer lexer(src.data(), src.data() + src.size());
while (true)
{
Token token = lexer.nextToken();
std::string_view whole_token(token.begin, token.size());
if (token.isEnd())
break;
if (token.type == TokenType::BareWord)
{
std::string whole_token_uppercase(whole_token);
Poco::toUpperInPlace(whole_token_uppercase);
if (keywords.count(whole_token_uppercase)
|| known_identifier_func(whole_token))
{
/// Keep keywords as is.
result.write(token.begin, token.size());
}
else
{
/// Obfuscate identifiers
obfuscateIdentifier(whole_token, result, obfuscate_map, used_nouns, hash_func);
}
}
else if (token.type == TokenType::QuotedIdentifier)
{
assert(token.size() >= 2);
/// Write quotes and the obfuscated content inside.
result.write(*token.begin);
obfuscateIdentifier({token.begin + 1, token.size() - 2}, result, obfuscate_map, used_nouns, hash_func);
result.write(token.end[-1]);
}
else if (token.type == TokenType::Number)
{
obfuscateLiteral(whole_token, result, hash_func);
}
else if (token.type == TokenType::StringLiteral)
{
assert(token.size() >= 2);
result.write(*token.begin);
obfuscateLiteral({token.begin + 1, token.size() - 2}, result, hash_func);
result.write(token.end[-1]);
}
else if (token.type == TokenType::Comment)
{
/// Skip comments - they may contain confidential info.
}
else
{
/// Everyting else is kept as is.
result.write(token.begin, token.size());
}
}
}
}

View File

@ -0,0 +1,50 @@
#pragma once
#include <string>
#include <unordered_set>
#include <unordered_map>
#include <string_view>
#include <functional>
#include <Common/SipHash.h>
namespace DB
{
class WriteBuffer;
using WordMap = std::unordered_map<std::string_view, std::string_view>;
using WordSet = std::unordered_set<std::string_view>;
using KnownIdentifierFunc = std::function<bool(std::string_view)>;
/** Takes one or multiple queries and obfuscates them by replacing identifiers to pseudorandom words
* and replacing literals to random values, while preserving the structure of the queries and the general sense.
*
* Its intended use case is when the user wants to share their queries for testing and debugging
* but is afraid to disclose the details about their column names, domain area and values of constants.
*
* It can obfuscate multiple queries in consistent fashion - identical names will be transformed to identical results.
*
* The function is not guaranteed to always give correct result or to be secure. It's implemented in "best effort" fashion.
*
* @param src - a string with source queries.
* @param result - where the obfuscated queries will be written.
* @param obfuscate_map - information about substituted identifiers
* (pass empty map at the beginning or reuse it from previous invocation to get consistent result)
* @param used_nouns - information about words used for substitution
* (pass empty set at the beginning or reuse it from previous invocation to get consistent result)
* @param hash_func - hash function that will be used as a pseudorandom source,
* it's recommended to preseed the function before passing here.
* @param known_identifier_func - a function that returns true if identifier is known name
* (of function, aggregate function, etc. that should be kept as is). If it returns false, identifier will be obfuscated.
*/
void obfuscateQueries(
std::string_view src,
WriteBuffer & result,
WordMap & obfuscate_map,
WordSet & used_nouns,
SipHash hash_func,
KnownIdentifierFunc known_identifier_func);
}

View File

@ -85,6 +85,7 @@ SRCS(
MySQL/ASTDeclareReference.cpp
MySQL/ASTDeclareSubPartition.cpp
MySQL/ASTDeclareTableOptions.cpp
obfuscateQueries.cpp
parseDatabaseAndTableName.cpp
parseIdentifierOrStringLiteral.cpp
parseIntervalKind.cpp

View File

@ -10,7 +10,6 @@ ForkProcessor::Status ForkProcessor::prepare()
/// Check can output.
bool all_finished = true;
bool all_can_push = true;
size_t num_active_outputs = 0;
@ -18,7 +17,6 @@ ForkProcessor::Status ForkProcessor::prepare()
{
if (!output.isFinished())
{
all_finished = false;
++num_active_outputs;
/// The order is important.
@ -27,7 +25,7 @@ ForkProcessor::Status ForkProcessor::prepare()
}
}
if (all_finished)
if (0 == num_active_outputs)
{
input.close();
return Status::Finished;

View File

@ -36,7 +36,7 @@ void LineAsStringRowInputFormat::readLineObject(IColumn & column)
while (newline)
{
pos = find_first_symbols<'\n', '\\'>(buf.position(), buf.buffer().end());
pos = find_first_symbols<'\n'>(buf.position(), buf.buffer().end());
buf.position() = pos;
if (buf.position() == buf.buffer().end())
{
@ -47,12 +47,6 @@ void LineAsStringRowInputFormat::readLineObject(IColumn & column)
{
newline = false;
}
else if (*buf.position() == '\\')
{
++buf.position();
if (!buf.eof())
++buf.position();
}
}
buf.makeContinuousMemoryFromCheckpointToPos();
@ -64,10 +58,12 @@ void LineAsStringRowInputFormat::readLineObject(IColumn & column)
bool LineAsStringRowInputFormat::readRow(MutableColumns & columns, RowReadExtension &)
{
if (!buf.eof())
readLineObject(*columns[0]);
if (buf.eof())
return false;
return !buf.eof();
readLineObject(*columns[0]);
return true;
}
void registerInputFormatProcessorLineAsString(FormatFactory & factory)
@ -82,4 +78,30 @@ void registerInputFormatProcessorLineAsString(FormatFactory & factory)
});
}
static bool fileSegmentationEngineLineAsStringpImpl(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size)
{
char * pos = in.position();
bool need_more_data = true;
while (loadAtPosition(in, memory, pos) && need_more_data)
{
pos = find_first_symbols<'\n'>(pos, in.buffer().end());
if (pos == in.buffer().end())
continue;
if (memory.size() + static_cast<size_t>(pos - in.position()) >= min_chunk_size)
need_more_data = false;
++pos;
}
saveUpToPosition(in, memory, pos);
return loadAtPosition(in, memory, pos);
}
void registerFileSegmentationEngineLineAsString(FormatFactory & factory)
{
factory.registerFileSegmentationEngine("LineAsString", &fileSegmentationEngineLineAsStringpImpl);
}
}

View File

@ -44,7 +44,7 @@ public:
/// Information about different TTLs for part. Can be used by
/// TTLSelector to assign merges with TTL.
const MergeTreeDataPartTTLInfos * ttl_infos;
const MergeTreeDataPartTTLInfos * ttl_infos = nullptr;
/// Part compression codec definition.
ASTPtr compression_codec_desc;

View File

@ -717,6 +717,8 @@ protected:
bool require_part_metadata;
/// Relative path data, changes during rename for ordinary databases use
/// under lockForShare if rename is possible.
String relative_data_path;

View File

@ -223,7 +223,7 @@ Pipe MergeTreeDataSelectExecutor::readFromParts(
if (part_column_queried)
VirtualColumnUtils::filterBlockWithQuery(query_info.query, virtual_columns_block, context);
std::multiset<String> part_values = VirtualColumnUtils::extractSingleValueFromBlock<String>(virtual_columns_block, "_part");
auto part_values = VirtualColumnUtils::extractSingleValueFromBlock<String>(virtual_columns_block, "_part");
metadata_snapshot->check(real_column_names, data.getVirtuals(), data.getStorageID());

View File

@ -56,10 +56,12 @@ void ReplicatedMergeTreeCleanupThread::run()
void ReplicatedMergeTreeCleanupThread::iterate()
{
storage.clearOldPartsAndRemoveFromZK();
storage.clearOldWriteAheadLogs();
{
auto lock = storage.lockForShare(RWLockImpl::NO_QUERY, storage.getSettings()->lock_acquire_timeout_for_background_operations);
/// Both use relative_data_path which changes during rename, so we
/// do it under share lock
storage.clearOldWriteAheadLogs();
storage.clearOldTemporaryDirectories();
}

Some files were not shown because too many files have changed in this diff Show More