Merge branch 'master' into fix-lowcardinality-hash-join

Antonio Andelic 2022-03-25 11:12:31 +00:00
commit d7fb51269a
159 changed files with 1999 additions and 1189 deletions

View File

@ -11,6 +11,7 @@
* Make `arrayCompact` function behave as other higher-order functions: perform compaction not on the results of a lambda function but on the original array. If you're using nontrivial lambda functions in `arrayCompact` you may restore the old behaviour by wrapping the `arrayCompact` arguments into `arrayMap`. Closes [#34010](https://github.com/ClickHouse/ClickHouse/issues/34010) [#18535](https://github.com/ClickHouse/ClickHouse/issues/18535) [#14778](https://github.com/ClickHouse/ClickHouse/issues/14778). [#34795](https://github.com/ClickHouse/ClickHouse/pull/34795) ([Alexandre Snarskii](https://github.com/snar)).
* Change implementation-specific behavior on overflow of the function `toDatetime`: the result is now saturated to the nearest supported min/max datetime instant instead of wrapping around. This change is highlighted as "backward incompatible" because someone may unintentionally rely on the old behavior. [#32898](https://github.com/ClickHouse/ClickHouse/pull/32898) ([HaiBo Li](https://github.com/marising)).
* Make the functions `cast(value, 'IPv4')` and `cast(value, 'IPv6')` behave the same as `toIPv4` and `toIPv6`. The behavior for incorrect IP addresses passed into `toIPv4` and `toIPv6` has changed: an exception is now raised for an invalid IP address, where previously these functions returned a default value. Added functions `IPv4StringToNumOrDefault`, `IPv4StringToNumOrNull`, `IPv6StringToNumOrDefault`, `IPv6StringToNumOrNull`, `toIPv4OrDefault`, `toIPv4OrNull`, `toIPv6OrDefault`, `toIPv6OrNull`. The functions `IPv4StringToNumOrDefault`, `toIPv4OrDefault`, `toIPv6OrDefault` should be used if previous logic relied on `IPv4StringToNum`, `toIPv4`, `toIPv6` returning a default value for an invalid address. Added setting `cast_ipv4_ipv6_default_on_conversion_error`; if this setting is enabled, the IP address conversion functions behave as before. Closes [#22825](https://github.com/ClickHouse/ClickHouse/issues/22825). Closes [#5799](https://github.com/ClickHouse/ClickHouse/issues/5799). Closes [#35156](https://github.com/ClickHouse/ClickHouse/issues/35156). [#35240](https://github.com/ClickHouse/ClickHouse/pull/35240) ([Maksim Kita](https://github.com/kitaisreal)).
#### New Feature
@ -366,7 +367,7 @@
#### Improvement
* Date-and-time conversion functions that generate a time before `1970-01-01 00:00:00` now saturate to zero instead of overflowing. [#29953](https://github.com/ClickHouse/ClickHouse/pull/29953) ([Amos Bird](https://github.com/amosbird)). This also fixes a bug in index analysis when a date truncation function yields a result before the Unix epoch.
* Always display resource usage (total CPU usage, total RAM usage and max RAM usage per host) in client. [#33271](https://github.com/ClickHouse/ClickHouse/pull/33271) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Improve `Bool` type serialization and deserialization, check the range of values. [#32984](https://github.com/ClickHouse/ClickHouse/pull/32984) ([Kruglov Pavel](https://github.com/Avogar)).
* If an invalid setting is defined using the `SET` query or using the query parameters in the HTTP request, error message will contain suggestions that are similar to the invalid setting string (if any exists). [#32946](https://github.com/ClickHouse/ClickHouse/pull/32946) ([Antonio Andelic](https://github.com/antonio2368)).

View File

@ -266,7 +266,7 @@ if (OBJCOPY_PATH AND YANDEX_OFFICIAL_BUILD AND (NOT CMAKE_TOOLCHAIN_FILE))
endif ()
# Allows to build stripped binary in a separate directory
if (OBJCOPY_PATH AND READELF_PATH)
if (OBJCOPY_PATH AND STRIP_PATH)
option(INSTALL_STRIPPED_BINARIES "Build stripped binaries with debug info in separate directory" OFF)
if (INSTALL_STRIPPED_BINARIES)
set(STRIPPED_BINARIES_OUTPUT "stripped" CACHE STRING "A separate directory for stripped information")

View File

@ -1,28 +0,0 @@
#!/usr/bin/env bash
BINARY_PATH=$1
BINARY_NAME=$(basename "$BINARY_PATH")
DESTINATION_STRIPPED_DIR=$2
OBJCOPY_PATH=${3:-objcopy}
READELF_PATH=${4:-readelf}
BUILD_ID=$($READELF_PATH -n "$1" | sed -n '/Build ID/ { s/.*: //p; q; }')
BUILD_ID_PREFIX=${BUILD_ID:0:2}
BUILD_ID_SUFFIX=${BUILD_ID:2}
DESTINATION_DEBUG_INFO_DIR="$DESTINATION_STRIPPED_DIR/lib/debug/.build-id"
DESTINATION_STRIP_BINARY_DIR="$DESTINATION_STRIPPED_DIR/bin"
mkdir -p "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX"
mkdir -p "$DESTINATION_STRIP_BINARY_DIR"
cp "$BINARY_PATH" "$DESTINATION_STRIP_BINARY_DIR/$BINARY_NAME"
$OBJCOPY_PATH --only-keep-debug --compress-debug-sections "$DESTINATION_STRIP_BINARY_DIR/$BINARY_NAME" "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug"
chmod 0644 "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug"
chown 0:0 "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug"
strip --remove-section=.comment --remove-section=.note "$DESTINATION_STRIP_BINARY_DIR/$BINARY_NAME"
$OBJCOPY_PATH --add-gnu-debuglink "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug" "$DESTINATION_STRIP_BINARY_DIR/$BINARY_NAME"

View File

@ -11,16 +11,43 @@ macro(clickhouse_strip_binary)
message(FATAL_ERROR "A binary path name must be provided for stripping binary")
endif()
if (NOT DEFINED STRIP_DESTINATION_DIR)
message(FATAL_ERROR "Destination directory for stripped binary must be provided")
endif()
add_custom_command(TARGET ${STRIP_TARGET} POST_BUILD
COMMAND bash ${ClickHouse_SOURCE_DIR}/cmake/strip.sh ${STRIP_BINARY_PATH} ${STRIP_DESTINATION_DIR} ${OBJCOPY_PATH} ${READELF_PATH}
COMMENT "Stripping clickhouse binary" VERBATIM
COMMAND mkdir -p "${STRIP_DESTINATION_DIR}/lib/debug/bin"
COMMAND mkdir -p "${STRIP_DESTINATION_DIR}/bin"
COMMAND cp "${STRIP_BINARY_PATH}" "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}"
COMMAND "${OBJCOPY_PATH}" --only-keep-debug --compress-debug-sections "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug"
COMMAND chmod 0644 "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug"
COMMAND "${STRIP_PATH}" --remove-section=.comment --remove-section=.note "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}"
COMMAND "${OBJCOPY_PATH}" --add-gnu-debuglink "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug" "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}"
COMMENT "Stripping clickhouse binary" VERBATIM
)
install(PROGRAMS ${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET} DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
install(DIRECTORY ${STRIP_DESTINATION_DIR}/lib/debug DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT clickhouse)
install(FILES ${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug DESTINATION ${CMAKE_INSTALL_LIBDIR}/debug/${CMAKE_INSTALL_FULL_BINDIR}/${STRIP_TARGET}.debug COMPONENT clickhouse)
endmacro()
macro(clickhouse_make_empty_debug_info_for_nfpm)
set(oneValueArgs TARGET DESTINATION_DIR)
cmake_parse_arguments(EMPTY_DEBUG "" "${oneValueArgs}" "" ${ARGN})
if (NOT DEFINED EMPTY_DEBUG_TARGET)
message(FATAL_ERROR "A target name must be provided for stripping binary")
endif()
if (NOT DEFINED EMPTY_DEBUG_DESTINATION_DIR)
message(FATAL_ERROR "Destination directory for empty debug must be provided")
endif()
add_custom_command(TARGET ${EMPTY_DEBUG_TARGET} POST_BUILD
COMMAND mkdir -p "${EMPTY_DEBUG_DESTINATION_DIR}/lib/debug"
COMMAND touch "${EMPTY_DEBUG_DESTINATION_DIR}/lib/debug/${EMPTY_DEBUG_TARGET}.debug"
COMMENT "Addiding empty debug info for NFPM" VERBATIM
)
install(FILES "${EMPTY_DEBUG_DESTINATION_DIR}/lib/debug/${EMPTY_DEBUG_TARGET}.debug" DESTINATION "${CMAKE_INSTALL_LIBDIR}/debug/${CMAKE_INSTALL_FULL_BINDIR}" COMPONENT clickhouse)
endmacro()

View File

@ -170,32 +170,32 @@ else ()
message (FATAL_ERROR "Cannot find objcopy.")
endif ()
# Readelf (FIXME copypaste)
# Strip (FIXME copypaste)
if (COMPILER_GCC)
find_program (READELF_PATH NAMES "llvm-readelf" "llvm-readelf-13" "llvm-readelf-12" "llvm-readelf-11" "readelf")
find_program (STRIP_PATH NAMES "llvm-strip" "llvm-strip-13" "llvm-strip-12" "llvm-strip-11" "strip")
else ()
find_program (READELF_PATH NAMES "llvm-readelf-${COMPILER_VERSION_MAJOR}" "llvm-readelf" "readelf")
find_program (STRIP_PATH NAMES "llvm-strip-${COMPILER_VERSION_MAJOR}" "llvm-strip" "strip")
endif ()
if (NOT READELF_PATH AND OS_DARWIN)
if (NOT STRIP_PATH AND OS_DARWIN)
find_program (BREW_PATH NAMES "brew")
if (BREW_PATH)
execute_process (COMMAND ${BREW_PATH} --prefix llvm ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE LLVM_PREFIX)
if (LLVM_PREFIX)
find_program (READELF_PATH NAMES "llvm-readelf" PATHS "${LLVM_PREFIX}/bin" NO_DEFAULT_PATH)
find_program (STRIP_PATH NAMES "llvm-strip" PATHS "${LLVM_PREFIX}/bin" NO_DEFAULT_PATH)
endif ()
if (NOT READELF_PATH)
if (NOT STRIP_PATH)
execute_process (COMMAND ${BREW_PATH} --prefix binutils ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE BINUTILS_PREFIX)
if (BINUTILS_PREFIX)
find_program (READELF_PATH NAMES "readelf" PATHS "${BINUTILS_PREFIX}/bin" NO_DEFAULT_PATH)
find_program (STRIP_PATH NAMES "strip" PATHS "${BINUTILS_PREFIX}/bin" NO_DEFAULT_PATH)
endif ()
endif ()
endif ()
endif ()
if (READELF_PATH)
message (STATUS "Using readelf: ${READELF_PATH}")
if (STRIP_PATH)
message (STATUS "Using strip: ${STRIP_PATH}")
else ()
message (FATAL_ERROR "Cannot find readelf.")
message (FATAL_ERROR "Cannot find strip.")
endif ()

contrib/libxml2 vendored

@ -1 +1 @@
Subproject commit 18890f471c420411aa3c989e104d090966ec9dbf
Subproject commit a075d256fd9ff15590b86d981b75a50ead124fca

View File

@ -1,4 +1,3 @@
# rebuild in #33610
# docker build -t clickhouse/docs-check .
ARG FROM_TAG=latest
FROM clickhouse/docs-builder:$FROM_TAG

View File

@ -137,7 +137,7 @@ CREATE TABLE test.test_orc
`f_array_array_float` Array(Array(Float32)),
`day` String
)
ENGINE = Hive('thrift://202.168.117.26:9083', 'test', 'test_orc')
ENGINE = Hive('thrift://localhost:9083', 'test', 'test_orc')
PARTITION BY day
```

View File

@ -10,7 +10,7 @@ cssmin==0.2.0
future==0.18.2
htmlmin==0.1.12
idna==2.10
Jinja2>=3.0.3
Jinja2==3.0.3
jinja2-highlight==0.6.1
jsmin==3.0.0
livereload==2.6.3

View File

@ -140,7 +140,7 @@ CREATE TABLE test.test_orc
`f_array_array_float` Array(Array(Float32)),
`day` String
)
ENGINE = Hive('thrift://202.168.117.26:9083', 'test', 'test_orc')
ENGINE = Hive('thrift://localhost:9083', 'test', 'test_orc')
PARTITION BY day
```

View File

@ -15,7 +15,7 @@
```
┌─name─────────────────────┬─is_aggregate─┬─case_insensitive─┬─alias_to─┐
│ sumburConsistentHash │ 0 │ 0 │ │
│ yandexConsistentHash │ 0 │ 0 │ │
│ kostikConsistentHash │ 0 │ 0 │ │
│ demangle │ 0 │ 0 │ │
│ addressToLine │ 0 │ 0 │ │
│ JSONExtractRaw │ 0 │ 0 │ │

View File

@ -21,8 +21,12 @@ description: |
This package contains the debugging symbols for clickhouse-common.
contents:
- src: root/usr/lib/debug
dst: /usr/lib/debug
- src: root/usr/lib/debug/usr/bin/clickhouse.debug
dst: /usr/lib/debug/usr/bin/clickhouse.debug
- src: root/usr/lib/debug/usr/bin/clickhouse-odbc-bridge.debug
dst: /usr/lib/debug/usr/bin/clickhouse-odbc-bridge.debug
- src: root/usr/lib/debug/usr/bin/clickhouse-library-bridge.debug
dst: /usr/lib/debug/usr/bin/clickhouse-library-bridge.debug
# docs
- src: ../AUTHORS
dst: /usr/share/doc/clickhouse-common-static-dbg/AUTHORS

View File

@ -473,18 +473,11 @@ else ()
if (INSTALL_STRIPPED_BINARIES)
clickhouse_strip_binary(TARGET clickhouse DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/${STRIPPED_BINARIES_OUTPUT} BINARY_PATH clickhouse)
else()
clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/${STRIPPED_BINARIES_OUTPUT})
install (TARGETS clickhouse RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
endif()
endif()
if (NOT INSTALL_STRIPPED_BINARIES)
# Install dummy debug directory
# TODO: move logic to every place where clickhouse_strip_binary is used
add_custom_command(TARGET clickhouse POST_BUILD COMMAND echo > .empty )
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/.empty" DESTINATION ${CMAKE_INSTALL_LIBDIR}/debug/.empty)
endif()
if (ENABLE_TESTS)
set (CLICKHOUSE_UNIT_TESTS_TARGETS unit_tests_dbms)
add_custom_target (clickhouse-tests ALL DEPENDS ${CLICKHOUSE_UNIT_TESTS_TARGETS})

View File

@ -137,5 +137,10 @@ if (BUILD_STANDALONE_KEEPER)
add_dependencies(clickhouse-keeper clickhouse_keeper_configs)
set_target_properties(clickhouse-keeper PROPERTIES RUNTIME_OUTPUT_DIRECTORY ../)
install(TARGETS clickhouse-keeper RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
if (INSTALL_STRIPPED_BINARIES)
clickhouse_strip_binary(TARGET clickhouse-keeper DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT} BINARY_PATH ../clickhouse-keeper)
else()
clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse-keeper DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT})
install(TARGETS clickhouse-keeper RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
endif()
endif()

View File

@ -27,5 +27,6 @@ set_target_properties(clickhouse-library-bridge PROPERTIES RUNTIME_OUTPUT_DIRECT
if (INSTALL_STRIPPED_BINARIES)
clickhouse_strip_binary(TARGET clickhouse-library-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT} BINARY_PATH ../clickhouse-library-bridge)
else()
clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse-library-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT})
install(TARGETS clickhouse-library-bridge RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
endif()

View File

@ -42,6 +42,7 @@ endif()
if (INSTALL_STRIPPED_BINARIES)
clickhouse_strip_binary(TARGET clickhouse-odbc-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT} BINARY_PATH ../clickhouse-odbc-bridge)
else()
clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse-odbc-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT})
install(TARGETS clickhouse-odbc-bridge RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
endif()

View File

@ -29,15 +29,15 @@ ConnectionPoolWithFailover::ConnectionPoolWithFailover(
time_t decrease_error_period_,
size_t max_error_cap_)
: Base(std::move(nested_pools_), decrease_error_period_, max_error_cap_, &Poco::Logger::get("ConnectionPoolWithFailover"))
, default_load_balancing(load_balancing)
, get_priority_load_balancing(load_balancing)
{
const std::string & local_hostname = getFQDNOrHostName();
hostname_differences.resize(nested_pools.size());
get_priority_load_balancing.hostname_differences.resize(nested_pools.size());
for (size_t i = 0; i < nested_pools.size(); ++i)
{
ConnectionPool & connection_pool = dynamic_cast<ConnectionPool &>(*nested_pools[i]);
hostname_differences[i] = getHostNameDifference(local_hostname, connection_pool.getHost());
get_priority_load_balancing.hostname_differences[i] = getHostNameDifference(local_hostname, connection_pool.getHost());
}
}
@ -51,36 +51,15 @@ IConnectionPool::Entry ConnectionPoolWithFailover::get(const ConnectionTimeouts
};
size_t offset = 0;
LoadBalancing load_balancing = get_priority_load_balancing.load_balancing;
if (settings)
offset = settings->load_balancing_first_offset % nested_pools.size();
GetPriorityFunc get_priority;
switch (settings ? LoadBalancing(settings->load_balancing) : default_load_balancing)
{
case LoadBalancing::NEAREST_HOSTNAME:
get_priority = [&](size_t i) { return hostname_differences[i]; };
break;
case LoadBalancing::IN_ORDER:
get_priority = [](size_t i) { return i; };
break;
case LoadBalancing::RANDOM:
break;
case LoadBalancing::FIRST_OR_RANDOM:
get_priority = [offset](size_t i) -> size_t { return i != offset; };
break;
case LoadBalancing::ROUND_ROBIN:
if (last_used >= nested_pools.size())
last_used = 0;
++last_used;
/* Consider nested_pools.size() equals to 5
* last_used = 1 -> get_priority: 0 1 2 3 4
* last_used = 2 -> get_priority: 4 0 1 2 3
* last_used = 3 -> get_priority: 4 3 0 1 2
* ...
* */
get_priority = [&](size_t i) { ++i; return i < last_used ? nested_pools.size() - i : i - last_used; };
break;
offset = settings->load_balancing_first_offset % nested_pools.size();
load_balancing = LoadBalancing(settings->load_balancing);
}
GetPriorityFunc get_priority = get_priority_load_balancing.getPriorityFunc(load_balancing, offset, nested_pools.size());
UInt64 max_ignored_errors = settings ? settings->distributed_replica_max_ignored_errors.value : 0;
bool fallback_to_stale_replicas = settings ? settings->fallback_to_stale_replicas_for_distributed_queries.value : true;
@ -173,38 +152,14 @@ std::vector<ConnectionPoolWithFailover::TryResult> ConnectionPoolWithFailover::g
ConnectionPoolWithFailover::Base::GetPriorityFunc ConnectionPoolWithFailover::makeGetPriorityFunc(const Settings * settings)
{
size_t offset = 0;
LoadBalancing load_balancing = get_priority_load_balancing.load_balancing;
if (settings)
offset = settings->load_balancing_first_offset % nested_pools.size();
GetPriorityFunc get_priority;
switch (settings ? LoadBalancing(settings->load_balancing) : default_load_balancing)
{
case LoadBalancing::NEAREST_HOSTNAME:
get_priority = [&](size_t i) { return hostname_differences[i]; };
break;
case LoadBalancing::IN_ORDER:
get_priority = [](size_t i) { return i; };
break;
case LoadBalancing::RANDOM:
break;
case LoadBalancing::FIRST_OR_RANDOM:
get_priority = [offset](size_t i) -> size_t { return i != offset; };
break;
case LoadBalancing::ROUND_ROBIN:
if (last_used >= nested_pools.size())
last_used = 0;
++last_used;
/* Consider nested_pools.size() equals to 5
* last_used = 1 -> get_priority: 0 1 2 3 4
* last_used = 2 -> get_priority: 4 0 1 2 3
* last_used = 3 -> get_priority: 4 3 0 1 2
* ...
* */
get_priority = [&](size_t i) { ++i; return i < last_used ? nested_pools.size() - i : i - last_used; };
break;
offset = settings->load_balancing_first_offset % nested_pools.size();
load_balancing = LoadBalancing(settings->load_balancing);
}
return get_priority;
return get_priority_load_balancing.getPriorityFunc(load_balancing, offset, nested_pools.size());
}
std::vector<ConnectionPoolWithFailover::TryResult> ConnectionPoolWithFailover::getManyImpl(

View File

@ -1,6 +1,7 @@
#pragma once
#include <Common/PoolWithFailoverBase.h>
#include <Common/GetPriorityForLoadBalancing.h>
#include <Client/ConnectionPool.h>
#include <chrono>
@ -109,9 +110,7 @@ private:
GetPriorityFunc makeGetPriorityFunc(const Settings * settings);
std::vector<size_t> hostname_differences; /// Distances from name of this host to the names of hosts of pools.
size_t last_used = 0; /// Last used for round_robin policy.
LoadBalancing default_load_balancing;
GetPriorityForLoadBalancing get_priority_load_balancing;
};
using ConnectionPoolWithFailoverPtr = std::shared_ptr<ConnectionPoolWithFailover>;

View File

@ -83,11 +83,20 @@ size_t extractMaskNumericImpl(
const PaddedPODArray<UInt8> * null_bytemap,
PaddedPODArray<UInt8> * nulls)
{
if constexpr (!column_is_short)
{
if (data.size() != mask.size())
throw Exception(ErrorCodes::LOGICAL_ERROR, "The size of a full data column is not equal to the size of a mask");
}
size_t ones_count = 0;
size_t data_index = 0;
size_t mask_size = mask.size();
for (size_t i = 0; i != mask_size; ++i)
size_t mask_size = mask.size();
size_t data_size = data.size();
size_t i = 0;
for (; i != mask_size && data_index != data_size; ++i)
{
// Change mask only where value is 1.
if (!mask[i])
@ -120,6 +129,13 @@ size_t extractMaskNumericImpl(
mask[i] = value;
}
if constexpr (column_is_short)
{
if (data_index != data_size)
throw Exception(ErrorCodes::LOGICAL_ERROR, "The size of a short column is not equal to the number of ones in a mask");
}
return ones_count;
}
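
The two `if constexpr` checks above encode a contract: a full column must have exactly the mask's length, while a short column must hold exactly one value per set bit of the mask. A minimal standalone sketch of that contract, using `std::vector` stand-ins for ClickHouse's `PaddedPODArray` (the loop body is an illustrative reconstruction, not the verbatim source):

```cpp
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <stdexcept>
#include <vector>

/// column_is_short means `data` holds one value per set bit of `mask`,
/// so it advances its own index only on set bits.
template <bool column_is_short>
size_t extractMask(std::vector<uint8_t> & mask, const std::vector<uint8_t> & data)
{
    if constexpr (!column_is_short)
    {
        if (data.size() != mask.size())
            throw std::logic_error("full data column size must equal mask size");
    }

    size_t ones_count = 0;
    size_t data_index = 0;
    for (size_t i = 0; i != mask.size() && data_index != data.size(); ++i)
    {
        if (!mask[i])
            continue;  /// change mask only where it is already 1

        uint8_t value = column_is_short ? data[data_index++] : data[i];
        ones_count += value;
        mask[i] = value;
    }

    if constexpr (column_is_short)
    {
        if (data_index != data.size())
            throw std::logic_error("short column size must equal the number of ones in the mask");
    }
    return ones_count;
}

int main()
{
    std::vector<uint8_t> mask = {1, 0, 1, 1};
    const std::vector<uint8_t> values = {1, 0, 1};  /// one value per set mask bit
    assert(extractMask<true>(mask, values) == 2);
    assert((mask == std::vector<uint8_t>{1, 0, 0, 1}));
}
```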

View File

@ -31,8 +31,8 @@ public:
/// probably it worth to try to increase stack size for coroutines.
///
/// Current value is just enough for all tests in our CI. It's not selected in some special
/// way. We will have 40 pages with 4KB page size.
static constexpr size_t default_stack_size = 192 * 1024; /// 64KB was not enough for tests
/// way. We will have 80 pages with 4KB page size.
static constexpr size_t default_stack_size = 320 * 1024; /// 64KB was not enough for tests
explicit FiberStack(size_t stack_size_ = default_stack_size) : stack_size(stack_size_)
{
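
The page arithmetic in the updated comment checks out; a compile-time sanity check, assuming the 4KB page size the comment assumes:

```cpp
#include <cstddef>

constexpr size_t page_size = 4 * 1024;             /// 4KB pages, per the comment
constexpr size_t default_stack_size = 320 * 1024;  /// the new value from the hunk

static_assert(default_stack_size / page_size == 80, "320KB is 80 pages of 4KB");
static_assert(default_stack_size % page_size == 0, "stack size is page-aligned");
```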

View File

@ -0,0 +1,49 @@
#include <Common/GetPriorityForLoadBalancing.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
std::function<size_t(size_t index)> GetPriorityForLoadBalancing::getPriorityFunc(LoadBalancing load_balance, size_t offset, size_t pool_size) const
{
std::function<size_t(size_t index)> get_priority;
switch (load_balance)
{
case LoadBalancing::NEAREST_HOSTNAME:
if (hostname_differences.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "It's a bug: hostname_differences is not initialized");
get_priority = [&](size_t i) { return hostname_differences[i]; };
break;
case LoadBalancing::IN_ORDER:
get_priority = [](size_t i) { return i; };
break;
case LoadBalancing::RANDOM:
break;
case LoadBalancing::FIRST_OR_RANDOM:
get_priority = [offset](size_t i) -> size_t { return i != offset; };
break;
case LoadBalancing::ROUND_ROBIN:
if (last_used >= pool_size)
last_used = 0;
++last_used;
/* Consider pool_size equals to 5
* last_used = 1 -> get_priority: 0 1 2 3 4
* last_used = 2 -> get_priority: 4 0 1 2 3
* last_used = 3 -> get_priority: 4 3 0 1 2
* ...
* */
get_priority = [&](size_t i)
{
++i;
return i < last_used ? pool_size - i : i - last_used;
};
break;
}
return get_priority;
}
}
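
A self-contained check of the `ROUND_ROBIN` comment in this new file: the lambda below is copied from the hunk and evaluated for `pool_size = 5`, reproducing the documented priority sequences.

```cpp
#include <cstddef>
#include <cstdio>

int main()
{
    const size_t pool_size = 5;
    for (size_t last_used = 1; last_used <= 3; ++last_used)
    {
        /// Same lambda as in GetPriorityForLoadBalancing::getPriorityFunc.
        auto get_priority = [&](size_t i)
        {
            ++i;
            return i < last_used ? pool_size - i : i - last_used;
        };

        std::printf("last_used = %zu ->", last_used);
        for (size_t i = 0; i < pool_size; ++i)
            std::printf(" %zu", get_priority(i));
        std::printf("\n");
    }
    /// Prints:
    ///   last_used = 1 -> 0 1 2 3 4
    ///   last_used = 2 -> 4 0 1 2 3
    ///   last_used = 3 -> 4 3 0 1 2
}
```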

View File

@ -0,0 +1,34 @@
#pragma once
#include <Core/SettingsEnums.h>
namespace DB
{
class GetPriorityForLoadBalancing
{
public:
GetPriorityForLoadBalancing(LoadBalancing load_balancing_) : load_balancing(load_balancing_) {}
GetPriorityForLoadBalancing(){}
bool operator == (const GetPriorityForLoadBalancing & other) const
{
return load_balancing == other.load_balancing && hostname_differences == other.hostname_differences;
}
bool operator != (const GetPriorityForLoadBalancing & other) const
{
return !(*this == other);
}
std::function<size_t(size_t index)> getPriorityFunc(LoadBalancing load_balance, size_t offset, size_t pool_size) const;
std::vector<size_t> hostname_differences; /// Distances from name of this host to the names of hosts of pools.
LoadBalancing load_balancing = LoadBalancing::RANDOM;
private:
mutable size_t last_used = 0; /// Last used for round_robin policy.
};
}

View File

@ -22,7 +22,6 @@ target_link_libraries (clickhouse_common_zookeeper_no_log
PRIVATE
string_utils
)
if (ENABLE_EXAMPLES)
add_subdirectory(examples)
endif()

View File

@ -5,15 +5,15 @@
#include <functional>
#include <filesystem>
#include <pcg-random/pcg_random.hpp>
#include <base/logger_useful.h>
#include <base/find_symbols.h>
#include <Common/randomSeed.h>
#include <base/getFQDNOrHostName.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/Exception.h>
#include <Common/isLocalAddress.h>
#include <Poco/Net/NetException.h>
#include <Poco/Net/DNS.h>
#define ZOOKEEPER_CONNECTION_TIMEOUT_MS 1000
@ -48,7 +48,7 @@ static void check(Coordination::Error code, const std::string & path)
void ZooKeeper::init(const std::string & implementation_, const Strings & hosts_, const std::string & identity_,
int32_t session_timeout_ms_, int32_t operation_timeout_ms_, const std::string & chroot_)
int32_t session_timeout_ms_, int32_t operation_timeout_ms_, const std::string & chroot_, const GetPriorityForLoadBalancing & get_priority_load_balancing_)
{
log = &Poco::Logger::get("ZooKeeper");
hosts = hosts_;
@ -57,6 +57,7 @@ void ZooKeeper::init(const std::string & implementation_, const Strings & hosts_
operation_timeout_ms = operation_timeout_ms_;
chroot = chroot_;
implementation = implementation_;
get_priority_load_balancing = get_priority_load_balancing_;
if (implementation == "zookeeper")
{
@ -66,14 +67,13 @@ void ZooKeeper::init(const std::string & implementation_, const Strings & hosts_
Coordination::ZooKeeper::Nodes nodes;
nodes.reserve(hosts.size());
Strings shuffled_hosts = hosts;
/// Shuffle the hosts to distribute the load among ZooKeeper nodes.
pcg64 generator(randomSeed());
std::shuffle(shuffled_hosts.begin(), shuffled_hosts.end(), generator);
std::vector<ShuffleHost> shuffled_hosts = shuffleHosts();
bool dns_error = false;
for (auto & host_string : shuffled_hosts)
for (auto & host : shuffled_hosts)
{
auto & host_string = host.host;
try
{
bool secure = bool(startsWith(host_string, "secure://"));
@ -81,6 +81,7 @@ void ZooKeeper::init(const std::string & implementation_, const Strings & hosts_
if (secure)
host_string.erase(0, strlen("secure://"));
LOG_TEST(log, "Adding ZooKeeper host {} ({})", host_string, Poco::Net::SocketAddress{host_string}.toString());
nodes.emplace_back(Coordination::ZooKeeper::Node{Poco::Net::SocketAddress{host_string}, secure});
}
catch (const Poco::Net::HostNotFoundException & e)
@ -154,23 +155,47 @@ void ZooKeeper::init(const std::string & implementation_, const Strings & hosts_
}
}
std::vector<ShuffleHost> ZooKeeper::shuffleHosts() const
{
std::function<size_t(size_t index)> get_priority = get_priority_load_balancing.getPriorityFunc(get_priority_load_balancing.load_balancing, 0, hosts.size());
std::vector<ShuffleHost> shuffle_hosts;
for (size_t i = 0; i < hosts.size(); ++i)
{
ShuffleHost shuffle_host;
shuffle_host.host = hosts[i];
if (get_priority)
shuffle_host.priority = get_priority(i);
shuffle_host.randomize();
shuffle_hosts.emplace_back(shuffle_host);
}
std::sort(
shuffle_hosts.begin(), shuffle_hosts.end(),
[](const ShuffleHost & lhs, const ShuffleHost & rhs)
{
return ShuffleHost::compare(lhs, rhs);
});
return shuffle_hosts;
}
ZooKeeper::ZooKeeper(const std::string & hosts_string, const std::string & identity_, int32_t session_timeout_ms_,
int32_t operation_timeout_ms_, const std::string & chroot_, const std::string & implementation_,
std::shared_ptr<DB::ZooKeeperLog> zk_log_)
std::shared_ptr<DB::ZooKeeperLog> zk_log_, const GetPriorityForLoadBalancing & get_priority_load_balancing_)
{
zk_log = std::move(zk_log_);
Strings hosts_strings;
splitInto<','>(hosts_strings, hosts_string);
init(implementation_, hosts_strings, identity_, session_timeout_ms_, operation_timeout_ms_, chroot_);
init(implementation_, hosts_strings, identity_, session_timeout_ms_, operation_timeout_ms_, chroot_, get_priority_load_balancing_);
}
ZooKeeper::ZooKeeper(const Strings & hosts_, const std::string & identity_, int32_t session_timeout_ms_,
int32_t operation_timeout_ms_, const std::string & chroot_, const std::string & implementation_,
std::shared_ptr<DB::ZooKeeperLog> zk_log_)
std::shared_ptr<DB::ZooKeeperLog> zk_log_, const GetPriorityForLoadBalancing & get_priority_load_balancing_)
{
zk_log = std::move(zk_log_);
init(implementation_, hosts_, identity_, session_timeout_ms_, operation_timeout_ms_, chroot_);
init(implementation_, hosts_, identity_, session_timeout_ms_, operation_timeout_ms_, chroot_, get_priority_load_balancing_);
}
struct ZooKeeperArgs
@ -213,6 +238,15 @@ struct ZooKeeperArgs
{
implementation = config.getString(config_name + "." + key);
}
else if (key == "zookeeper_load_balancing")
{
String load_balancing_str = config.getString(config_name + "." + key);
/// Use magic_enum to avoid dependency from dbms (`SettingFieldLoadBalancingTraits::fromString(...)`)
auto load_balancing = magic_enum::enum_cast<DB::LoadBalancing>(Poco::toUpper(load_balancing_str));
if (!load_balancing)
throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Unknown load balancing: {}", load_balancing_str);
get_priority_load_balancing.load_balancing = *load_balancing;
}
else
throw KeeperException(std::string("Unknown key ") + key + " in config file", Coordination::Error::ZBADARGUMENTS);
}
@ -224,6 +258,15 @@ struct ZooKeeperArgs
if (chroot.back() == '/')
chroot.pop_back();
}
/// init get_priority_load_balancing
get_priority_load_balancing.hostname_differences.resize(hosts.size());
const String & local_hostname = getFQDNOrHostName();
for (size_t i = 0; i < hosts.size(); ++i)
{
const String & node_host = hosts[i].substr(0, hosts[i].find_last_of(':'));
get_priority_load_balancing.hostname_differences[i] = DB::getHostNameDifference(local_hostname, node_host);
}
}
Strings hosts;
@ -232,13 +275,14 @@ struct ZooKeeperArgs
int operation_timeout_ms;
std::string chroot;
std::string implementation;
GetPriorityForLoadBalancing get_priority_load_balancing;
};
ZooKeeper::ZooKeeper(const Poco::Util::AbstractConfiguration & config, const std::string & config_name, std::shared_ptr<DB::ZooKeeperLog> zk_log_)
: zk_log(std::move(zk_log_))
{
ZooKeeperArgs args(config, config_name);
init(args.implementation, args.hosts, args.identity, args.session_timeout_ms, args.operation_timeout_ms, args.chroot);
init(args.implementation, args.hosts, args.identity, args.session_timeout_ms, args.operation_timeout_ms, args.chroot, args.get_priority_load_balancing);
}
bool ZooKeeper::configChanged(const Poco::Util::AbstractConfiguration & config, const std::string & config_name) const
@ -249,8 +293,11 @@ bool ZooKeeper::configChanged(const Poco::Util::AbstractConfiguration & config,
if (args.implementation == implementation && implementation == "testkeeper")
return false;
return std::tie(args.implementation, args.hosts, args.identity, args.session_timeout_ms, args.operation_timeout_ms, args.chroot)
!= std::tie(implementation, hosts, identity, session_timeout_ms, operation_timeout_ms, chroot);
if (args.get_priority_load_balancing != get_priority_load_balancing)
return true;
return std::tie(args.implementation, args.hosts, args.identity, args.session_timeout_ms, args.operation_timeout_ms, args.chroot, args.get_priority_load_balancing)
!= std::tie(implementation, hosts, identity, session_timeout_ms, operation_timeout_ms, chroot, get_priority_load_balancing);
}
@ -757,7 +804,7 @@ bool ZooKeeper::waitForDisappear(const std::string & path, const WaitCondition &
ZooKeeperPtr ZooKeeper::startNewSession() const
{
return std::make_shared<ZooKeeper>(hosts, identity, session_timeout_ms, operation_timeout_ms, chroot, implementation, zk_log);
return std::make_shared<ZooKeeper>(hosts, identity, session_timeout_ms, operation_timeout_ms, chroot, implementation, zk_log, get_priority_load_balancing);
}
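
The `zookeeper_load_balancing` parsing above relies on `magic_enum::enum_cast`, which returns a `std::optional`. Below is a minimal sketch of the same pattern; the enum is a stand-in (the real `DB::LoadBalancing` lives in `Core/SettingsEnums.h`), and `std::toupper` stands in for `Poco::toUpper`.

```cpp
#include <algorithm>
#include <cctype>
#include <stdexcept>
#include <string>

#include <magic_enum.hpp>

/// Stand-in for DB::LoadBalancing from Core/SettingsEnums.h.
enum class LoadBalancing { RANDOM, NEAREST_HOSTNAME, IN_ORDER, FIRST_OR_RANDOM, ROUND_ROBIN };

LoadBalancing parseLoadBalancing(std::string value)
{
    /// Config values are lowercase ("nearest_hostname"), enum names are uppercase.
    std::transform(value.begin(), value.end(), value.begin(),
                   [](unsigned char c) { return static_cast<char>(std::toupper(c)); });

    auto parsed = magic_enum::enum_cast<LoadBalancing>(value);
    if (!parsed)
        throw std::invalid_argument("Unknown load balancing: " + value);
    return *parsed;
}

int main()
{
    return parseLoadBalancing("nearest_hostname") == LoadBalancing::NEAREST_HOSTNAME ? 0 : 1;
}
```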

View File

@ -13,7 +13,10 @@
#include <Common/Stopwatch.h>
#include <Common/ZooKeeper/IKeeper.h>
#include <Common/ZooKeeper/ZooKeeperConstants.h>
#include <Common/GetPriorityForLoadBalancing.h>
#include <Common/thread_local_rng.h>
#include <unistd.h>
#include <random>
namespace ProfileEvents
@ -37,6 +40,25 @@ namespace zkutil
/// Preferred size of multi() command (in number of ops)
constexpr size_t MULTI_BATCH_SIZE = 100;
struct ShuffleHost
{
String host;
Int64 priority = 0;
UInt32 random = 0;
void randomize()
{
random = thread_local_rng();
}
static bool compare(const ShuffleHost & lhs, const ShuffleHost & rhs)
{
return std::forward_as_tuple(lhs.priority, lhs.random)
< std::forward_as_tuple(rhs.priority, rhs.random);
}
};
using GetPriorityForLoadBalancing = DB::GetPriorityForLoadBalancing;
/// ZooKeeper session. The interface is substantially different from the usual libzookeeper API.
///
@ -58,14 +80,16 @@ public:
int32_t operation_timeout_ms_ = Coordination::DEFAULT_OPERATION_TIMEOUT_MS,
const std::string & chroot_ = "",
const std::string & implementation_ = "zookeeper",
std::shared_ptr<DB::ZooKeeperLog> zk_log_ = nullptr);
std::shared_ptr<DB::ZooKeeperLog> zk_log_ = nullptr,
const GetPriorityForLoadBalancing & get_priority_load_balancing_ = {});
explicit ZooKeeper(const Strings & hosts_, const std::string & identity_ = "",
int32_t session_timeout_ms_ = Coordination::DEFAULT_SESSION_TIMEOUT_MS,
int32_t operation_timeout_ms_ = Coordination::DEFAULT_OPERATION_TIMEOUT_MS,
const std::string & chroot_ = "",
const std::string & implementation_ = "zookeeper",
std::shared_ptr<DB::ZooKeeperLog> zk_log_ = nullptr);
std::shared_ptr<DB::ZooKeeperLog> zk_log_ = nullptr,
const GetPriorityForLoadBalancing & get_priority_load_balancing_ = {});
/** Config of the form:
<zookeeper>
@ -91,6 +115,8 @@ public:
*/
ZooKeeper(const Poco::Util::AbstractConfiguration & config, const std::string & config_name, std::shared_ptr<DB::ZooKeeperLog> zk_log_);
std::vector<ShuffleHost> shuffleHosts() const;
/// Creates a new session with the same parameters. This method can be used for reconnecting
/// after the session has expired.
/// This object remains unchanged, and the new session is returned.
@ -284,7 +310,7 @@ private:
friend class EphemeralNodeHolder;
void init(const std::string & implementation_, const Strings & hosts_, const std::string & identity_,
int32_t session_timeout_ms_, int32_t operation_timeout_ms_, const std::string & chroot_);
int32_t session_timeout_ms_, int32_t operation_timeout_ms_, const std::string & chroot_, const GetPriorityForLoadBalancing & get_priority_load_balancing_);
/// The following methods don't throw exceptions but return error codes.
Coordination::Error createImpl(const std::string & path, const std::string & data, int32_t mode, std::string & path_created);
@ -311,6 +337,8 @@ private:
Poco::Logger * log = nullptr;
std::shared_ptr<DB::ZooKeeperLog> zk_log;
GetPriorityForLoadBalancing get_priority_load_balancing;
AtomicStopwatch session_uptime;
};
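
The new `ShuffleHost` replaces the plain `std::shuffle` that `init` used before: hosts are sorted by a `(priority, random)` pair, so hosts with a better (lower) priority always come first, while the random component breaks ties and keeps equal-priority hosts load-balanced. A standalone sketch of the idea:

```cpp
#include <algorithm>
#include <cstdint>
#include <random>
#include <string>
#include <tuple>
#include <vector>

struct ShuffleHost
{
    std::string host;
    int64_t priority = 0;  /// e.g. hostname distance for nearest_hostname
    uint32_t random = 0;   /// tie-breaker, assigned before sorting
};

std::vector<ShuffleHost> shuffleHosts(std::vector<ShuffleHost> hosts)
{
    std::mt19937 rng{std::random_device{}()};
    for (auto & host : hosts)
        host.random = rng();  /// same role as ShuffleHost::randomize()

    std::sort(hosts.begin(), hosts.end(), [](const ShuffleHost & lhs, const ShuffleHost & rhs)
    {
        /// Same comparison as ShuffleHost::compare above.
        return std::forward_as_tuple(lhs.priority, lhs.random)
             < std::forward_as_tuple(rhs.priority, rhs.random);
    });
    return hosts;
}

int main()
{
    auto hosts = shuffleHosts({{"zk1", 0}, {"zk2", 1}, {"zk3", 0}});
    /// zk1 and zk3 (priority 0) come first in random order; zk2 is always last.
    return hosts.back().host == "zk2" ? 0 : 1;
}
```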

View File

@ -451,7 +451,7 @@ void ZooKeeper::connect(
}
else
{
LOG_TEST(log, "Connected to ZooKeeper at {} with session_id {}", socket.peerAddress().toString(), session_id);
LOG_TEST(log, "Connected to ZooKeeper at {} with session_id {}{}", socket.peerAddress().toString(), session_id, fail_reasons.str());
}
}

View File

@ -11,7 +11,7 @@
constexpr size_t IPV4_BINARY_LENGTH = 4;
constexpr size_t IPV6_BINARY_LENGTH = 16;
constexpr size_t IPV4_MAX_TEXT_LENGTH = 15; /// Does not count tail zero byte.
constexpr size_t IPV6_MAX_TEXT_LENGTH = 39;
constexpr size_t IPV6_MAX_TEXT_LENGTH = 45; /// Does not count tail zero byte.
namespace DB
{
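
The jump from 39 to 45 accounts for IPv6 literals with an embedded IPv4 dotted quad, the form typically produced for IPv4-mapped addresses; the old constant only covered the pure-hex form. The arithmetic, as compile-time checks:

```cpp
/// Longest IPv6 text with an embedded IPv4 part:
///   6 hex groups of 4 chars = 24, 6 separating colons = 6, IPv4 part <= 15; total = 45.
static_assert(sizeof("0000:0000:0000:0000:0000:ffff:255.255.255.255") - 1 == 45,
              "longest IPv6 literal with embedded IPv4 is 45 chars");

/// Pure-hex maximum, which the old constant 39 covered:
///   8 groups of 4 chars = 32, plus 7 colons = 39.
static_assert(sizeof("ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff") - 1 == 39,
              "pure-hex IPv6 maximum is 39 chars");
```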

View File

@ -124,6 +124,7 @@ bool isLocalAddress(const Poco::Net::SocketAddress & address, UInt16 clickhouse_
size_t getHostNameDifference(const std::string & local_hostname, const std::string & host)
{
/// FIXME should we replace it with Levenshtein distance? (we already have it in NamePrompter)
size_t hostname_difference = 0;
for (size_t i = 0; i < std::min(local_hostname.length(), host.length()); ++i)
if (local_hostname[i] != host[i])
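
The excerpt cuts off inside the loop; a plausible completion (an assumption based on the surrounding lines, not the verbatim source) counts the positions at which the two names disagree over the shorter length, which is the score `nearest_hostname` balancing ranks replicas by:

```cpp
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <string>

size_t getHostNameDifference(const std::string & local_hostname, const std::string & host)
{
    size_t hostname_difference = 0;
    for (size_t i = 0; i < std::min(local_hostname.length(), host.length()); ++i)
        if (local_hostname[i] != host[i])
            ++hostname_difference;  /// count mismatching positions
    return hostname_difference;
}

int main()
{
    assert(getHostNameDifference("clickhouse-01", "clickhouse-01") == 0);
    assert(getHostNameDifference("clickhouse-01", "clickhouse-02") == 1);  /// closer replica scores lower
}
```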

View File

@ -13,6 +13,7 @@
#include <iterator>
#include <base/sort.h>
#include <boost/algorithm/string.hpp>
namespace DB
@ -269,8 +270,18 @@ const ColumnWithTypeAndName & Block::safeGetByPosition(size_t position) const
}
const ColumnWithTypeAndName * Block::findByName(const std::string & name) const
const ColumnWithTypeAndName * Block::findByName(const std::string & name, bool case_insensitive) const
{
if (case_insensitive)
{
auto found = std::find_if(data.begin(), data.end(), [&](const auto & column) { return boost::iequals(column.name, name); });
if (found == data.end())
{
return nullptr;
}
return &*found;
}
auto it = index_by_name.find(name);
if (index_by_name.end() == it)
{
@ -280,19 +291,23 @@ const ColumnWithTypeAndName * Block::findByName(const std::string & name) const
}
const ColumnWithTypeAndName & Block::getByName(const std::string & name) const
const ColumnWithTypeAndName & Block::getByName(const std::string & name, bool case_insensitive) const
{
const auto * result = findByName(name);
const auto * result = findByName(name, case_insensitive);
if (!result)
throw Exception("Not found column " + name + " in block. There are only columns: " + dumpNames()
, ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK);
throw Exception(
"Not found column " + name + " in block. There are only columns: " + dumpNames(), ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK);
return *result;
}
bool Block::has(const std::string & name) const
bool Block::has(const std::string & name, bool case_insensitive) const
{
if (case_insensitive)
return std::find_if(data.begin(), data.end(), [&](const auto & column) { return boost::iequals(column.name, name); })
!= data.end();
return index_by_name.end() != index_by_name.find(name);
}
@ -301,8 +316,8 @@ size_t Block::getPositionByName(const std::string & name) const
{
auto it = index_by_name.find(name);
if (index_by_name.end() == it)
throw Exception("Not found column " + name + " in block. There are only columns: " + dumpNames()
, ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK);
throw Exception(
"Not found column " + name + " in block. There are only columns: " + dumpNames(), ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK);
return it->second;
}
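
The case-insensitive path cannot use `index_by_name`, since the hash map only knows exact spellings, so it falls back to a linear scan with `boost::iequals`. The O(n) scan should be acceptable because the flag is only set on import paths (the new `*_case_insensitive_column_matching` format settings), where matching happens once per column rather than per row. A standalone sketch with a stand-in for `ColumnWithTypeAndName`:

```cpp
#include <algorithm>
#include <cassert>
#include <string>
#include <vector>

#include <boost/algorithm/string.hpp>

/// Stand-in for ClickHouse's ColumnWithTypeAndName.
struct Column
{
    std::string name;
};

const Column * findByName(const std::vector<Column> & data, const std::string & name, bool case_insensitive)
{
    auto found = std::find_if(data.begin(), data.end(), [&](const auto & column)
    {
        /// boost::iequals compares ignoring case, as in Block::findByName above.
        return case_insensitive ? boost::iequals(column.name, name) : column.name == name;
    });
    return found == data.end() ? nullptr : &*found;
}

int main()
{
    std::vector<Column> block{{"UserID"}, {"EventDate"}};
    assert(findByName(block, "userid", /*case_insensitive=*/ true) != nullptr);
    assert(findByName(block, "userid", /*case_insensitive=*/ false) == nullptr);
}
```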

View File

@ -60,21 +60,21 @@ public:
ColumnWithTypeAndName & safeGetByPosition(size_t position);
const ColumnWithTypeAndName & safeGetByPosition(size_t position) const;
ColumnWithTypeAndName* findByName(const std::string & name)
ColumnWithTypeAndName* findByName(const std::string & name, bool case_insensitive = false)
{
return const_cast<ColumnWithTypeAndName *>(
const_cast<const Block *>(this)->findByName(name));
const_cast<const Block *>(this)->findByName(name, case_insensitive));
}
const ColumnWithTypeAndName * findByName(const std::string & name) const;
const ColumnWithTypeAndName * findByName(const std::string & name, bool case_insensitive = false) const;
ColumnWithTypeAndName & getByName(const std::string & name)
ColumnWithTypeAndName & getByName(const std::string & name, bool case_insensitive = false)
{
return const_cast<ColumnWithTypeAndName &>(
const_cast<const Block *>(this)->getByName(name));
const_cast<const Block *>(this)->getByName(name, case_insensitive));
}
const ColumnWithTypeAndName & getByName(const std::string & name) const;
const ColumnWithTypeAndName & getByName(const std::string & name, bool case_insensitive = false) const;
Container::iterator begin() { return data.begin(); }
Container::iterator end() { return data.end(); }
@ -83,7 +83,7 @@ public:
Container::const_iterator cbegin() const { return data.cbegin(); }
Container::const_iterator cend() const { return data.cend(); }
bool has(const std::string & name) const;
bool has(const std::string & name, bool case_insensitive = false) const;
size_t getPositionByName(const std::string & name) const;

View File

@ -616,11 +616,13 @@ class IColumn;
M(Bool, input_format_tsv_empty_as_default, false, "Treat empty fields in TSV input as default values.", 0) \
M(Bool, input_format_tsv_enum_as_number, false, "Treat inserted enum values in TSV formats as enum indices \\N", 0) \
M(Bool, input_format_null_as_default, true, "For text input formats initialize null fields with default values if data type of this field is not nullable", 0) \
M(Bool, input_format_use_lowercase_column_name, false, "Use lowercase column name while reading input formats", 0) \
M(Bool, input_format_arrow_import_nested, false, "Allow to insert array of structs into Nested table in Arrow input format.", 0) \
M(Bool, input_format_arrow_case_insensitive_column_matching, false, "Ignore case when matching Arrow columns with CH columns.", 0) \
M(Bool, input_format_orc_import_nested, false, "Allow to insert array of structs into Nested table in ORC input format.", 0) \
M(Int64, input_format_orc_row_batch_size, 100'000, "Batch size when reading ORC stripes.", 0) \
M(Bool, input_format_orc_case_insensitive_column_matching, false, "Ignore case when matching ORC columns with CH columns.", 0) \
M(Bool, input_format_parquet_import_nested, false, "Allow to insert array of structs into Nested table in Parquet input format.", 0) \
M(Bool, input_format_parquet_case_insensitive_column_matching, false, "Ignore case when matching Parquet columns with CH columns.", 0) \
M(Bool, input_format_allow_seeks, true, "Allow seeks while reading in ORC/Parquet/Arrow input formats", 0) \
M(Bool, input_format_orc_allow_missing_columns, false, "Allow missing columns while reading ORC input formats", 0) \
M(Bool, input_format_parquet_allow_missing_columns, false, "Allow missing columns while reading Parquet input formats", 0) \

View File

@ -149,4 +149,5 @@ IMPLEMENT_SETTING_ENUM(MsgPackUUIDRepresentation , ErrorCodes::BAD_ARGUMENTS,
{"str", FormatSettings::MsgPackUUIDRepresentation::STR},
{"ext", FormatSettings::MsgPackUUIDRepresentation::EXT}})
}

View File

@ -33,7 +33,7 @@ DataTypePtr DataTypeFactory::get(const String & full_name) const
/// Value 315 is known to cause stack overflow in some test configurations (debug build, sanitizers)
/// let's make the threshold significantly lower.
/// It is impractical for user to have complex data types with this depth.
static constexpr size_t data_type_max_parse_depth = 150;
static constexpr size_t data_type_max_parse_depth = 200;
ParserDataType parser;
ASTPtr ast = parseQuery(parser, full_name.data(), full_name.data() + full_name.size(), "data type", 0, data_type_max_parse_depth);

View File

@ -15,6 +15,8 @@
#include <Parsers/IAST.h>
#include <boost/algorithm/string/case_conv.hpp>
namespace DB
{
@ -227,14 +229,17 @@ void validateArraySizes(const Block & block)
}
std::unordered_set<String> getAllTableNames(const Block & block)
std::unordered_set<String> getAllTableNames(const Block & block, bool to_lower_case)
{
std::unordered_set<String> nested_table_names;
for (auto & name : block.getNames())
for (const auto & name : block.getNames())
{
auto nested_table_name = Nested::extractTableName(name);
if (to_lower_case)
boost::to_lower(nested_table_name);
if (!nested_table_name.empty())
nested_table_names.insert(nested_table_name);
nested_table_names.insert(std::move(nested_table_name));
}
return nested_table_names;
}

View File

@ -32,7 +32,7 @@ namespace Nested
void validateArraySizes(const Block & block);
/// Get all nested tables names from a block.
std::unordered_set<String> getAllTableNames(const Block & block);
std::unordered_set<String> getAllTableNames(const Block & block, bool to_lower_case = false);
}
}

View File

@ -88,6 +88,9 @@ DatabaseReplicated::DatabaseReplicated(
/// If zookeeper chroot prefix is used, path should start with '/', because chroot concatenates without it.
if (zookeeper_path.front() != '/')
zookeeper_path = "/" + zookeeper_path;
if (!db_settings.collection_name.value.empty())
fillClusterAuthInfo(db_settings.collection_name.value, context_->getConfigRef());
}
String DatabaseReplicated::getFullReplicaName() const
@ -191,22 +194,36 @@ ClusterPtr DatabaseReplicated::getClusterImpl() const
shards.back().emplace_back(unescapeForFileName(host_port));
}
String username = db_settings.cluster_username;
String password = db_settings.cluster_password;
UInt16 default_port = getContext()->getTCPPort();
bool secure = db_settings.cluster_secure_connection;
bool treat_local_as_remote = false;
bool treat_local_port_as_remote = getContext()->getApplicationType() == Context::ApplicationType::LOCAL;
return std::make_shared<Cluster>(
getContext()->getSettingsRef(),
shards,
username,
password,
cluster_auth_info.cluster_username,
cluster_auth_info.cluster_password,
default_port,
treat_local_as_remote,
treat_local_port_as_remote,
secure);
cluster_auth_info.cluster_secure_connection,
/*priority=*/1,
database_name,
cluster_auth_info.cluster_secret);
}
void DatabaseReplicated::fillClusterAuthInfo(String collection_name, const Poco::Util::AbstractConfiguration & config_ref)
{
const auto & config_prefix = fmt::format("named_collections.{}", collection_name);
if (!config_ref.has(config_prefix))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no collection named `{}` in config", collection_name);
cluster_auth_info.cluster_username = config_ref.getString(config_prefix + ".cluster_username", "");
cluster_auth_info.cluster_password = config_ref.getString(config_prefix + ".cluster_password", "");
cluster_auth_info.cluster_secret = config_ref.getString(config_prefix + ".cluster_secret", "");
cluster_auth_info.cluster_secure_connection = config_ref.getBool(config_prefix + ".cluster_secure_connection", false);
}
void DatabaseReplicated::tryConnectToZooKeeperAndInitDatabase(bool force_attach)

View File

@ -75,6 +75,16 @@ private:
bool createDatabaseNodesInZooKeeper(const ZooKeeperPtr & current_zookeeper);
void createReplicaNodesInZooKeeper(const ZooKeeperPtr & current_zookeeper);
struct
{
String cluster_username{"default"};
String cluster_password;
String cluster_secret;
bool cluster_secure_connection{false};
} cluster_auth_info;
void fillClusterAuthInfo(String collection_name, const Poco::Util::AbstractConfiguration & config);
void checkQueryValid(const ASTPtr & query, ContextPtr query_context) const;
void recoverLostReplica(const ZooKeeperPtr & current_zookeeper, UInt32 our_log_ptr, UInt32 max_log_ptr);

View File

@ -8,12 +8,11 @@ namespace DB
class ASTStorage;
#define LIST_OF_DATABASE_REPLICATED_SETTINGS(M) \
M(Float, max_broken_tables_ratio, 0.5, "Do not recover replica automatically if the ratio of staled tables to all tables is greater", 0) \
M(UInt64, max_replication_lag_to_enqueue, 10, "Replica will throw exception on attempt to execute query if its replication lag greater", 0) \
M(UInt64, wait_entry_commited_timeout_sec, 3600, "Replicas will try to cancel query if timeout exceed, but initiator host has not executed it yet", 0) \
M(String, cluster_username, "default", "Username to use when connecting to hosts of cluster", 0) \
M(String, cluster_password, "", "Password to use when connecting to hosts of cluster", 0) \
M(Bool, cluster_secure_connection, false, "Enable TLS when connecting to hosts of cluster", 0) \
M(String, collection_name, "", "A name of a collection defined in server's config where all info for cluster authentication is defined", 0) \
DECLARE_SETTINGS_TRAITS(DatabaseReplicatedSettingsTraits, LIST_OF_DATABASE_REPLICATED_SETTINGS)

View File

@ -20,6 +20,7 @@
#include <Common/getRandomASCIIString.h>
#include <Interpreters/Context.h>
#include <Interpreters/threadPoolCallbackRunner.h>
#include <IO/ReadBufferFromS3.h>
#include <IO/ReadBufferFromString.h>
#include <IO/ReadHelpers.h>
@ -264,32 +265,6 @@ std::unique_ptr<WriteBufferFromFileBase> DiskS3::writeFile(const String & path,
LOG_TRACE(log, "{} to file by path: {}. S3 path: {}",
mode == WriteMode::Rewrite ? "Write" : "Append", backQuote(metadata_disk->getPath() + path), remote_fs_root_path + blob_name);
ScheduleFunc schedule = [pool = &getThreadPoolWriter(), thread_group = CurrentThread::getGroup()](auto callback)
{
pool->scheduleOrThrow([callback = std::move(callback), thread_group]()
{
if (thread_group)
CurrentThread::attachTo(thread_group);
SCOPE_EXIT_SAFE(
if (thread_group)
CurrentThread::detachQueryIfNotDetached();
/// After we detached from the thread_group, parent for memory_tracker inside ThreadStatus will be reset to it's parent.
/// Typically, it may be changes from Process to User.
/// Usually it could be ok, because thread pool task is executed before user-level memory tracker is destroyed.
/// However, thread could stay alive inside the thread pool, and it's ThreadStatus as well.
/// When, finally, we destroy the thread (and the ThreadStatus),
/// it can use memory tracker in the ~ThreadStatus in order to alloc/free untracked_memory,\
/// and by this time user-level memory tracker may be already destroyed.
///
/// As a work-around, reset memory tracker to total, which is always alive.
CurrentThread::get().memory_tracker.setParent(&total_memory_tracker);
);
callback();
});
};
auto s3_buffer = std::make_unique<WriteBufferFromS3>(
settings->client,
bucket,
@ -299,7 +274,7 @@ std::unique_ptr<WriteBufferFromFileBase> DiskS3::writeFile(const String & path,
settings->s3_upload_part_size_multiply_parts_count_threshold,
settings->s3_max_single_part_upload_size,
std::move(object_metadata),
buf_size, std::move(schedule));
buf_size, threadPoolCallbackRunner(getThreadPoolWriter()));
auto create_metadata_callback = [this, path, blob_name, mode] (size_t count)
{

View File

@ -89,10 +89,10 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
format_settings.json.quote_64bit_integers = settings.output_format_json_quote_64bit_integers;
format_settings.json.quote_denormals = settings.output_format_json_quote_denormals;
format_settings.null_as_default = settings.input_format_null_as_default;
format_settings.use_lowercase_column_name = settings.input_format_use_lowercase_column_name;
format_settings.decimal_trailing_zeros = settings.output_format_decimal_trailing_zeros;
format_settings.parquet.row_group_size = settings.output_format_parquet_row_group_size;
format_settings.parquet.import_nested = settings.input_format_parquet_import_nested;
format_settings.parquet.case_insensitive_column_matching = settings.input_format_parquet_case_insensitive_column_matching;
format_settings.parquet.allow_missing_columns = settings.input_format_parquet_allow_missing_columns;
format_settings.pretty.charset = settings.output_format_pretty_grid_charset.toString() == "ASCII" ? FormatSettings::Pretty::Charset::ASCII : FormatSettings::Pretty::Charset::UTF8;
format_settings.pretty.color = settings.output_format_pretty_color;
@ -123,9 +123,11 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
format_settings.arrow.low_cardinality_as_dictionary = settings.output_format_arrow_low_cardinality_as_dictionary;
format_settings.arrow.import_nested = settings.input_format_arrow_import_nested;
format_settings.arrow.allow_missing_columns = settings.input_format_arrow_allow_missing_columns;
format_settings.arrow.case_insensitive_column_matching = settings.input_format_arrow_case_insensitive_column_matching;
format_settings.orc.import_nested = settings.input_format_orc_import_nested;
format_settings.orc.allow_missing_columns = settings.input_format_orc_allow_missing_columns;
format_settings.orc.row_batch_size = settings.input_format_orc_row_batch_size;
format_settings.orc.case_insensitive_column_matching = settings.input_format_orc_case_insensitive_column_matching;
format_settings.defaults_for_omitted_fields = settings.input_format_defaults_for_omitted_fields;
format_settings.capn_proto.enum_comparing_mode = settings.format_capn_proto_enum_comparising_mode;
format_settings.seekable_read = settings.input_format_allow_seeks;

View File

@ -32,7 +32,6 @@ struct FormatSettings
bool null_as_default = true;
bool decimal_trailing_zeros = false;
bool defaults_for_omitted_fields = true;
bool use_lowercase_column_name = false;
bool seekable_read = true;
UInt64 max_rows_to_read_for_schema_inference = 100;
@ -75,6 +74,7 @@ struct FormatSettings
bool low_cardinality_as_dictionary = false;
bool import_nested = false;
bool allow_missing_columns = false;
bool case_insensitive_column_matching = false;
} arrow;
struct
@ -137,6 +137,7 @@ struct FormatSettings
UInt64 row_group_size = 1000000;
bool import_nested = false;
bool allow_missing_columns = false;
bool case_insensitive_column_matching = false;
} parquet;
struct Pretty
@ -217,6 +218,7 @@ struct FormatSettings
bool import_nested = false;
bool allow_missing_columns = false;
int64_t row_batch_size = 100'000;
bool case_insensitive_column_matching = false;
} orc;
/// For capnProto format we should determine how to

View File

@ -7,9 +7,9 @@ namespace DB
{
/// An O(1) time and space consistent hash algorithm by Konstantin Oblakov
struct YandexConsistentHashImpl
struct KostikConsistentHashImpl
{
static constexpr auto name = "yandexConsistentHash";
static constexpr auto name = "kostikConsistentHash";
using HashType = UInt64;
/// Actually it supports UInt64, but it is efficient only if n <= 32768
@ -23,12 +23,12 @@ struct YandexConsistentHashImpl
}
};
using FunctionYandexConsistentHash = FunctionConsistentHashImpl<YandexConsistentHashImpl>;
using FunctionKostikConsistentHash = FunctionConsistentHashImpl<KostikConsistentHashImpl>;
void registerFunctionYandexConsistentHash(FunctionFactory & factory)
void registerFunctionKostikConsistentHash(FunctionFactory & factory)
{
factory.registerFunction<FunctionYandexConsistentHash>();
factory.registerFunction<FunctionKostikConsistentHash>();
factory.registerAlias("yandexConsistentHash", "kostikConsistentHash");
}
}

View File

@ -2,12 +2,12 @@ namespace DB
{
class FunctionFactory;
void registerFunctionYandexConsistentHash(FunctionFactory & factory);
void registerFunctionKostikConsistentHash(FunctionFactory & factory);
void registerFunctionJumpConsistentHash(FunctionFactory & factory);
void registerFunctionsConsistentHashing(FunctionFactory & factory)
{
registerFunctionYandexConsistentHash(factory);
registerFunctionKostikConsistentHash(factory);
registerFunctionJumpConsistentHash(factory);
}

View File

@ -372,8 +372,8 @@ SetPtr makeExplicitSet(
element_type = low_cardinality_type->getDictionaryType();
auto set_key = PreparedSetKey::forLiteral(*right_arg, set_element_types);
if (prepared_sets.count(set_key))
return prepared_sets.at(set_key); /// Already prepared.
if (auto it = prepared_sets.find(set_key); it != prepared_sets.end())
return it->second; /// Already prepared.
Block block;
const auto & right_arg_func = std::dynamic_pointer_cast<ASTFunction>(right_arg);
@ -388,7 +388,7 @@ SetPtr makeExplicitSet(
set->insertFromBlock(block.getColumnsWithTypeAndName());
set->finishInsert();
prepared_sets[set_key] = set;
prepared_sets.emplace(set_key, set);
return set;
}
@ -707,7 +707,7 @@ ASTs ActionsMatcher::doUntuple(const ASTFunction * function, ActionsMatcher::Dat
if (tid != 0)
tuple_ast = tuple_ast->clone();
auto literal = std::make_shared<ASTLiteral>(UInt64(++tid));
auto literal = std::make_shared<ASTLiteral>(UInt64{++tid});
visit(*literal, literal, data);
auto func = makeASTFunction("tupleElement", tuple_ast, literal);
@ -814,14 +814,13 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
if (!data.only_consts)
{
/// We are in the part of the tree that we are not going to compute. You just need to define types.
/// Do not subquery and create sets. We replace "in*" function to "in*IgnoreSet".
/// Do not evaluate subquery and create sets. We replace "in*" function to "in*IgnoreSet".
auto argument_name = node.arguments->children.at(0)->getColumnName();
data.addFunction(
FunctionFactory::instance().get(node.name + "IgnoreSet", data.getContext()),
{ argument_name, argument_name },
column_name);
FunctionFactory::instance().get(node.name + "IgnoreSet", data.getContext()),
{argument_name, argument_name},
column_name);
}
return;
}
@ -1145,8 +1144,8 @@ SetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no_su
if (no_subqueries)
return {};
auto set_key = PreparedSetKey::forSubquery(*right_in_operand);
if (data.prepared_sets.count(set_key))
return data.prepared_sets.at(set_key);
if (auto it = data.prepared_sets.find(set_key); it != data.prepared_sets.end())
return it->second;
/// A special case is if the name of the table is specified on the right side of the IN statement,
/// and the table has the type Set (a previously prepared set).
@ -1160,7 +1159,7 @@ SetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no_su
StorageSet * storage_set = dynamic_cast<StorageSet *>(table.get());
if (storage_set)
{
data.prepared_sets[set_key] = storage_set->getSet();
data.prepared_sets.emplace(set_key, storage_set->getSet());
return storage_set->getSet();
}
}
@ -1174,7 +1173,7 @@ SetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no_su
/// If you already created a Set with the same subquery / table.
if (subquery_for_set.set)
{
data.prepared_sets[set_key] = subquery_for_set.set;
data.prepared_sets.emplace(set_key, subquery_for_set.set);
return subquery_for_set.set;
}
@ -1196,7 +1195,7 @@ SetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no_su
}
subquery_for_set.set = set;
data.prepared_sets[set_key] = set;
data.prepared_sets.emplace(set_key, set);
return set;
}
else
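
The `prepared_sets` changes above are the classic double-lookup elimination: `count()` followed by `at()` hashes the key twice, while a single `find()` hashes once and hands back a reusable iterator; likewise `emplace` avoids `operator[]`'s default-construct-then-assign and never overwrites an existing entry. Illustrated on a plain `std::unordered_map`:

```cpp
#include <cassert>
#include <string>
#include <unordered_map>

int main()
{
    std::unordered_map<std::string, int> prepared_sets{{"key", 42}};

    /// Before: two hash lookups for one read.
    if (prepared_sets.count("key"))
        assert(prepared_sets.at("key") == 42);

    /// After: one lookup, same semantics.
    if (auto it = prepared_sets.find("key"); it != prepared_sets.end())
        assert(it->second == 42);

    /// emplace does not overwrite an existing entry, matching the
    /// "already prepared" early-return above.
    prepared_sets.emplace("key", 0);
    assert(prepared_sets.at("key") == 42);
}
```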

View File

@ -10,6 +10,7 @@
namespace DB
{
class ASTExpressionList;
class ASTFunction;
class ExpressionActions;
@ -89,10 +90,7 @@ struct ScopeStack : WithContext
void addColumn(ColumnWithTypeAndName column);
void addAlias(const std::string & name, std::string alias);
void addArrayJoin(const std::string & source_name, std::string result_name);
void addFunction(
const FunctionOverloadResolverPtr & function,
const Names & argument_names,
std::string result_name);
void addFunction(const FunctionOverloadResolverPtr & function, const Names & argument_names, std::string result_name);
ActionsDAGPtr popLevel();

View File

@ -132,7 +132,9 @@ Cluster::Address::Address(
bool secure_,
Int64 priority_,
UInt32 shard_index_,
UInt32 replica_index_)
UInt32 replica_index_,
String cluster_name_,
String cluster_secret_)
: user(user_), password(password_)
{
bool can_be_local = true;
@ -164,6 +166,8 @@ Cluster::Address::Address(
is_local = can_be_local && isLocal(clickhouse_port);
shard_index = shard_index_;
replica_index = replica_index_;
cluster = cluster_name_;
cluster_secret = cluster_secret_;
}
@ -537,10 +541,14 @@ Cluster::Cluster(
bool treat_local_as_remote,
bool treat_local_port_as_remote,
bool secure,
Int64 priority)
Int64 priority,
String cluster_name,
String cluster_secret)
{
UInt32 current_shard_num = 1;
secret = cluster_secret;
for (const auto & shard : names)
{
Addresses current;
@ -554,7 +562,9 @@ Cluster::Cluster(
secure,
priority,
current_shard_num,
current.size() + 1);
current.size() + 1,
cluster_name,
cluster_secret);
addresses_with_failover.emplace_back(current);
@ -690,6 +700,9 @@ Cluster::Cluster(Cluster::ReplicasAsShardsTag, const Cluster & from, const Setti
}
}
secret = from.secret;
name = from.name;
initMisc();
}
@ -704,6 +717,9 @@ Cluster::Cluster(Cluster::SubclusterTag, const Cluster & from, const std::vector
addresses_with_failover.emplace_back(from.addresses_with_failover.at(index));
}
secret = from.secret;
name = from.name;
initMisc();
}
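A note on the `secret = from.secret; name = from.name;` lines added to the ReplicasAsShardsTag and SubclusterTag constructors: these derived clusters are built field by field rather than via a copy constructor, so every newly added member must be propagated by hand or it is silently default-initialized. A minimal sketch of the pitfall (the struct and field names are illustrative assumptions, not the real Cluster class):

#include <cassert>
#include <string>

struct MiniCluster
{
    std::string name;
    std::string secret;

    struct SubclusterTag {};

    MiniCluster() = default;

    MiniCluster(SubclusterTag, const MiniCluster & from)
    {
        /// Without these two assignments the derived cluster would silently
        /// lose the inter-server credentials and its name.
        secret = from.secret;
        name = from.name;
    }
};

int main()
{
    MiniCluster parent;
    parent.name = "prod";
    parent.secret = "s3cret";

    MiniCluster sub(MiniCluster::SubclusterTag{}, parent);
    assert(sub.secret == parent.secret);
}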

View File

@ -55,7 +55,9 @@ public:
bool treat_local_as_remote,
bool treat_local_port_as_remote,
bool secure = false,
Int64 priority = 1);
Int64 priority = 1,
String cluster_name = "",
String cluster_secret = "");
Cluster(const Cluster &)= delete;
Cluster & operator=(const Cluster &) = delete;
@ -127,7 +129,9 @@ public:
bool secure_ = false,
Int64 priority_ = 1,
UInt32 shard_index_ = 0,
UInt32 replica_index_ = 0);
UInt32 replica_index_ = 0,
String cluster_name = "",
String cluster_secret_ = "");
/// Returns 'escaped_host_name:port'
String toString() const;

View File

@ -350,6 +350,12 @@ void DDLWorker::scheduleTasks(bool reinitialized)
bool maybe_concurrently_deleting = task && !zookeeper->exists(fs::path(task->entry_path) / "active");
return task && !maybe_concurrently_deleting && !maybe_currently_processing;
}
else if (last_skipped_entry_name.has_value() && !queue_fully_loaded_after_initialization_debug_helper)
{
/// If the connection was lost during queue loading,
/// we may start processing from an already finished task (because we don't know yet that it's finished), and that's ok.
return false;
}
else
{
/// Return true if entry should not be scheduled.
@ -365,7 +371,11 @@ void DDLWorker::scheduleTasks(bool reinitialized)
String reason;
auto task = initAndCheckTask(entry_name, reason, zookeeper);
if (!task)
if (task)
{
queue_fully_loaded_after_initialization_debug_helper = true;
}
else
{
LOG_DEBUG(log, "Will not execute task {}: {}", entry_name, reason);
updateMaxDDLEntryID(entry_name);

View File

@ -131,6 +131,9 @@ protected:
std::optional<String> first_failed_task_name;
std::list<DDLTaskPtr> current_tasks;
/// This flag is needed for debug assertions only
bool queue_fully_loaded_after_initialization_debug_helper = false;
Coordination::Stat queue_node_stat;
std::shared_ptr<Poco::Event> queue_updated_event = std::make_shared<Poco::Event>();
std::shared_ptr<Poco::Event> cleanup_event = std::make_shared<Poco::Event>();

View File

@ -259,7 +259,7 @@ NamesAndTypesList ExpressionAnalyzer::getColumnsAfterArrayJoin(ActionsDAGPtr & a
if (!array_join_expression_list)
return src_columns;
getRootActionsNoMakeSet(array_join_expression_list, true, actions, false);
getRootActionsNoMakeSet(array_join_expression_list, actions, false);
auto array_join = addMultipleArrayJoinAction(actions, is_array_join_left);
auto sample_columns = actions->getResultColumns();
@ -294,7 +294,7 @@ NamesAndTypesList ExpressionAnalyzer::analyzeJoin(ActionsDAGPtr & actions, const
const ASTTablesInSelectQueryElement * join = select_query->join();
if (join)
{
getRootActionsNoMakeSet(analyzedJoin().leftKeysList(), true, actions, false);
getRootActionsNoMakeSet(analyzedJoin().leftKeysList(), actions, false);
auto sample_columns = actions->getNamesAndTypesList();
syntax->analyzed_join->addJoinedColumnsAndCorrectTypes(sample_columns, true);
actions = std::make_shared<ActionsDAG>(sample_columns);
@ -332,14 +332,14 @@ void ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions)
{
NameSet unique_keys;
ASTs & group_asts = group_by_ast->children;
for (ssize_t i = 0; i < ssize_t(group_asts.size()); ++i)
for (ssize_t i = 0; i < static_cast<ssize_t>(group_asts.size()); ++i)
{
ssize_t size = group_asts.size();
if (getContext()->getSettingsRef().enable_positional_arguments)
replaceForPositionalArguments(group_asts[i], select_query, ASTSelectQuery::Expression::GROUP_BY);
getRootActionsNoMakeSet(group_asts[i], true, temp_actions, false);
getRootActionsNoMakeSet(group_asts[i], temp_actions, false);
const auto & column_name = group_asts[i]->getColumnName();
@ -405,8 +405,8 @@ void ExpressionAnalyzer::initGlobalSubqueriesAndExternalTables(bool do_global)
{
if (do_global)
{
GlobalSubqueriesVisitor::Data subqueries_data(getContext(), subquery_depth, isRemoteStorage(),
external_tables, subqueries_for_sets, has_global_subqueries);
GlobalSubqueriesVisitor::Data subqueries_data(
getContext(), subquery_depth, isRemoteStorage(), external_tables, subqueries_for_sets, has_global_subqueries);
GlobalSubqueriesVisitor(subqueries_data).visit(query);
}
}
@ -416,7 +416,7 @@ void ExpressionAnalyzer::tryMakeSetForIndexFromSubquery(const ASTPtr & subquery_
{
auto set_key = PreparedSetKey::forSubquery(*subquery_or_table_name);
if (prepared_sets.count(set_key))
if (prepared_sets.contains(set_key))
return; /// Already prepared.
if (auto set_ptr_from_storage_set = isPlainStorageSetInSubquery(subquery_or_table_name))
@ -509,33 +509,62 @@ void SelectQueryExpressionAnalyzer::makeSetsForIndex(const ASTPtr & node)
}
void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_subqueries, ActionsDAGPtr & actions, bool only_consts)
void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions, bool only_consts)
{
LogAST log;
ActionsVisitor::Data visitor_data(getContext(), settings.size_limits_for_set, subquery_depth,
sourceColumns(), std::move(actions), prepared_sets, subqueries_for_sets,
no_subqueries, false, only_consts, !isRemoteStorage());
ActionsVisitor::Data visitor_data(
getContext(),
settings.size_limits_for_set,
subquery_depth,
sourceColumns(),
std::move(actions),
prepared_sets,
subqueries_for_sets,
no_makeset_for_subqueries,
false /* no_makeset */,
only_consts,
!isRemoteStorage() /* create_source_for_in */);
ActionsVisitor(visitor_data, log.stream()).visit(ast);
actions = visitor_data.getActions();
}
void ExpressionAnalyzer::getRootActionsNoMakeSet(const ASTPtr & ast, bool no_subqueries, ActionsDAGPtr & actions, bool only_consts)
void ExpressionAnalyzer::getRootActionsNoMakeSet(const ASTPtr & ast, ActionsDAGPtr & actions, bool only_consts)
{
LogAST log;
ActionsVisitor::Data visitor_data(getContext(), settings.size_limits_for_set, subquery_depth,
sourceColumns(), std::move(actions), prepared_sets, subqueries_for_sets,
no_subqueries, true, only_consts, !isRemoteStorage());
ActionsVisitor::Data visitor_data(
getContext(),
settings.size_limits_for_set,
subquery_depth,
sourceColumns(),
std::move(actions),
prepared_sets,
subqueries_for_sets,
true /* no_makeset_for_subqueries, no_makeset implies no_makeset_for_subqueries */,
true /* no_makeset */,
only_consts,
!isRemoteStorage() /* create_source_for_in */);
ActionsVisitor(visitor_data, log.stream()).visit(ast);
actions = visitor_data.getActions();
}
void ExpressionAnalyzer::getRootActionsForHaving(const ASTPtr & ast, bool no_subqueries, ActionsDAGPtr & actions, bool only_consts)
void ExpressionAnalyzer::getRootActionsForHaving(
const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions, bool only_consts)
{
LogAST log;
ActionsVisitor::Data visitor_data(getContext(), settings.size_limits_for_set, subquery_depth,
sourceColumns(), std::move(actions), prepared_sets, subqueries_for_sets,
no_subqueries, false, only_consts, true);
ActionsVisitor::Data visitor_data(
getContext(),
settings.size_limits_for_set,
subquery_depth,
sourceColumns(),
std::move(actions),
prepared_sets,
subqueries_for_sets,
no_makeset_for_subqueries,
false /* no_makeset */,
only_consts,
true /* create_source_for_in */);
ActionsVisitor(visitor_data, log.stream()).visit(ast);
actions = visitor_data.getActions();
}
@ -547,7 +576,7 @@ void ExpressionAnalyzer::makeAggregateDescriptions(ActionsDAGPtr & actions, Aggr
{
AggregateDescription aggregate;
if (node->arguments)
getRootActionsNoMakeSet(node->arguments, true, actions);
getRootActionsNoMakeSet(node->arguments, actions);
aggregate.column_name = node->getColumnName();
@ -746,8 +775,7 @@ void ExpressionAnalyzer::makeWindowDescriptions(ActionsDAGPtr actions)
// Requiring a constant reference to a shared pointer to non-const AST
// doesn't really look sane, but the visitor does indeed require it.
// Hence we clone the node (not very sane either, I know).
getRootActionsNoMakeSet(window_function.function_node->clone(),
true, actions);
getRootActionsNoMakeSet(window_function.function_node->clone(), actions);
const ASTs & arguments
= window_function.function_node->arguments->children;
@ -867,8 +895,7 @@ ArrayJoinActionPtr SelectQueryExpressionAnalyzer::appendArrayJoin(ExpressionActi
auto array_join = addMultipleArrayJoinAction(step.actions(), is_array_join_left);
before_array_join = chain.getLastActions();
chain.steps.push_back(std::make_unique<ExpressionActionsChain::ArrayJoinStep>(
array_join, step.getResultColumns()));
chain.steps.push_back(std::make_unique<ExpressionActionsChain::ArrayJoinStep>(array_join, step.getResultColumns()));
chain.addStep();
@ -1099,8 +1126,8 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendPrewhere(
}
}
chain.steps.emplace_back(std::make_unique<ExpressionActionsChain::ExpressionActionsStep>(
std::make_shared<ActionsDAG>(std::move(columns))));
chain.steps.emplace_back(
std::make_unique<ExpressionActionsChain::ExpressionActionsStep>(std::make_shared<ActionsDAG>(std::move(columns))));
chain.steps.back()->additional_input = std::move(unused_source_columns);
chain.getLastActions();
chain.addStep();
@ -1210,8 +1237,7 @@ void SelectQueryExpressionAnalyzer::appendWindowFunctionsArguments(
// recursively together with (1b) as ASTFunction::window_definition.
if (getSelectQuery()->window())
{
getRootActionsNoMakeSet(getSelectQuery()->window(),
true /* no_subqueries */, step.actions());
getRootActionsNoMakeSet(getSelectQuery()->window(), step.actions());
}
for (const auto & [_, w] : window_descriptions)
@ -1222,8 +1248,7 @@ void SelectQueryExpressionAnalyzer::appendWindowFunctionsArguments(
// definitions (1a).
// Requiring a constant reference to a shared pointer to non-const AST
// doesn't really look sane, but the visitor does indeed require it.
getRootActionsNoMakeSet(f.function_node->clone(),
true /* no_subqueries */, step.actions());
getRootActionsNoMakeSet(f.function_node->clone(), step.actions());
// (2b) Required function argument columns.
for (const auto & a : f.function_node->arguments->children)
@ -1456,7 +1481,7 @@ ActionsDAGPtr ExpressionAnalyzer::getActionsDAG(bool add_aliases, bool project_r
alias = name;
result_columns.emplace_back(name, alias);
result_names.push_back(alias);
getRootActions(ast, false, actions_dag);
getRootActions(ast, false /* no_makeset_for_subqueries */, actions_dag);
}
if (add_aliases)
@ -1496,7 +1521,7 @@ ExpressionActionsPtr ExpressionAnalyzer::getConstActions(const ColumnsWithTypeAn
{
auto actions = std::make_shared<ActionsDAG>(constant_inputs);
getRootActions(query, true, actions, true);
getRootActions(query, true /* no_makeset_for_subqueries */, actions, true /* only_consts */);
return std::make_shared<ExpressionActions>(actions, ExpressionActionsSettings::fromContext(getContext()));
}
@ -1513,13 +1538,13 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::simpleSelectActions()
}
ExpressionAnalysisResult::ExpressionAnalysisResult(
SelectQueryExpressionAnalyzer & query_analyzer,
const StorageMetadataPtr & metadata_snapshot,
bool first_stage_,
bool second_stage_,
bool only_types,
const FilterDAGInfoPtr & filter_info_,
const Block & source_header)
SelectQueryExpressionAnalyzer & query_analyzer,
const StorageMetadataPtr & metadata_snapshot,
bool first_stage_,
bool second_stage_,
bool only_types,
const FilterDAGInfoPtr & filter_info_,
const Block & source_header)
: first_stage(first_stage_)
, second_stage(second_stage_)
, need_aggregate(query_analyzer.hasAggregation())
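One convention worth noting in the reformatted calls above: every positional bool argument now carries an inline /* name */ comment, e.g. `false /* no_makeset */`, so a call can be decoded without opening the signature. A minimal illustration of the convention (the function and parameter names below are invented for the example, not part of this commit):

#include <iostream>

static void analyze(bool no_makeset_for_subqueries, bool no_makeset, bool only_consts, bool create_source_for_in)
{
    std::cout << no_makeset_for_subqueries << no_makeset << only_consts << create_source_for_in << '\n';
}

int main()
{
    /// Each flag is labeled at the call site, mirroring the style of the
    /// getRootActions* calls above.
    analyze(
        true /* no_makeset_for_subqueries */,
        true /* no_makeset */,
        false /* only_consts */,
        true /* create_source_for_in */);
}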

View File

@ -172,15 +172,15 @@ protected:
ArrayJoinActionPtr addMultipleArrayJoinAction(ActionsDAGPtr & actions, bool is_left) const;
void getRootActions(const ASTPtr & ast, bool no_subqueries, ActionsDAGPtr & actions, bool only_consts = false);
void getRootActions(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions, bool only_consts = false);
/** Similar to getRootActions but does not make sets when analyzing IN functions. It's used in
 * analyzeAggregation, which happens earlier than the analysis of PREWHERE and WHERE. If we made sets
 * there, the prepared sets would not be applicable for the MergeTree index optimization.
*/
void getRootActionsNoMakeSet(const ASTPtr & ast, bool no_subqueries, ActionsDAGPtr & actions, bool only_consts = false);
void getRootActionsNoMakeSet(const ASTPtr & ast, ActionsDAGPtr & actions, bool only_consts = false);
void getRootActionsForHaving(const ASTPtr & ast, bool no_subqueries, ActionsDAGPtr & actions, bool only_consts = false);
void getRootActionsForHaving(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions, bool only_consts = false);
/** Add aggregation keys to aggregation_keys, aggregate functions to aggregate_descriptions,
* Create a set of columns aggregated_columns resulting after the aggregation, if any,

View File

@ -10,6 +10,7 @@
#include <Interpreters/interpretSubquery.h>
#include <Interpreters/SubqueryForSet.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTSubquery.h>
@ -17,7 +18,11 @@
#include <Parsers/IAST.h>
#include <Processors/Executors/CompletedPipelineExecutor.h>
#include <Processors/Sinks/SinkToStorage.h>
#include <Processors/QueryPlan/QueryPlan.h>
#include <Common/typeid_cast.h>
#include <Storages/ColumnsDescription.h>
#include <Storages/ConstraintsDescription.h>
#include <Storages/IStorage.h>
namespace DB
{
@ -34,7 +39,6 @@ public:
{
size_t subquery_depth;
bool is_remote;
size_t external_table_id;
TemporaryTablesMapping & external_tables;
SubqueriesForSets & subqueries_for_sets;
bool & has_global_subqueries;
@ -49,7 +53,6 @@ public:
: WithContext(context_)
, subquery_depth(subquery_depth_)
, is_remote(is_remote_)
, external_table_id(1)
, external_tables(tables)
, subqueries_for_sets(subqueries_for_sets_)
, has_global_subqueries(has_global_subqueries_)
@ -92,48 +95,33 @@ public:
{
/// If this is already an external table, you do not need to add anything. Just remember its presence.
auto temporary_table_name = getIdentifierName(subquery_or_table_name);
bool exists_in_local_map = external_tables.end() != external_tables.find(temporary_table_name);
bool exists_in_local_map = external_tables.contains(temporary_table_name);
bool exists_in_context = static_cast<bool>(getContext()->tryResolveStorageID(
StorageID("", temporary_table_name), Context::ResolveExternal));
if (exists_in_local_map || exists_in_context)
return;
}
String external_table_name = subquery_or_table_name->tryGetAlias();
if (external_table_name.empty())
String alias = subquery_or_table_name->tryGetAlias();
String external_table_name;
if (alias.empty())
{
/// Generate the name for the external table.
external_table_name = "_data" + toString(external_table_id);
while (external_tables.count(external_table_name))
{
++external_table_id;
external_table_name = "_data" + toString(external_table_id);
}
auto hash = subquery_or_table_name->getTreeHash();
external_table_name = fmt::format("_data_{}_{}", hash.first, hash.second);
}
auto interpreter = interpretSubquery(subquery_or_table_name, getContext(), subquery_depth, {});
Block sample = interpreter->getSampleBlock();
NamesAndTypesList columns = sample.getNamesAndTypesList();
auto external_storage_holder = std::make_shared<TemporaryTableHolder>(
getContext(),
ColumnsDescription{columns},
ConstraintsDescription{},
nullptr,
/*create_for_global_subquery*/ true);
StoragePtr external_storage = external_storage_holder->getTable();
else
external_table_name = alias;
/** We replace the subquery with the name of the temporary table.
 * It is in this form that the query will be sent to the remote server.
 * The temporary table will be shipped to the remote server, and on its side,
 * instead of executing the subquery, it only needs to be read.
* TODO We can do better than using alias to name external tables
*/
auto database_and_table_name = std::make_shared<ASTTableIdentifier>(external_table_name);
if (set_alias)
{
String alias = subquery_or_table_name->tryGetAlias();
if (auto * table_name = subquery_or_table_name->as<ASTTableIdentifier>())
if (alias.empty())
alias = table_name->shortName();
@ -151,8 +139,27 @@ public:
else
ast = database_and_table_name;
external_tables[external_table_name] = external_storage_holder;
if (external_tables.contains(external_table_name))
return;
auto interpreter = interpretSubquery(subquery_or_table_name, getContext(), subquery_depth, {});
Block sample = interpreter->getSampleBlock();
NamesAndTypesList columns = sample.getNamesAndTypesList();
auto external_storage_holder = std::make_shared<TemporaryTableHolder>(
getContext(),
ColumnsDescription{columns},
ConstraintsDescription{},
nullptr,
/*create_for_global_subquery*/ true);
StoragePtr external_storage = external_storage_holder->getTable();
external_tables.emplace(external_table_name, external_storage_holder);
/// We need to materialize external tables immediately because reading from distributed
/// tables might generate local plans which can refer to external tables during index
/// analysis. It's too late to populate the external table via CreatingSetsTransform.
if (getContext()->getSettingsRef().use_index_for_in_with_subqueries)
{
auto external_table = external_storage_holder->getTable();
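The naming scheme above replaces the old global counter with a hash of the subquery AST: the same subquery now always maps to the same `_data_<hash>` name, which is what lets the later `external_tables.contains(external_table_name)` check deduplicate repeated subqueries. A sketch of the idea, with std::hash over the query text standing in for `IAST::getTreeHash()` (which returns a pair of 64-bit halves in ClickHouse):

#include <cassert>
#include <functional>
#include <string>

static std::string externalTableName(const std::string & subquery_text)
{
    /// Stand-in for getTreeHash(): any stable hash of the subquery works
    /// for the purposes of this illustration.
    const size_t hash = std::hash<std::string>{}(subquery_text);
    return "_data_" + std::to_string(hash);
}

int main()
{
    /// The same subquery yields the same name, so its external table is
    /// created (and shipped to remote servers) only once.
    assert(externalTableName("SELECT 1") == externalTableName("SELECT 1"));
}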

View File

@ -1242,10 +1242,6 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<P
}
preliminary_sort();
// If there are no global subqueries, we can run subqueries only when we receive them on the server.
if (!query_analyzer->hasGlobalSubqueries() && !subqueries_for_sets.empty())
executeSubqueriesInSetsAndJoins(query_plan, subqueries_for_sets);
}
if (expressions.second_stage || from_aggregation_stage)
@ -1428,7 +1424,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<P
}
}
if (!subqueries_for_sets.empty() && (expressions.hasHaving() || query_analyzer->hasGlobalSubqueries()))
if (!subqueries_for_sets.empty())
executeSubqueriesInSetsAndJoins(query_plan, subqueries_for_sets);
}
@ -1892,7 +1888,7 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc
&& limit_length <= std::numeric_limits<UInt64>::max() - limit_offset
&& limit_length + limit_offset < max_block_size)
{
max_block_size = std::max(UInt64(1), limit_length + limit_offset);
max_block_size = std::max(UInt64{1}, limit_length + limit_offset);
max_threads_execute_query = max_streams = 1;
}
@ -2578,11 +2574,11 @@ void InterpreterSelectQuery::executeExtremes(QueryPlan & query_plan)
void InterpreterSelectQuery::executeSubqueriesInSetsAndJoins(QueryPlan & query_plan, SubqueriesForSets & subqueries_for_sets)
{
const auto & input_order_info = query_info.input_order_info
? query_info.input_order_info
: (query_info.projection ? query_info.projection->input_order_info : nullptr);
if (input_order_info)
executeMergeSorted(query_plan, input_order_info->order_key_prefix_descr, 0, "before creating sets for subqueries and joins");
// const auto & input_order_info = query_info.input_order_info
// ? query_info.input_order_info
// : (query_info.projection ? query_info.projection->input_order_info : nullptr);
// if (input_order_info)
// executeMergeSorted(query_plan, input_order_info->order_key_prefix_descr, 0, "before creating sets for subqueries and joins");
const Settings & settings = context->getSettingsRef();

View File

@ -0,0 +1,39 @@
#include "threadPoolCallbackRunner.h"
#include <base/scope_guard_safe.h>
#include <Common/CurrentThread.h>
namespace DB
{
CallbackRunner threadPoolCallbackRunner(ThreadPool & pool)
{
return [pool = &pool, thread_group = CurrentThread::getGroup()](auto callback)
{
pool->scheduleOrThrow([callback = std::move(callback), thread_group]()
{
if (thread_group)
CurrentThread::attachTo(thread_group);
SCOPE_EXIT_SAFE({
if (thread_group)
CurrentThread::detachQueryIfNotDetached();
/// After we detach from the thread_group, the parent of the memory_tracker inside ThreadStatus will be reset to its parent.
/// Typically, it may change from Process to User.
/// Usually this is ok, because a thread pool task is executed before the user-level memory tracker is destroyed.
/// However, the thread could stay alive inside the thread pool, and its ThreadStatus as well.
/// When, finally, we destroy the thread (and the ThreadStatus),
/// it can use the memory tracker in ~ThreadStatus in order to alloc/free untracked_memory,
/// and by this time the user-level memory tracker may be already destroyed.
///
/// As a workaround, reset the memory tracker to the total one, which is always alive.
CurrentThread::get().memory_tracker.setParent(&total_memory_tracker);
});
callback();
});
};
}
}

View File

@ -0,0 +1,15 @@
#pragma once
#include <Common/ThreadPool.h>
namespace DB
{
/// Higher-order function to run callbacks (functions with 'void()' signature) somewhere asynchronously
using CallbackRunner = std::function<void(std::function<void()>)>;
/// Creates CallbackRunner that runs every callback with 'pool->scheduleOrThrow()'
CallbackRunner threadPoolCallbackRunner(ThreadPool & pool);
}
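A minimal usage sketch for the new helper, assuming only the two declarations above (the pool size and the callback body are arbitrary choices for the example):

#include <Common/ThreadPool.h>
#include <Interpreters/threadPoolCallbackRunner.h>

void example()
{
    ThreadPool pool(16);
    DB::CallbackRunner runner = DB::threadPoolCallbackRunner(pool);

    /// The callback is scheduled on the pool and attached to the caller's
    /// thread group, so per-query resource accounting keeps working.
    runner([] { /* some IO- or CPU-bound work */ });

    pool.wait();
}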

View File

@ -139,7 +139,11 @@ void ArrowBlockInputFormat::prepareReader()
}
arrow_column_to_ch_column = std::make_unique<ArrowColumnToCHColumn>(
getPort().getHeader(), "Arrow", format_settings.arrow.import_nested, format_settings.arrow.allow_missing_columns);
getPort().getHeader(),
"Arrow",
format_settings.arrow.import_nested,
format_settings.arrow.allow_missing_columns,
format_settings.arrow.case_insensitive_column_matching);
missing_columns = arrow_column_to_ch_column->getMissingColumns(*schema);
if (stream)

View File

@ -31,6 +31,7 @@
#include <algorithm>
#include <arrow/builder.h>
#include <arrow/array.h>
#include <boost/algorithm/string/case_conv.hpp>
/// UINT16 and UINT32 are processed separately, see comments in readColumnFromArrowColumn.
#define FOR_ARROW_NUMERIC_TYPES(M) \
@ -484,19 +485,22 @@ static void checkStatus(const arrow::Status & status, const String & column_name
throw Exception{ErrorCodes::UNKNOWN_EXCEPTION, "Error with a {} column '{}': {}.", format_name, column_name, status.ToString()};
}
Block ArrowColumnToCHColumn::arrowSchemaToCHHeader(const arrow::Schema & schema, const std::string & format_name, const Block * hint_header)
Block ArrowColumnToCHColumn::arrowSchemaToCHHeader(
const arrow::Schema & schema, const std::string & format_name, const Block * hint_header, bool ignore_case)
{
ColumnsWithTypeAndName sample_columns;
std::unordered_set<String> nested_table_names;
if (hint_header)
nested_table_names = Nested::getAllTableNames(*hint_header);
nested_table_names = Nested::getAllTableNames(*hint_header, ignore_case);
for (const auto & field : schema.fields())
{
if (hint_header && !hint_header->has(field->name()) && !nested_table_names.contains(field->name()))
if (hint_header && !hint_header->has(field->name(), ignore_case)
&& !nested_table_names.contains(ignore_case ? boost::to_lower_copy(field->name()) : field->name()))
continue;
/// Create an empty arrow column by its type and convert it to a ClickHouse column.
arrow::MemoryPool* pool = arrow::default_memory_pool();
arrow::MemoryPool * pool = arrow::default_memory_pool();
std::unique_ptr<arrow::ArrayBuilder> array_builder;
arrow::Status status = MakeBuilder(pool, field->type(), &array_builder);
checkStatus(status, field->name(), format_name);
@ -516,20 +520,31 @@ Block ArrowColumnToCHColumn::arrowSchemaToCHHeader(const arrow::Schema & schema,
}
ArrowColumnToCHColumn::ArrowColumnToCHColumn(
const Block & header_, const std::string & format_name_, bool import_nested_, bool allow_missing_columns_)
: header(header_), format_name(format_name_), import_nested(import_nested_), allow_missing_columns(allow_missing_columns_)
const Block & header_,
const std::string & format_name_,
bool import_nested_,
bool allow_missing_columns_,
bool case_insensitive_matching_)
: header(header_)
, format_name(format_name_)
, import_nested(import_nested_)
, allow_missing_columns(allow_missing_columns_)
, case_insensitive_matching(case_insensitive_matching_)
{
}
void ArrowColumnToCHColumn::arrowTableToCHChunk(Chunk & res, std::shared_ptr<arrow::Table> & table)
{
NameToColumnPtr name_to_column_ptr;
for (const auto & column_name : table->ColumnNames())
for (auto column_name : table->ColumnNames())
{
std::shared_ptr<arrow::ChunkedArray> arrow_column = table->GetColumnByName(column_name);
if (!arrow_column)
throw Exception(ErrorCodes::DUPLICATE_COLUMN, "Column '{}' is duplicated", column_name);
name_to_column_ptr[column_name] = arrow_column;
if (case_insensitive_matching)
boost::to_lower(column_name);
name_to_column_ptr[std::move(column_name)] = arrow_column;
}
arrowColumnsToCHChunk(res, name_to_column_ptr);
@ -548,22 +563,31 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr &
{
const ColumnWithTypeAndName & header_column = header.getByPosition(column_i);
auto search_column_name = header_column.name;
if (case_insensitive_matching)
boost::to_lower(search_column_name);
bool read_from_nested = false;
String nested_table_name = Nested::extractTableName(header_column.name);
if (!name_to_column_ptr.contains(header_column.name))
String search_nested_table_name = nested_table_name;
if (case_insensitive_matching)
boost::to_lower(search_nested_table_name);
if (!name_to_column_ptr.contains(search_column_name))
{
/// Check if it's a column from nested table.
if (import_nested && name_to_column_ptr.contains(nested_table_name))
if (import_nested && name_to_column_ptr.contains(search_nested_table_name))
{
if (!nested_tables.contains(nested_table_name))
if (!nested_tables.contains(search_nested_table_name))
{
std::shared_ptr<arrow::ChunkedArray> arrow_column = name_to_column_ptr[nested_table_name];
ColumnsWithTypeAndName cols = {readColumnFromArrowColumn(arrow_column, nested_table_name, format_name, false, dictionary_values, true)};
std::shared_ptr<arrow::ChunkedArray> arrow_column = name_to_column_ptr[search_nested_table_name];
ColumnsWithTypeAndName cols
= {readColumnFromArrowColumn(arrow_column, nested_table_name, format_name, false, dictionary_values, true)};
Block block(cols);
nested_tables[nested_table_name] = std::make_shared<Block>(Nested::flatten(block));
nested_tables[search_nested_table_name] = std::make_shared<Block>(Nested::flatten(block));
}
read_from_nested = nested_tables[nested_table_name]->has(header_column.name);
read_from_nested = nested_tables[search_nested_table_name]->has(header_column.name, case_insensitive_matching);
}
if (!read_from_nested)
@ -580,13 +604,19 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr &
}
}
std::shared_ptr<arrow::ChunkedArray> arrow_column = name_to_column_ptr[header_column.name];
ColumnWithTypeAndName column;
if (read_from_nested)
column = nested_tables[nested_table_name]->getByName(header_column.name);
{
column = nested_tables[search_nested_table_name]->getByName(header_column.name, case_insensitive_matching);
if (case_insensitive_matching)
column.name = header_column.name;
}
else
{
auto arrow_column = name_to_column_ptr[search_column_name];
column = readColumnFromArrowColumn(arrow_column, header_column.name, format_name, false, dictionary_values, true);
}
try
{
@ -594,8 +624,11 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr &
}
catch (Exception & e)
{
e.addMessage(fmt::format("while converting column {} from type {} to type {}",
backQuote(header_column.name), column.type->getName(), header_column.type->getName()));
e.addMessage(fmt::format(
"while converting column {} from type {} to type {}",
backQuote(header_column.name),
column.type->getName(),
header_column.type->getName()));
throw;
}
@ -609,22 +642,23 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr &
std::vector<size_t> ArrowColumnToCHColumn::getMissingColumns(const arrow::Schema & schema) const
{
std::vector<size_t> missing_columns;
auto block_from_arrow = arrowSchemaToCHHeader(schema, format_name, &header);
auto block_from_arrow = arrowSchemaToCHHeader(schema, format_name, &header, case_insensitive_matching);
auto flatten_block_from_arrow = Nested::flatten(block_from_arrow);
for (size_t i = 0, columns = header.columns(); i < columns; ++i)
{
const auto & column = header.getByPosition(i);
const auto & header_column = header.getByPosition(i);
bool read_from_nested = false;
String nested_table_name = Nested::extractTableName(column.name);
if (!block_from_arrow.has(column.name))
String nested_table_name = Nested::extractTableName(header_column.name);
if (!block_from_arrow.has(header_column.name, case_insensitive_matching))
{
if (import_nested && block_from_arrow.has(nested_table_name))
read_from_nested = flatten_block_from_arrow.has(column.name);
if (import_nested && block_from_arrow.has(nested_table_name, case_insensitive_matching))
read_from_nested = flatten_block_from_arrow.has(header_column.name, case_insensitive_matching);
if (!read_from_nested)
{
if (!allow_missing_columns)
throw Exception{ErrorCodes::THERE_IS_NO_COLUMN, "Column '{}' is not presented in input data.", column.name};
throw Exception{ErrorCodes::THERE_IS_NO_COLUMN, "Column '{}' is not presented in input data.", header_column.name};
missing_columns.push_back(i);
}

View File

@ -25,7 +25,8 @@ public:
const Block & header_,
const std::string & format_name_,
bool import_nested_,
bool allow_missing_columns_);
bool allow_missing_columns_,
bool case_insensitive_matching_ = false);
void arrowTableToCHChunk(Chunk & res, std::shared_ptr<arrow::Table> & table);
@ -36,7 +37,8 @@ public:
/// Transform arrow schema to ClickHouse header. If hint_header is provided,
/// we will skip columns in schema that are not in hint_header.
static Block arrowSchemaToCHHeader(const arrow::Schema & schema, const std::string & format_name, const Block * hint_header = nullptr);
static Block arrowSchemaToCHHeader(
const arrow::Schema & schema, const std::string & format_name, const Block * hint_header = nullptr, bool ignore_case = false);
private:
const Block & header;
@ -44,6 +46,7 @@ private:
bool import_nested;
/// If false, throw an exception if some columns in the header do not exist in the arrow table.
bool allow_missing_columns;
bool case_insensitive_matching;
/// Map {column name : dictionary column}.
/// To avoid converting dictionary from Arrow Dictionary
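The case-insensitive matching added above canonicalizes names by lower-casing both the stored keys and the looked-up name. A small standalone sketch of the same technique, using boost::to_lower_copy as the hunks do (the map contents are made up for the example):

#include <cassert>
#include <string>
#include <unordered_map>
#include <boost/algorithm/string/case_conv.hpp>

int main()
{
    const bool ignore_case = true;

    /// Keys are stored lower-cased once; every lookup lower-cases the
    /// requested name, so "ID", "Id" and "id" all resolve to the same column.
    std::unordered_map<std::string, size_t> name_to_column;
    name_to_column[boost::to_lower_copy(std::string("ID"))] = 0;

    const std::string requested = "Id";
    const std::string search_name = ignore_case ? boost::to_lower_copy(requested) : requested;
    assert(name_to_column.contains(search_name));
}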

View File

@ -53,9 +53,6 @@ Chunk ORCBlockInputFormat::generate()
if (!table || !table->num_rows())
return res;
if (format_settings.use_lowercase_column_name)
table = *table->RenameColumns(include_column_names);
arrow_column_to_ch_column->arrowTableToCHChunk(res, table);
/// If defaults_for_omitted_fields is true, calculate the default values from default expression for omitted fields.
/// Otherwise fill the missing columns with zero values of its type.
@ -73,7 +70,6 @@ void ORCBlockInputFormat::resetParser()
file_reader.reset();
include_indices.clear();
include_column_names.clear();
block_missing_values.clear();
}
@ -125,20 +121,6 @@ static void getFileReaderAndSchema(
if (!read_schema_result.ok())
throw Exception(read_schema_result.status().ToString(), ErrorCodes::BAD_ARGUMENTS);
schema = std::move(read_schema_result).ValueOrDie();
if (format_settings.use_lowercase_column_name)
{
std::vector<std::shared_ptr<::arrow::Field>> fields;
fields.reserve(schema->num_fields());
for (int i = 0; i < schema->num_fields(); ++i)
{
const auto& field = schema->field(i);
auto name = field->name();
boost::to_lower(name);
fields.push_back(field->WithName(name));
}
schema = arrow::schema(fields, schema->metadata());
}
}
void ORCBlockInputFormat::prepareReader()
@ -149,12 +131,17 @@ void ORCBlockInputFormat::prepareReader()
return;
arrow_column_to_ch_column = std::make_unique<ArrowColumnToCHColumn>(
getPort().getHeader(), "ORC", format_settings.orc.import_nested, format_settings.orc.allow_missing_columns);
getPort().getHeader(),
"ORC",
format_settings.orc.import_nested,
format_settings.orc.allow_missing_columns,
format_settings.orc.case_insensitive_column_matching);
missing_columns = arrow_column_to_ch_column->getMissingColumns(*schema);
const bool ignore_case = format_settings.orc.case_insensitive_column_matching;
std::unordered_set<String> nested_table_names;
if (format_settings.orc.import_nested)
nested_table_names = Nested::getAllTableNames(getPort().getHeader());
nested_table_names = Nested::getAllTableNames(getPort().getHeader(), ignore_case);
/// In ReadStripe, column indices should start from 1,
/// because 0 indicates selecting all columns.
@ -165,19 +152,18 @@ void ORCBlockInputFormat::prepareReader()
/// so we should recursively count the number of indices we need for this type.
int indexes_count = countIndicesForType(schema->field(i)->type());
const auto & name = schema->field(i)->name();
if (getPort().getHeader().has(name) || nested_table_names.contains(name))
if (getPort().getHeader().has(name, ignore_case) || nested_table_names.contains(ignore_case ? boost::to_lower_copy(name) : name))
{
for (int j = 0; j != indexes_count; ++j)
{
include_indices.push_back(index + j);
include_column_names.push_back(name);
}
}
index += indexes_count;
}
}
ORCSchemaReader::ORCSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_) : ISchemaReader(in_), format_settings(format_settings_)
ORCSchemaReader::ORCSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_)
: ISchemaReader(in_), format_settings(format_settings_)
{
}

View File

@ -47,7 +47,6 @@ private:
// indices of columns to read from ORC file
std::vector<int> include_indices;
std::vector<String> include_column_names;
std::vector<size_t> missing_columns;
BlockMissingValues block_missing_values;

View File

@ -53,11 +53,7 @@ Chunk ParquetBlockInputFormat::generate()
std::shared_ptr<arrow::Table> table;
arrow::Status read_status = file_reader->ReadRowGroup(row_group_current, column_indices, &table);
if (!read_status.ok())
throw ParsingException{"Error while reading Parquet data: " + read_status.ToString(),
ErrorCodes::CANNOT_READ_ALL_DATA};
if (format_settings.use_lowercase_column_name)
table = *table->RenameColumns(column_names);
throw ParsingException{"Error while reading Parquet data: " + read_status.ToString(), ErrorCodes::CANNOT_READ_ALL_DATA};
++row_group_current;
@ -78,7 +74,6 @@ void ParquetBlockInputFormat::resetParser()
file_reader.reset();
column_indices.clear();
column_names.clear();
row_group_current = 0;
block_missing_values.clear();
}
@ -123,20 +118,6 @@ static void getFileReaderAndSchema(
return;
THROW_ARROW_NOT_OK(parquet::arrow::OpenFile(std::move(arrow_file), arrow::default_memory_pool(), &file_reader));
THROW_ARROW_NOT_OK(file_reader->GetSchema(&schema));
if (format_settings.use_lowercase_column_name)
{
std::vector<std::shared_ptr<::arrow::Field>> fields;
fields.reserve(schema->num_fields());
for (int i = 0; i < schema->num_fields(); ++i)
{
const auto& field = schema->field(i);
auto name = field->name();
boost::to_lower(name);
fields.push_back(field->WithName(name));
}
schema = arrow::schema(fields, schema->metadata());
}
}
void ParquetBlockInputFormat::prepareReader()
@ -149,12 +130,18 @@ void ParquetBlockInputFormat::prepareReader()
row_group_total = file_reader->num_row_groups();
row_group_current = 0;
arrow_column_to_ch_column = std::make_unique<ArrowColumnToCHColumn>(getPort().getHeader(), "Parquet", format_settings.parquet.import_nested, format_settings.parquet.allow_missing_columns);
arrow_column_to_ch_column = std::make_unique<ArrowColumnToCHColumn>(
getPort().getHeader(),
"Parquet",
format_settings.parquet.import_nested,
format_settings.parquet.allow_missing_columns,
format_settings.parquet.case_insensitive_column_matching);
missing_columns = arrow_column_to_ch_column->getMissingColumns(*schema);
const bool ignore_case = format_settings.parquet.case_insensitive_column_matching;
std::unordered_set<String> nested_table_names;
if (format_settings.parquet.import_nested)
nested_table_names = Nested::getAllTableNames(getPort().getHeader());
nested_table_names = Nested::getAllTableNames(getPort().getHeader(), ignore_case);
int index = 0;
for (int i = 0; i < schema->num_fields(); ++i)
@ -164,19 +151,19 @@ void ParquetBlockInputFormat::prepareReader()
/// count the number of indices we need for this type.
int indexes_count = countIndicesForType(schema->field(i)->type());
const auto & name = schema->field(i)->name();
if (getPort().getHeader().has(name) || nested_table_names.contains(name))
if (getPort().getHeader().has(name, ignore_case) || nested_table_names.contains(ignore_case ? boost::to_lower_copy(name) : name))
{
for (int j = 0; j != indexes_count; ++j)
{
column_indices.push_back(index + j);
column_names.push_back(name);
}
}
index += indexes_count;
}
}
ParquetSchemaReader::ParquetSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_) : ISchemaReader(in_), format_settings(format_settings_)
ParquetSchemaReader::ParquetSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_)
: ISchemaReader(in_), format_settings(format_settings_)
{
}

View File

@ -40,7 +40,6 @@ private:
int row_group_total = 0;
// indices of columns to read from Parquet file
std::vector<int> column_indices;
std::vector<String> column_names;
std::unique_ptr<ArrowColumnToCHColumn> arrow_column_to_ch_column;
int row_group_current = 0;
std::vector<size_t> missing_columns;

View File

@ -399,6 +399,7 @@ bool MergeTreeWhereOptimizer::cannotBeMoved(const ASTPtr & ptr, bool is_final) c
return true;
/// disallow GLOBAL IN, GLOBAL NOT IN
/// TODO why?
if ("globalIn" == function_ptr->name
|| "globalNotIn" == function_ptr->name)
return true;

View File

@ -12,6 +12,7 @@
#include <Interpreters/ExpressionAnalyzer.h>
#include <Interpreters/TreeRewriter.h>
#include <Interpreters/evaluateConstantExpression.h>
#include <Interpreters/threadPoolCallbackRunner.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTInsertQuery.h>
@ -20,6 +21,7 @@
#include <Storages/StorageFactory.h>
#include <Storages/StorageS3.h>
#include <Storages/StorageS3Settings.h>
#include <Storages/StorageSnapshot.h>
#include <Storages/PartitionedSink.h>
#include <IO/ReadBufferFromS3.h>
@ -374,6 +376,16 @@ static bool checkIfObjectExists(const std::shared_ptr<Aws::S3::S3Client> & clien
return false;
}
// TODO: common thread pool for IO must be used instead after PR #35150
static ThreadPool & getThreadPoolStorageS3()
{
constexpr size_t pool_size = 100;
constexpr size_t queue_size = 1000000;
static ThreadPool pool(pool_size, pool_size, queue_size);
return pool;
}
class StorageS3Sink : public SinkToStorage
{
public:
@ -398,7 +410,7 @@ public:
std::make_unique<WriteBufferFromS3>(
client, bucket, key, min_upload_part_size,
upload_part_size_multiply_factor, upload_part_size_multiply_parts_count_threshold,
max_single_part_upload_size), compression_method, 3);
max_single_part_upload_size, std::nullopt, DBMS_DEFAULT_BUFFER_SIZE, threadPoolCallbackRunner(getThreadPoolStorageS3())), compression_method, 3);
writer = FormatFactory::instance().getOutputFormatParallelIfPossible(format, *write_buf, sample_block, context, {}, format_settings);
}

View File

@ -9,11 +9,10 @@ from github import Github
from env_helper import (
GITHUB_REPOSITORY,
TEMP_PATH,
REPO_COPY,
GITHUB_RUN_URL,
REPORTS_PATH,
GITHUB_SERVER_URL,
GITHUB_RUN_ID,
REPO_COPY,
TEMP_PATH,
)
from s3_helper import S3Helper
from get_robot_token import get_best_robot_token
@ -126,7 +125,7 @@ if __name__ == "__main__":
logging.info("Exception uploading file %s text %s", f, ex)
paths[f] = ""
report_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_RUN_ID}"
report_url = GITHUB_RUN_URL
if paths["runlog.log"]:
report_url = paths["runlog.log"]
if paths["main.log"]:

View File

@ -11,7 +11,7 @@ from env_helper import (
TEMP_PATH,
GITHUB_REPOSITORY,
GITHUB_SERVER_URL,
GITHUB_RUN_ID,
GITHUB_RUN_URL,
)
from report import create_build_html_report
from s3_helper import S3Helper
@ -180,9 +180,7 @@ if __name__ == "__main__":
branch_name = "PR #{}".format(pr_info.number)
branch_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/pull/{pr_info.number}"
commit_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/commit/{pr_info.sha}"
task_url = (
f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_RUN_ID or '0'}"
)
task_url = GITHUB_RUN_URL
report = create_build_html_report(
build_check_name,
build_results,

View File

@ -11,7 +11,7 @@ from typing import Dict, List, Optional, Set, Tuple, Union
from github import Github
from env_helper import GITHUB_WORKSPACE, RUNNER_TEMP
from env_helper import GITHUB_WORKSPACE, RUNNER_TEMP, GITHUB_RUN_URL
from s3_helper import S3Helper
from pr_info import PRInfo
from get_robot_token import get_best_robot_token, get_parameter_from_ssm
@ -234,6 +234,7 @@ def build_and_push_one_image(
with open(build_log, "wb") as bl:
cmd = (
"docker buildx build --builder default "
f"--label build-url={GITHUB_RUN_URL} "
f"{from_tag_arg}"
f"--build-arg BUILDKIT_INLINE_CACHE=1 "
f"--tag {image.repo}:{version_string} "

View File

@ -4,6 +4,7 @@ import os
import unittest
from unittest.mock import patch
from env_helper import GITHUB_RUN_URL
from pr_info import PRInfo
import docker_images_check as di
@ -117,7 +118,8 @@ class TestDockerImageCheck(unittest.TestCase):
mock_popen.assert_called_once()
mock_machine.assert_not_called()
self.assertIn(
"docker buildx build --builder default --build-arg FROM_TAG=version "
f"docker buildx build --builder default --label build-url={GITHUB_RUN_URL} "
"--build-arg FROM_TAG=version "
"--build-arg BUILDKIT_INLINE_CACHE=1 --tag name:version --cache-from "
"type=registry,ref=name:version --push --progress plain path",
mock_popen.call_args.args,
@ -133,7 +135,8 @@ class TestDockerImageCheck(unittest.TestCase):
mock_popen.assert_called_once()
mock_machine.assert_not_called()
self.assertIn(
"docker buildx build --builder default --build-arg FROM_TAG=version2 "
f"docker buildx build --builder default --label build-url={GITHUB_RUN_URL} "
"--build-arg FROM_TAG=version2 "
"--build-arg BUILDKIT_INLINE_CACHE=1 --tag name:version2 --cache-from "
"type=registry,ref=name:version2 --progress plain path",
mock_popen.call_args.args,
@ -149,7 +152,7 @@ class TestDockerImageCheck(unittest.TestCase):
mock_popen.assert_called_once()
mock_machine.assert_not_called()
self.assertIn(
"docker buildx build --builder default "
f"docker buildx build --builder default --label build-url={GITHUB_RUN_URL} "
"--build-arg BUILDKIT_INLINE_CACHE=1 --tag name:version2 --cache-from "
"type=registry,ref=name:version2 --progress plain path",
mock_popen.call_args.args,

View File

@ -7,9 +7,10 @@ CACHES_PATH = os.getenv("CACHES_PATH", TEMP_PATH)
CLOUDFLARE_TOKEN = os.getenv("CLOUDFLARE_TOKEN")
GITHUB_EVENT_PATH = os.getenv("GITHUB_EVENT_PATH")
GITHUB_REPOSITORY = os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse")
GITHUB_RUN_ID = os.getenv("GITHUB_RUN_ID")
GITHUB_RUN_ID = os.getenv("GITHUB_RUN_ID", "0")
GITHUB_SERVER_URL = os.getenv("GITHUB_SERVER_URL", "https://github.com")
GITHUB_WORKSPACE = os.getenv("GITHUB_WORKSPACE", os.path.abspath("../../"))
GITHUB_RUN_URL = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_RUN_ID}"
IMAGES_PATH = os.getenv("IMAGES_PATH")
REPORTS_PATH = os.getenv("REPORTS_PATH", "./reports")
REPO_COPY = os.getenv("REPO_COPY", os.path.abspath("../../"))

View File

@ -2,7 +2,7 @@
import logging
from github import Github
from env_helper import GITHUB_SERVER_URL, GITHUB_REPOSITORY, GITHUB_RUN_ID
from env_helper import GITHUB_RUN_URL
from pr_info import PRInfo
from get_robot_token import get_best_robot_token
from commit_status_helper import get_commit
@ -33,7 +33,7 @@ if __name__ == "__main__":
gh = Github(get_best_robot_token())
commit = get_commit(gh, pr_info.sha)
url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_RUN_ID}"
url = GITHUB_RUN_URL
statuses = filter_statuses(list(commit.get_statuses()))
if NAME in statuses and statuses[NAME].state == "pending":
commit.create_status(

View File

@ -11,6 +11,7 @@ import re
from github import Github
from env_helper import GITHUB_RUN_URL
from pr_info import PRInfo
from s3_helper import S3Helper
from get_robot_token import get_best_robot_token
@ -88,9 +89,9 @@ if __name__ == "__main__":
else:
pr_link = f"https://github.com/ClickHouse/ClickHouse/pull/{pr_info.number}"
task_url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}"
docker_env += ' -e CHPC_ADD_REPORT_LINKS="<a href={}>Job (actions)</a> <a href={}>Tested commit</a>"'.format(
task_url, pr_link
docker_env += (
f' -e CHPC_ADD_REPORT_LINKS="<a href={GITHUB_RUN_URL}>'
f'Job (actions)</a> <a href={pr_link}>Tested commit</a>"'
)
if "RUN_BY_HASH_TOTAL" in os.environ:
@ -199,7 +200,7 @@ if __name__ == "__main__":
status = "failure"
message = "No message in report."
report_url = task_url
report_url = GITHUB_RUN_URL
if paths["runlog.log"]:
report_url = paths["runlog.log"]

View File

@ -8,7 +8,7 @@ from build_download_helper import get_with_retries
from env_helper import (
GITHUB_REPOSITORY,
GITHUB_SERVER_URL,
GITHUB_RUN_ID,
GITHUB_RUN_URL,
GITHUB_EVENT_PATH,
)
@ -111,7 +111,7 @@ class PRInfo:
self.sha = github_event["pull_request"]["head"]["sha"]
repo_prefix = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}"
self.task_url = f"{repo_prefix}/actions/runs/{GITHUB_RUN_ID or '0'}"
self.task_url = GITHUB_RUN_URL
self.repo_full_name = GITHUB_REPOSITORY
self.commit_html_url = f"{repo_prefix}/commits/{self.sha}"
@ -142,7 +142,7 @@ class PRInfo:
self.sha = github_event["after"]
pull_request = get_pr_for_commit(self.sha, github_event["ref"])
repo_prefix = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}"
self.task_url = f"{repo_prefix}/actions/runs/{GITHUB_RUN_ID or '0'}"
self.task_url = GITHUB_RUN_URL
self.commit_html_url = f"{repo_prefix}/commits/{self.sha}"
self.repo_full_name = GITHUB_REPOSITORY
if pull_request is None or pull_request["state"] == "closed":
@ -180,7 +180,7 @@ class PRInfo:
self.number = 0
self.labels = {}
repo_prefix = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}"
self.task_url = f"{repo_prefix}/actions/runs/{GITHUB_RUN_ID or '0'}"
self.task_url = GITHUB_RUN_URL
self.commit_html_url = f"{repo_prefix}/commits/{self.sha}"
self.repo_full_name = GITHUB_REPOSITORY
self.pr_html_url = f"{repo_prefix}/commits/{ref}"

View File

@ -5,7 +5,7 @@ import re
from typing import Tuple
from github import Github
from env_helper import GITHUB_RUN_ID, GITHUB_REPOSITORY, GITHUB_SERVER_URL
from env_helper import GITHUB_RUN_URL, GITHUB_REPOSITORY, GITHUB_SERVER_URL
from pr_info import PRInfo
from get_robot_token import get_best_robot_token
from commit_status_helper import get_commit
@ -231,7 +231,7 @@ if __name__ == "__main__":
)
sys.exit(1)
url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_RUN_ID}"
url = GITHUB_RUN_URL
if not can_run:
print("::notice ::Cannot run")
commit.create_status(

View File

@ -2,7 +2,7 @@ import os
import logging
import ast
from env_helper import GITHUB_SERVER_URL, GITHUB_REPOSITORY, GITHUB_RUN_ID
from env_helper import GITHUB_SERVER_URL, GITHUB_REPOSITORY, GITHUB_RUN_URL
from report import ReportColorTheme, create_test_html_report
@ -66,7 +66,7 @@ def upload_results(
branch_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/pull/{pr_number}"
commit_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/commit/{commit_sha}"
task_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_RUN_ID}"
task_url = GITHUB_RUN_URL
if additional_urls:
raw_log_url = additional_urls[0]

View File

@ -238,7 +238,7 @@ def _update_dockerfile(repo_path: str, version: ClickHouseVersion):
def update_version_local(repo_path, version, version_type="testing"):
update_contributors()
version.with_description(version_type)
update_cmake_version(version, version_type)
update_cmake_version(version)
_update_changelog(repo_path, version)
_update_dockerfile(repo_path, version)

View File

@ -373,6 +373,11 @@ class SettingsRandomizer:
"priority": lambda: int(abs(random.gauss(0, 2))),
"output_format_parallel_formatting": lambda: random.randint(0, 1),
"input_format_parallel_parsing": lambda: random.randint(0, 1),
"min_chunk_bytes_for_parallel_parsing": lambda: max(1024, int(random.gauss(10 * 1024 * 1024, 5 * 1000 * 1000))),
"max_read_buffer_size": lambda: random.randint(1, 20) if random.random() < 0.1 else random.randint(500000, 1048576),
"prefer_localhost_replica": lambda: random.randint(0, 1),
"max_block_size": lambda: random.randint(8000, 100000),
"max_threads": lambda: random.randint(1, 64),
}
@staticmethod

View File

@ -1,5 +1,7 @@
<clickhouse>
<zookeeper>
<!--<zookeeper_load_balancing>random / in_order / nearest_hostname / first_or_random / round_robin</zookeeper_load_balancing>-->
<zookeeper_load_balancing>random</zookeeper_load_balancing>
<node index="1">
<host>localhost</host>
<port>9181</port>

View File

@ -1459,7 +1459,7 @@
"xor"
"xxHash32"
"xxHash64"
"yandexConsistentHash"
"kostikConsistentHash"
"YEAR"
"yearweek"
"yesterday"

View File

@ -26,7 +26,7 @@
"toUnixTimestamp64Nano"
"toUnixTimestamp64Micro"
"jumpConsistentHash"
"yandexConsistentHash"
"kostikConsistentHash"
"addressToSymbol"
"toJSONString"
"JSON_VALUE"

View File

@ -16,21 +16,28 @@ import traceback
import urllib.parse
import shlex
import urllib3
from cassandra.policies import RoundRobinPolicy
import cassandra.cluster
import psycopg2
import pymongo
import pymysql
import requests
from confluent_kafka.avro.cached_schema_registry_client import (
CachedSchemaRegistryClient,
)
try:
# Please add modules that are required only for specific tests here,
# so contributors can run most tests locally
# without installing tons of unneeded packages that may not be so easy to install.
from cassandra.policies import RoundRobinPolicy
import cassandra.cluster
import psycopg2
from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT
import pymongo
import pymysql
from confluent_kafka.avro.cached_schema_registry_client import (
CachedSchemaRegistryClient,
)
except Exception as e:
logging.warning(f"Cannot import some modules, some tests may not work: {e}")
from dict2xml import dict2xml
from kazoo.client import KazooClient
from kazoo.exceptions import KazooException
from minio import Minio
from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT
from helpers.test_tools import assert_eq_with_retry, exec_query_with_retry
from helpers import pytest_xdist_logging_to_separate_files

View File

@ -67,10 +67,10 @@ def started_cluster():
insert into data (key) select * from numbers(10);
create table if not exists dist_one as data engine=Distributed(one_shard, currentDatabase(), data, key);
create table if not exists dist_one_over_dist as data engine=Distributed(one_shard, currentDatabase(), dist_one, yandexConsistentHash(key, 2));
create table if not exists dist_one_over_dist as data engine=Distributed(one_shard, currentDatabase(), dist_one, kostikConsistentHash(key, 2));
create table if not exists dist_two as data engine=Distributed(two_shards, currentDatabase(), data, key);
create table if not exists dist_two_over_dist as data engine=Distributed(two_shards, currentDatabase(), dist_two, yandexConsistentHash(key, 2));
create table if not exists dist_two_over_dist as data engine=Distributed(two_shards, currentDatabase(), dist_two, kostikConsistentHash(key, 2));
"""
)
yield cluster

View File

@ -361,6 +361,8 @@ def test_s3_zero_copy_with_ttl_delete(cluster, large_data, iterations):
)
node1.query("OPTIMIZE TABLE ttl_delete_test FINAL")
node1.query("SYSTEM SYNC REPLICA ttl_delete_test")
node2.query("SYSTEM SYNC REPLICA ttl_delete_test")
if large_data:

View File

@ -0,0 +1,19 @@
<clickhouse>
<zookeeper>
<!--<zookeeper_load_balancing> random / in_order / nearest_hostname / first_or_random / round_robin </zookeeper_load_balancing>-->
<zookeeper_load_balancing>random</zookeeper_load_balancing>
<node index="1">
<host>zoo1</host>
<port>2181</port>
</node>
<node index="2">
<host>zoo2</host>
<port>2181</port>
</node>
<node index="3">
<host>zoo3</host>
<port>2181</port>
</node>
<session_timeout_ms>3000</session_timeout_ms>
</zookeeper>
</clickhouse>

View File

@ -0,0 +1,427 @@
import pytest
from helpers.cluster import ClickHouseCluster
from helpers.network import PartitionManager
cluster = ClickHouseCluster(
__file__, zookeeper_config_path="configs/zookeeper_load_balancing.xml"
)
# use 3-letter hostnames, so getHostNameDifference("nod1", "zoo1") will work as expected
node1 = cluster.add_instance(
"nod1", with_zookeeper=True, main_configs=["configs/zookeeper_load_balancing.xml"]
)
node2 = cluster.add_instance(
"nod2", with_zookeeper=True, main_configs=["configs/zookeeper_load_balancing.xml"]
)
node3 = cluster.add_instance(
"nod3", with_zookeeper=True, main_configs=["configs/zookeeper_load_balancing.xml"]
)
def change_balancing(old, new, reload=True):
line = "<zookeeper_load_balancing>{}<"
old_line = line.format(old)
new_line = line.format(new)
for node in [node1, node2, node3]:
node.replace_in_config(
"/etc/clickhouse-server/config.d/zookeeper_load_balancing.xml",
old_line,
new_line,
)
if reload:
node.query("select '{}', '{}'".format(old, new))
node.query("system reload config")
@pytest.fixture(scope="module")
def started_cluster():
try:
cluster.start()
yield cluster
finally:
cluster.shutdown()
def test_first_or_random(started_cluster):
try:
change_balancing("random", "first_or_random")
print(
str(
node1.exec_in_container(
[
"bash",
"-c",
"lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED",
],
privileged=True,
user="root",
)
)
)
assert (
"1"
== str(
node1.exec_in_container(
[
"bash",
"-c",
"lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo1_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l",
],
privileged=True,
user="root",
)
).strip()
)
print(
str(
node2.exec_in_container(
[
"bash",
"-c",
"lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED",
],
privileged=True,
user="root",
)
)
)
assert (
"1"
== str(
node2.exec_in_container(
[
"bash",
"-c",
"lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo1_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l",
],
privileged=True,
user="root",
)
).strip()
)
print(
str(
node3.exec_in_container(
[
"bash",
"-c",
"lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED",
],
privileged=True,
user="root",
)
)
)
assert (
"1"
== str(
node3.exec_in_container(
[
"bash",
"-c",
"lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo1_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l",
],
privileged=True,
user="root",
)
).strip()
)
finally:
change_balancing("first_or_random", "random", reload=False)
def test_in_order(started_cluster):
try:
change_balancing("random", "in_order")
print(
str(
node1.exec_in_container(
[
"bash",
"-c",
"lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED",
],
privileged=True,
user="root",
)
)
)
assert (
"1"
== str(
node1.exec_in_container(
[
"bash",
"-c",
"lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo1_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l",
],
privileged=True,
user="root",
)
).strip()
)
print(
str(
node2.exec_in_container(
[
"bash",
"-c",
"lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED",
],
privileged=True,
user="root",
)
)
)
assert (
"1"
== str(
node2.exec_in_container(
[
"bash",
"-c",
"lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo1_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l",
],
privileged=True,
user="root",
)
).strip()
)
print(
str(
node3.exec_in_container(
[
"bash",
"-c",
"lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED",
],
privileged=True,
user="root",
)
)
)
assert (
"1"
== str(
node3.exec_in_container(
[
"bash",
"-c",
"lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo1_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l",
],
privileged=True,
user="root",
)
).strip()
)
finally:
change_balancing("in_order", "random", reload=False)
def test_nearest_hostname(started_cluster):
try:
change_balancing("random", "nearest_hostname")
print(
str(
node1.exec_in_container(
[
"bash",
"-c",
"lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED",
],
privileged=True,
user="root",
)
)
)
assert (
"1"
== str(
node1.exec_in_container(
[
"bash",
"-c",
"lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo1_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l",
],
privileged=True,
user="root",
)
).strip()
)
print(
str(
node2.exec_in_container(
[
"bash",
"-c",
"lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED",
],
privileged=True,
user="root",
)
)
)
assert (
"1"
== str(
node2.exec_in_container(
[
"bash",
"-c",
"lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo2_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l",
],
privileged=True,
user="root",
)
).strip()
)
print(
str(
node3.exec_in_container(
[
"bash",
"-c",
"lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED",
],
privileged=True,
user="root",
)
)
)
assert (
"1"
== str(
node3.exec_in_container(
[
"bash",
"-c",
"lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo3_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l",
],
privileged=True,
user="root",
)
).strip()
)
finally:
change_balancing("nearest_hostname", "random", reload=False)
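

# round_robin walks the host list and skips unavailable entries. The iptables
# rules below reject each node's traffic to zoo1 with a TCP reset, so all
# three nodes are expected to move on to the next host, zoo2.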
def test_round_robin(started_cluster):
pm = PartitionManager()
try:
pm._add_rule(
{
"source": node1.ip_address,
"destination": cluster.get_instance_ip("zoo1"),
"action": "REJECT --reject-with tcp-reset",
}
)
pm._add_rule(
{
"source": node2.ip_address,
"destination": cluster.get_instance_ip("zoo1"),
"action": "REJECT --reject-with tcp-reset",
}
)
pm._add_rule(
{
"source": node3.ip_address,
"destination": cluster.get_instance_ip("zoo1"),
"action": "REJECT --reject-with tcp-reset",
}
)
change_balancing("random", "round_robin")
print(
str(
node1.exec_in_container(
[
"bash",
"-c",
"lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED",
],
privileged=True,
user="root",
)
)
)
assert (
"1"
== str(
node1.exec_in_container(
[
"bash",
"-c",
"lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo2_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l",
],
privileged=True,
user="root",
)
).strip()
)
print(
str(
node2.exec_in_container(
[
"bash",
"-c",
"lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED",
],
privileged=True,
user="root",
)
)
)
assert (
"1"
== str(
node2.exec_in_container(
[
"bash",
"-c",
"lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo2_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l",
],
privileged=True,
user="root",
)
).strip()
)
print(
str(
node3.exec_in_container(
[
"bash",
"-c",
"lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED",
],
privileged=True,
user="root",
)
)
)
assert (
"1"
== str(
node3.exec_in_container(
[
"bash",
"-c",
"lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo2_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l",
],
privileged=True,
user="root",
)
).strip()
)
finally:
pm.heal_all()
change_balancing("round_robin", "random", reload=False)

View File

@ -3,7 +3,7 @@
<substitution>
<name>hash_func</name>
<values>
<value>yandexConsistentHash</value>
<value>kostikConsistentHash</value>
<value>jumpConsistentHash</value>
</values>
</substitution>

View File

@ -99,6 +99,6 @@ abc
1
1
Остальные
Яндекс
Bigmir)net
Google
Остальные

View File

@ -8,10 +8,10 @@ SELECT transform(toString(number), ['3', '5', '7'], [111, 222, 333], 0) FROM sys
SELECT transform(toString(number), ['3', '5', '7'], [111, 222, 333], -1) FROM system.numbers LIMIT 10;
SELECT transform(toString(number), ['3', '5', '7'], [111, 222, 333], -1.1) FROM system.numbers LIMIT 10;
SELECT transform(toString(number), ['3', '5', '7'], [111, 222.2, 333], 1) FROM system.numbers LIMIT 10;
SELECT transform(1, [2, 3], ['Яндекс', 'Google'], 'Остальные') AS title;
SELECT transform(2, [2, 3], ['Яндекс', 'Google'], 'Остальные') AS title;
SELECT transform(3, [2, 3], ['Яндекс', 'Google'], 'Остальные') AS title;
SELECT transform(4, [2, 3], ['Яндекс', 'Google'], 'Остальные') AS title;
SELECT transform(1, [2, 3], ['Bigmir)net', 'Google'], 'Остальные') AS title;
SELECT transform(2, [2, 3], ['Bigmir)net', 'Google'], 'Остальные') AS title;
SELECT transform(3, [2, 3], ['Bigmir)net', 'Google'], 'Остальные') AS title;
SELECT transform(4, [2, 3], ['Bigmir)net', 'Google'], 'Остальные') AS title;
SELECT transform('hello', 'wrong', 1); -- { serverError 43 }
SELECT transform('hello', ['wrong'], 1); -- { serverError 43 }
SELECT transform('hello', ['wrong'], [1]); -- { serverError 43 }

View File

@ -79,6 +79,6 @@ abc
1
1
Остальные
Яндекс
Meta.ua
Google
Остальные

View File

@ -6,7 +6,7 @@ SELECT transform(toString(number), ['3', '5', '7'], [111, 222, 333], materialize
SELECT transform(toString(number), ['3', '5', '7'], [111, 222, 333], materialize(-1)) FROM system.numbers LIMIT 10;
SELECT transform(toString(number), ['3', '5', '7'], [111, 222, 333], materialize(-1.1)) FROM system.numbers LIMIT 10;
SELECT transform(toString(number), ['3', '5', '7'], [111, 222.2, 333], materialize(1)) FROM system.numbers LIMIT 10;
SELECT transform(1, [2, 3], ['Яндекс', 'Google'], materialize('Остальные')) AS title;
SELECT transform(2, [2, 3], ['Яндекс', 'Google'], materialize('Остальные')) AS title;
SELECT transform(3, [2, 3], ['Яндекс', 'Google'], materialize('Остальные')) AS title;
SELECT transform(4, [2, 3], ['Яндекс', 'Google'], materialize('Остальные')) AS title;
SELECT transform(1, [2, 3], ['Meta.ua', 'Google'], materialize('Остальные')) AS title;
SELECT transform(2, [2, 3], ['Meta.ua', 'Google'], materialize('Остальные')) AS title;
SELECT transform(3, [2, 3], ['Meta.ua', 'Google'], materialize('Остальные')) AS title;
SELECT transform(4, [2, 3], ['Meta.ua', 'Google'], materialize('Остальные')) AS title;

View File

@ -34,25 +34,25 @@ Hello, World
0,1,2,3,4,5,6,7
0,1,2,3,4,5,6,7,8
yandex
yandex google
yandex google test
yandex google test 123
yandex google test 123
yandex google test 123 hello
yandex google test 123 hello world
yandex google test 123 hello world goodbye
yandex google test 123 hello world goodbye xyz
yandex google test 123 hello world goodbye xyz yandex
yandex google test 123 hello world goodbye xyz yandex google
yandex google test 123 hello world goodbye xyz yandex google test
yandex google test 123 hello world goodbye xyz yandex google test 123
yandex google test 123 hello world goodbye xyz yandex google test 123
yandex google test 123 hello world goodbye xyz yandex google test 123 hello
yandex google test 123 hello world goodbye xyz yandex google test 123 hello world
yandex google test 123 hello world goodbye xyz yandex google test 123 hello world goodbye
yandex google test 123 hello world goodbye xyz yandex google test 123 hello world goodbye xyz
yandex google test 123 hello world goodbye xyz yandex google test 123 hello world goodbye xyz yandex
meta.ua
meta.ua google
meta.ua google test
meta.ua google test 123
meta.ua google test 123
meta.ua google test 123 hello
meta.ua google test 123 hello world
meta.ua google test 123 hello world goodbye
meta.ua google test 123 hello world goodbye xyz
meta.ua google test 123 hello world goodbye xyz meta.ua
meta.ua google test 123 hello world goodbye xyz meta.ua google
meta.ua google test 123 hello world goodbye xyz meta.ua google test
meta.ua google test 123 hello world goodbye xyz meta.ua google test 123
meta.ua google test 123 hello world goodbye xyz meta.ua google test 123
meta.ua google test 123 hello world goodbye xyz meta.ua google test 123 hello
meta.ua google test 123 hello world goodbye xyz meta.ua google test 123 hello world
meta.ua google test 123 hello world goodbye xyz meta.ua google test 123 hello world goodbye
meta.ua google test 123 hello world goodbye xyz meta.ua google test 123 hello world goodbye xyz
meta.ua google test 123 hello world goodbye xyz meta.ua google test 123 hello world goodbye xyz meta.ua
0
01

View File

@ -6,7 +6,7 @@ SELECT arrayStringConcat(emptyArrayString());
SELECT arrayStringConcat(arrayMap(x -> toString(x), range(number))) FROM system.numbers LIMIT 10;
SELECT arrayStringConcat(arrayMap(x -> toString(x), range(number)), '') FROM system.numbers LIMIT 10;
SELECT arrayStringConcat(arrayMap(x -> toString(x), range(number)), ',') FROM system.numbers LIMIT 10;
SELECT arrayStringConcat(arrayMap(x -> transform(x, [0, 1, 2, 3, 4, 5, 6, 7, 8], ['yandex', 'google', 'test', '123', '', 'hello', 'world', 'goodbye', 'xyz'], ''), arrayMap(x -> x % 9, range(number))), ' ') FROM system.numbers LIMIT 20;
SELECT arrayStringConcat(arrayMap(x -> transform(x, [0, 1, 2, 3, 4, 5, 6, 7, 8], ['meta.ua', 'google', 'test', '123', '', 'hello', 'world', 'goodbye', 'xyz'], ''), arrayMap(x -> x % 9, range(number))), ' ') FROM system.numbers LIMIT 20;
SELECT arrayStringConcat(arrayMap(x -> toString(x), range(number % 4))) FROM system.numbers LIMIT 10;
SELECT arrayStringConcat([Null, 'hello', Null, 'world', Null, 'xyz', 'def', Null], ';');
SELECT arrayStringConcat([Null::Nullable(String), Null::Nullable(String)], ';');

View File

@ -1,8 +1,8 @@
['a=b','c=d'] ['a=b','c=d','e=f'] ['a','c=d','e=f'] ['a=b','c=d','e=f','g=h'] ['a=b','c=d'] ['a=b','c=d','e','g=h'] ['a=b','c=d','e=f','g=h'] ['a=b','c=d'] ['a=b','c=d','e=f'] ['a','c=d','e=f'] ['a=b','c=d','e=f','g=h'] ['a=b','c=d'] ['a=b','c=d','e','g=h'] ['a=b','c=d','e=f','g=h']
['a','c'] ['a','c','e'] ['a','c','e'] ['a','c','e','g'] ['a','c'] ['a','c','e','g'] ['a','c','e','g'] ['a','c'] ['a','c','e'] ['a','c','e'] ['a','c','e','g'] ['a','c'] ['a','c','e','g'] ['a','c','e','g']
b d f d f h b d d h f h b d f d f h b d d h f h
http://yandex.ru/?c=d http://yandex.ru/?a=b http://yandex.ru/?a=b&c=d# http://yandex.ru/?a&c=d#e=f http://yandex.ru/?a#e=f http://yandex.ru/?a&c=d# http://yandex.ru/?a=b&c=d#e=f http://yandex.ru/?c=d#e http://yandex.ru/?a=b#e http://yandex.ru/?a=b&c=d#e http://yandex.ru/?a=b#e&g=h http://yandex.ru/?a=b&c=d#e&g=h http://yandex.ru/?a=b&c=d#e http://yandex.ru/?a=b&c=d#test?e=f&g=h http://yandex.ru/?a=b&c=d#test?g=h http://yandex.ru/?a=b&c=d#test?e=f //yandex.ru/?c=d //yandex.ru/?a=b //yandex.ru/?a=b&c=d# //yandex.ru/?a&c=d#e=f //yandex.ru/?a#e=f //yandex.ru/?a&c=d# //yandex.ru/?a=b&c=d#e=f //yandex.ru/?c=d#e //yandex.ru/?a=b#e //yandex.ru/?a=b&c=d#e //yandex.ru/?a=b#e&g=h //yandex.ru/?a=b&c=d#e&g=h //yandex.ru/?a=b&c=d#e //yandex.ru/?a=b&c=d#test?e=f&g=h //yandex.ru/?a=b&c=d#test?g=h //yandex.ru/?a=b&c=d#test?e=f
http://bigmir.net/?c=d http://bigmir.net/?a=b http://bigmir.net/?a=b&c=d# http://bigmir.net/?a&c=d#e=f http://bigmir.net/?a#e=f http://bigmir.net/?a&c=d# http://bigmir.net/?a=b&c=d#e=f http://bigmir.net/?c=d#e http://bigmir.net/?a=b#e http://bigmir.net/?a=b&c=d#e http://bigmir.net/?a=b#e&g=h http://bigmir.net/?a=b&c=d#e&g=h http://bigmir.net/?a=b&c=d#e http://bigmir.net/?a=b&c=d#test?e=f&g=h http://bigmir.net/?a=b&c=d#test?g=h http://bigmir.net/?a=b&c=d#test?e=f //bigmir.net/?c=d //bigmir.net/?a=b //bigmir.net/?a=b&c=d# //bigmir.net/?a&c=d#e=f //bigmir.net/?a#e=f //bigmir.net/?a&c=d# //bigmir.net/?a=b&c=d#e=f //bigmir.net/?c=d#e //bigmir.net/?a=b#e //bigmir.net/?a=b&c=d#e //bigmir.net/?a=b#e&g=h //bigmir.net/?a=b&c=d#e&g=h //bigmir.net/?a=b&c=d#e //bigmir.net/?a=b&c=d#test?e=f&g=h //bigmir.net/?a=b&c=d#test?g=h //bigmir.net/?a=b&c=d#test?e=f
['a=b','c=d'] ['a=b','c=d','e=f'] ['a','c=d','e=f'] ['a=b','c=d','e=f','g=h'] ['a=b','c=d'] ['a=b','c=d','e','g=h'] ['a=b','c=d','e=f','g=h'] ['a=b','c=d'] ['a=b','c=d','e=f'] ['a','c=d','e=f'] ['a=b','c=d','e=f','g=h'] ['a=b','c=d'] ['a=b','c=d','e','g=h'] ['a=b','c=d','e=f','g=h']
['a','c'] ['a','c','e'] ['a','c','e'] ['a','c','e','g'] ['a','c'] ['a','c','e','g'] ['a','c','e','g'] ['a','c'] ['a','c','e'] ['a','c','e'] ['a','c','e','g'] ['a','c'] ['a','c','e','g'] ['a','c','e','g']
b d f d f h b d d h f h b d f d f h b d d h f h
http://yandex.ru/?c=d http://yandex.ru/?a=b http://yandex.ru/?a=b&c=d# http://yandex.ru/?a&c=d#e=f http://yandex.ru/?a#e=f http://yandex.ru/?a&c=d# http://yandex.ru/?a=b&c=d#e=f http://yandex.ru/?c=d#e http://yandex.ru/?a=b#e http://yandex.ru/?a=b&c=d#e http://yandex.ru/?a=b#e&g=h http://yandex.ru/?a=b&c=d#e&g=h http://yandex.ru/?a=b&c=d#e http://yandex.ru/?a=b&c=d#test?e=f&g=h http://yandex.ru/?a=b&c=d#test?g=h http://yandex.ru/?a=b&c=d#test?e=f //yandex.ru/?c=d //yandex.ru/?a=b //yandex.ru/?a=b&c=d# //yandex.ru/?a&c=d#e=f //yandex.ru/?a#e=f //yandex.ru/?a&c=d# //yandex.ru/?a=b&c=d#e=f //yandex.ru/?c=d#e //yandex.ru/?a=b#e //yandex.ru/?a=b&c=d#e //yandex.ru/?a=b#e&g=h //yandex.ru/?a=b&c=d#e&g=h //yandex.ru/?a=b&c=d#e //yandex.ru/?a=b&c=d#test?e=f&g=h //yandex.ru/?a=b&c=d#test?g=h //yandex.ru/?a=b&c=d#test?e=f
http://bigmir.net/?c=d http://bigmir.net/?a=b http://bigmir.net/?a=b&c=d# http://bigmir.net/?a&c=d#e=f http://bigmir.net/?a#e=f http://bigmir.net/?a&c=d# http://bigmir.net/?a=b&c=d#e=f http://bigmir.net/?c=d#e http://bigmir.net/?a=b#e http://bigmir.net/?a=b&c=d#e http://bigmir.net/?a=b#e&g=h http://bigmir.net/?a=b&c=d#e&g=h http://bigmir.net/?a=b&c=d#e http://bigmir.net/?a=b&c=d#test?e=f&g=h http://bigmir.net/?a=b&c=d#test?g=h http://bigmir.net/?a=b&c=d#test?e=f //bigmir.net/?c=d //bigmir.net/?a=b //bigmir.net/?a=b&c=d# //bigmir.net/?a&c=d#e=f //bigmir.net/?a#e=f //bigmir.net/?a&c=d# //bigmir.net/?a=b&c=d#e=f //bigmir.net/?c=d#e //bigmir.net/?a=b#e //bigmir.net/?a=b&c=d#e //bigmir.net/?a=b#e&g=h //bigmir.net/?a=b&c=d#e&g=h //bigmir.net/?a=b&c=d#e //bigmir.net/?a=b&c=d#test?e=f&g=h //bigmir.net/?a=b&c=d#test?g=h //bigmir.net/?a=b&c=d#test?e=f

View File

@ -1,200 +1,200 @@
SELECT
extractURLParameters('http://yandex.ru/?a=b&c=d'),
extractURLParameters('http://yandex.ru/?a=b&c=d#e=f'),
extractURLParameters('http://yandex.ru/?a&c=d#e=f'),
extractURLParameters('http://yandex.ru/?a=b&c=d#e=f&g=h'),
extractURLParameters('http://yandex.ru/?a=b&c=d#e'),
extractURLParameters('http://yandex.ru/?a=b&c=d#e&g=h'),
extractURLParameters('http://yandex.ru/?a=b&c=d#test?e=f&g=h'),
extractURLParameters('//yandex.ru/?a=b&c=d'),
extractURLParameters('//yandex.ru/?a=b&c=d#e=f'),
extractURLParameters('//yandex.ru/?a&c=d#e=f'),
extractURLParameters('//yandex.ru/?a=b&c=d#e=f&g=h'),
extractURLParameters('//yandex.ru/?a=b&c=d#e'),
extractURLParameters('//yandex.ru/?a=b&c=d#e&g=h'),
extractURLParameters('//yandex.ru/?a=b&c=d#test?e=f&g=h');
extractURLParameters('http://bigmir.net/?a=b&c=d'),
extractURLParameters('http://bigmir.net/?a=b&c=d#e=f'),
extractURLParameters('http://bigmir.net/?a&c=d#e=f'),
extractURLParameters('http://bigmir.net/?a=b&c=d#e=f&g=h'),
extractURLParameters('http://bigmir.net/?a=b&c=d#e'),
extractURLParameters('http://bigmir.net/?a=b&c=d#e&g=h'),
extractURLParameters('http://bigmir.net/?a=b&c=d#test?e=f&g=h'),
extractURLParameters('//bigmir.net/?a=b&c=d'),
extractURLParameters('//bigmir.net/?a=b&c=d#e=f'),
extractURLParameters('//bigmir.net/?a&c=d#e=f'),
extractURLParameters('//bigmir.net/?a=b&c=d#e=f&g=h'),
extractURLParameters('//bigmir.net/?a=b&c=d#e'),
extractURLParameters('//bigmir.net/?a=b&c=d#e&g=h'),
extractURLParameters('//bigmir.net/?a=b&c=d#test?e=f&g=h');
SELECT
extractURLParameterNames('http://yandex.ru/?a=b&c=d'),
extractURLParameterNames('http://yandex.ru/?a=b&c=d#e=f'),
extractURLParameterNames('http://yandex.ru/?a&c=d#e=f'),
extractURLParameterNames('http://yandex.ru/?a=b&c=d#e=f&g=h'),
extractURLParameterNames('http://yandex.ru/?a=b&c=d#e'),
extractURLParameterNames('http://yandex.ru/?a=b&c=d#e&g=h'),
extractURLParameterNames('http://yandex.ru/?a=b&c=d#test?e=f&g=h'),
extractURLParameterNames('//yandex.ru/?a=b&c=d'),
extractURLParameterNames('//yandex.ru/?a=b&c=d#e=f'),
extractURLParameterNames('//yandex.ru/?a&c=d#e=f'),
extractURLParameterNames('//yandex.ru/?a=b&c=d#e=f&g=h'),
extractURLParameterNames('//yandex.ru/?a=b&c=d#e'),
extractURLParameterNames('//yandex.ru/?a=b&c=d#e&g=h'),
extractURLParameterNames('//yandex.ru/?a=b&c=d#test?e=f&g=h');
extractURLParameterNames('http://bigmir.net/?a=b&c=d'),
extractURLParameterNames('http://bigmir.net/?a=b&c=d#e=f'),
extractURLParameterNames('http://bigmir.net/?a&c=d#e=f'),
extractURLParameterNames('http://bigmir.net/?a=b&c=d#e=f&g=h'),
extractURLParameterNames('http://bigmir.net/?a=b&c=d#e'),
extractURLParameterNames('http://bigmir.net/?a=b&c=d#e&g=h'),
extractURLParameterNames('http://bigmir.net/?a=b&c=d#test?e=f&g=h'),
extractURLParameterNames('//bigmir.net/?a=b&c=d'),
extractURLParameterNames('//bigmir.net/?a=b&c=d#e=f'),
extractURLParameterNames('//bigmir.net/?a&c=d#e=f'),
extractURLParameterNames('//bigmir.net/?a=b&c=d#e=f&g=h'),
extractURLParameterNames('//bigmir.net/?a=b&c=d#e'),
extractURLParameterNames('//bigmir.net/?a=b&c=d#e&g=h'),
extractURLParameterNames('//bigmir.net/?a=b&c=d#test?e=f&g=h');
SELECT
extractURLParameter('http://yandex.ru/?a=b&c=d', 'a'),
extractURLParameter('http://yandex.ru/?a=b&c=d', 'c'),
extractURLParameter('http://yandex.ru/?a=b&c=d#e=f', 'e'),
extractURLParameter('http://yandex.ru/?a&c=d#e=f', 'a'),
extractURLParameter('http://yandex.ru/?a&c=d#e=f', 'c'),
extractURLParameter('http://yandex.ru/?a&c=d#e=f', 'e'),
extractURLParameter('http://yandex.ru/?a=b&c=d#e=f&g=h', 'g'),
extractURLParameter('http://yandex.ru/?a=b&c=d#e', 'a'),
extractURLParameter('http://yandex.ru/?a=b&c=d#e', 'c'),
extractURLParameter('http://yandex.ru/?a=b&c=d#e', 'e'),
extractURLParameter('http://yandex.ru/?a=b&c=d#e&g=h', 'c'),
extractURLParameter('http://yandex.ru/?a=b&c=d#e&g=h', 'e'),
extractURLParameter('http://yandex.ru/?a=b&c=d#e&g=h', 'g'),
extractURLParameter('http://yandex.ru/?a=b&c=d#test?e=f&g=h', 'test'),
extractURLParameter('http://yandex.ru/?a=b&c=d#test?e=f&g=h', 'e'),
extractURLParameter('http://yandex.ru/?a=b&c=d#test?e=f&g=h', 'g'),
extractURLParameter('//yandex.ru/?a=b&c=d', 'a'),
extractURLParameter('//yandex.ru/?a=b&c=d', 'c'),
extractURLParameter('//yandex.ru/?a=b&c=d#e=f', 'e'),
extractURLParameter('//yandex.ru/?a&c=d#e=f', 'a'),
extractURLParameter('//yandex.ru/?a&c=d#e=f', 'c'),
extractURLParameter('//yandex.ru/?a&c=d#e=f', 'e'),
extractURLParameter('//yandex.ru/?a=b&c=d#e=f&g=h', 'g'),
extractURLParameter('//yandex.ru/?a=b&c=d#e', 'a'),
extractURLParameter('//yandex.ru/?a=b&c=d#e', 'c'),
extractURLParameter('//yandex.ru/?a=b&c=d#e', 'e'),
extractURLParameter('//yandex.ru/?a=b&c=d#e&g=h', 'c'),
extractURLParameter('//yandex.ru/?a=b&c=d#e&g=h', 'e'),
extractURLParameter('//yandex.ru/?a=b&c=d#e&g=h', 'g'),
extractURLParameter('//yandex.ru/?a=b&c=d#test?e=f&g=h', 'test'),
extractURLParameter('//yandex.ru/?a=b&c=d#test?e=f&g=h', 'e'),
extractURLParameter('//yandex.ru/?a=b&c=d#test?e=f&g=h', 'g');
extractURLParameter('http://bigmir.net/?a=b&c=d', 'a'),
extractURLParameter('http://bigmir.net/?a=b&c=d', 'c'),
extractURLParameter('http://bigmir.net/?a=b&c=d#e=f', 'e'),
extractURLParameter('http://bigmir.net/?a&c=d#e=f', 'a'),
extractURLParameter('http://bigmir.net/?a&c=d#e=f', 'c'),
extractURLParameter('http://bigmir.net/?a&c=d#e=f', 'e'),
extractURLParameter('http://bigmir.net/?a=b&c=d#e=f&g=h', 'g'),
extractURLParameter('http://bigmir.net/?a=b&c=d#e', 'a'),
extractURLParameter('http://bigmir.net/?a=b&c=d#e', 'c'),
extractURLParameter('http://bigmir.net/?a=b&c=d#e', 'e'),
extractURLParameter('http://bigmir.net/?a=b&c=d#e&g=h', 'c'),
extractURLParameter('http://bigmir.net/?a=b&c=d#e&g=h', 'e'),
extractURLParameter('http://bigmir.net/?a=b&c=d#e&g=h', 'g'),
extractURLParameter('http://bigmir.net/?a=b&c=d#test?e=f&g=h', 'test'),
extractURLParameter('http://bigmir.net/?a=b&c=d#test?e=f&g=h', 'e'),
extractURLParameter('http://bigmir.net/?a=b&c=d#test?e=f&g=h', 'g'),
extractURLParameter('//bigmir.net/?a=b&c=d', 'a'),
extractURLParameter('//bigmir.net/?a=b&c=d', 'c'),
extractURLParameter('//bigmir.net/?a=b&c=d#e=f', 'e'),
extractURLParameter('//bigmir.net/?a&c=d#e=f', 'a'),
extractURLParameter('//bigmir.net/?a&c=d#e=f', 'c'),
extractURLParameter('//bigmir.net/?a&c=d#e=f', 'e'),
extractURLParameter('//bigmir.net/?a=b&c=d#e=f&g=h', 'g'),
extractURLParameter('//bigmir.net/?a=b&c=d#e', 'a'),
extractURLParameter('//bigmir.net/?a=b&c=d#e', 'c'),
extractURLParameter('//bigmir.net/?a=b&c=d#e', 'e'),
extractURLParameter('//bigmir.net/?a=b&c=d#e&g=h', 'c'),
extractURLParameter('//bigmir.net/?a=b&c=d#e&g=h', 'e'),
extractURLParameter('//bigmir.net/?a=b&c=d#e&g=h', 'g'),
extractURLParameter('//bigmir.net/?a=b&c=d#test?e=f&g=h', 'test'),
extractURLParameter('//bigmir.net/?a=b&c=d#test?e=f&g=h', 'e'),
extractURLParameter('//bigmir.net/?a=b&c=d#test?e=f&g=h', 'g');
SELECT
cutURLParameter('http://yandex.ru/?a=b&c=d', 'a'),
cutURLParameter('http://yandex.ru/?a=b&c=d', 'c'),
cutURLParameter('http://yandex.ru/?a=b&c=d#e=f', 'e'),
cutURLParameter('http://yandex.ru/?a&c=d#e=f', 'a'),
cutURLParameter('http://yandex.ru/?a&c=d#e=f', 'c'),
cutURLParameter('http://yandex.ru/?a&c=d#e=f', 'e'),
cutURLParameter('http://yandex.ru/?a=b&c=d#e=f&g=h', 'g'),
cutURLParameter('http://yandex.ru/?a=b&c=d#e', 'a'),
cutURLParameter('http://yandex.ru/?a=b&c=d#e', 'c'),
cutURLParameter('http://yandex.ru/?a=b&c=d#e', 'e'),
cutURLParameter('http://yandex.ru/?a=b&c=d#e&g=h', 'c'),
cutURLParameter('http://yandex.ru/?a=b&c=d#e&g=h', 'e'),
cutURLParameter('http://yandex.ru/?a=b&c=d#e&g=h', 'g'),
cutURLParameter('http://yandex.ru/?a=b&c=d#test?e=f&g=h', 'test'),
cutURLParameter('http://yandex.ru/?a=b&c=d#test?e=f&g=h', 'e'),
cutURLParameter('http://yandex.ru/?a=b&c=d#test?e=f&g=h', 'g'),
cutURLParameter('//yandex.ru/?a=b&c=d', 'a'),
cutURLParameter('//yandex.ru/?a=b&c=d', 'c'),
cutURLParameter('//yandex.ru/?a=b&c=d#e=f', 'e'),
cutURLParameter('//yandex.ru/?a&c=d#e=f', 'a'),
cutURLParameter('//yandex.ru/?a&c=d#e=f', 'c'),
cutURLParameter('//yandex.ru/?a&c=d#e=f', 'e'),
cutURLParameter('//yandex.ru/?a=b&c=d#e=f&g=h', 'g'),
cutURLParameter('//yandex.ru/?a=b&c=d#e', 'a'),
cutURLParameter('//yandex.ru/?a=b&c=d#e', 'c'),
cutURLParameter('//yandex.ru/?a=b&c=d#e', 'e'),
cutURLParameter('//yandex.ru/?a=b&c=d#e&g=h', 'c'),
cutURLParameter('//yandex.ru/?a=b&c=d#e&g=h', 'e'),
cutURLParameter('//yandex.ru/?a=b&c=d#e&g=h', 'g'),
cutURLParameter('//yandex.ru/?a=b&c=d#test?e=f&g=h', 'test'),
cutURLParameter('//yandex.ru/?a=b&c=d#test?e=f&g=h', 'e'),
cutURLParameter('//yandex.ru/?a=b&c=d#test?e=f&g=h', 'g');
cutURLParameter('http://bigmir.net/?a=b&c=d', 'a'),
cutURLParameter('http://bigmir.net/?a=b&c=d', 'c'),
cutURLParameter('http://bigmir.net/?a=b&c=d#e=f', 'e'),
cutURLParameter('http://bigmir.net/?a&c=d#e=f', 'a'),
cutURLParameter('http://bigmir.net/?a&c=d#e=f', 'c'),
cutURLParameter('http://bigmir.net/?a&c=d#e=f', 'e'),
cutURLParameter('http://bigmir.net/?a=b&c=d#e=f&g=h', 'g'),
cutURLParameter('http://bigmir.net/?a=b&c=d#e', 'a'),
cutURLParameter('http://bigmir.net/?a=b&c=d#e', 'c'),
cutURLParameter('http://bigmir.net/?a=b&c=d#e', 'e'),
cutURLParameter('http://bigmir.net/?a=b&c=d#e&g=h', 'c'),
cutURLParameter('http://bigmir.net/?a=b&c=d#e&g=h', 'e'),
cutURLParameter('http://bigmir.net/?a=b&c=d#e&g=h', 'g'),
cutURLParameter('http://bigmir.net/?a=b&c=d#test?e=f&g=h', 'test'),
cutURLParameter('http://bigmir.net/?a=b&c=d#test?e=f&g=h', 'e'),
cutURLParameter('http://bigmir.net/?a=b&c=d#test?e=f&g=h', 'g'),
cutURLParameter('//bigmir.net/?a=b&c=d', 'a'),
cutURLParameter('//bigmir.net/?a=b&c=d', 'c'),
cutURLParameter('//bigmir.net/?a=b&c=d#e=f', 'e'),
cutURLParameter('//bigmir.net/?a&c=d#e=f', 'a'),
cutURLParameter('//bigmir.net/?a&c=d#e=f', 'c'),
cutURLParameter('//bigmir.net/?a&c=d#e=f', 'e'),
cutURLParameter('//bigmir.net/?a=b&c=d#e=f&g=h', 'g'),
cutURLParameter('//bigmir.net/?a=b&c=d#e', 'a'),
cutURLParameter('//bigmir.net/?a=b&c=d#e', 'c'),
cutURLParameter('//bigmir.net/?a=b&c=d#e', 'e'),
cutURLParameter('//bigmir.net/?a=b&c=d#e&g=h', 'c'),
cutURLParameter('//bigmir.net/?a=b&c=d#e&g=h', 'e'),
cutURLParameter('//bigmir.net/?a=b&c=d#e&g=h', 'g'),
cutURLParameter('//bigmir.net/?a=b&c=d#test?e=f&g=h', 'test'),
cutURLParameter('//bigmir.net/?a=b&c=d#test?e=f&g=h', 'e'),
cutURLParameter('//bigmir.net/?a=b&c=d#test?e=f&g=h', 'g');
SELECT
extractURLParameters(materialize('http://yandex.ru/?a=b&c=d')),
extractURLParameters(materialize('http://yandex.ru/?a=b&c=d#e=f')),
extractURLParameters(materialize('http://yandex.ru/?a&c=d#e=f')),
extractURLParameters(materialize('http://yandex.ru/?a=b&c=d#e=f&g=h')),
extractURLParameters(materialize('http://yandex.ru/?a=b&c=d#e')),
extractURLParameters(materialize('http://yandex.ru/?a=b&c=d#e&g=h')),
extractURLParameters(materialize('http://yandex.ru/?a=b&c=d#test?e=f&g=h')),
extractURLParameters(materialize('//yandex.ru/?a=b&c=d')),
extractURLParameters(materialize('//yandex.ru/?a=b&c=d#e=f')),
extractURLParameters(materialize('//yandex.ru/?a&c=d#e=f')),
extractURLParameters(materialize('//yandex.ru/?a=b&c=d#e=f&g=h')),
extractURLParameters(materialize('//yandex.ru/?a=b&c=d#e')),
extractURLParameters(materialize('//yandex.ru/?a=b&c=d#e&g=h')),
extractURLParameters(materialize('//yandex.ru/?a=b&c=d#test?e=f&g=h'));
extractURLParameters(materialize('http://bigmir.net/?a=b&c=d')),
extractURLParameters(materialize('http://bigmir.net/?a=b&c=d#e=f')),
extractURLParameters(materialize('http://bigmir.net/?a&c=d#e=f')),
extractURLParameters(materialize('http://bigmir.net/?a=b&c=d#e=f&g=h')),
extractURLParameters(materialize('http://bigmir.net/?a=b&c=d#e')),
extractURLParameters(materialize('http://bigmir.net/?a=b&c=d#e&g=h')),
extractURLParameters(materialize('http://bigmir.net/?a=b&c=d#test?e=f&g=h')),
extractURLParameters(materialize('//bigmir.net/?a=b&c=d')),
extractURLParameters(materialize('//bigmir.net/?a=b&c=d#e=f')),
extractURLParameters(materialize('//bigmir.net/?a&c=d#e=f')),
extractURLParameters(materialize('//bigmir.net/?a=b&c=d#e=f&g=h')),
extractURLParameters(materialize('//bigmir.net/?a=b&c=d#e')),
extractURLParameters(materialize('//bigmir.net/?a=b&c=d#e&g=h')),
extractURLParameters(materialize('//bigmir.net/?a=b&c=d#test?e=f&g=h'));
SELECT
extractURLParameterNames(materialize('http://yandex.ru/?a=b&c=d')),
extractURLParameterNames(materialize('http://yandex.ru/?a=b&c=d#e=f')),
extractURLParameterNames(materialize('http://yandex.ru/?a&c=d#e=f')),
extractURLParameterNames(materialize('http://yandex.ru/?a=b&c=d#e=f&g=h')),
extractURLParameterNames(materialize('http://yandex.ru/?a=b&c=d#e')),
extractURLParameterNames(materialize('http://yandex.ru/?a=b&c=d#e&g=h')),
extractURLParameterNames(materialize('http://yandex.ru/?a=b&c=d#test?e=f&g=h')),
extractURLParameterNames(materialize('//yandex.ru/?a=b&c=d')),
extractURLParameterNames(materialize('//yandex.ru/?a=b&c=d#e=f')),
extractURLParameterNames(materialize('//yandex.ru/?a&c=d#e=f')),
extractURLParameterNames(materialize('//yandex.ru/?a=b&c=d#e=f&g=h')),
extractURLParameterNames(materialize('//yandex.ru/?a=b&c=d#e')),
extractURLParameterNames(materialize('//yandex.ru/?a=b&c=d#e&g=h')),
extractURLParameterNames(materialize('//yandex.ru/?a=b&c=d#test?e=f&g=h'));
extractURLParameterNames(materialize('http://bigmir.net/?a=b&c=d')),
extractURLParameterNames(materialize('http://bigmir.net/?a=b&c=d#e=f')),
extractURLParameterNames(materialize('http://bigmir.net/?a&c=d#e=f')),
extractURLParameterNames(materialize('http://bigmir.net/?a=b&c=d#e=f&g=h')),
extractURLParameterNames(materialize('http://bigmir.net/?a=b&c=d#e')),
extractURLParameterNames(materialize('http://bigmir.net/?a=b&c=d#e&g=h')),
extractURLParameterNames(materialize('http://bigmir.net/?a=b&c=d#test?e=f&g=h')),
extractURLParameterNames(materialize('//bigmir.net/?a=b&c=d')),
extractURLParameterNames(materialize('//bigmir.net/?a=b&c=d#e=f')),
extractURLParameterNames(materialize('//bigmir.net/?a&c=d#e=f')),
extractURLParameterNames(materialize('//bigmir.net/?a=b&c=d#e=f&g=h')),
extractURLParameterNames(materialize('//bigmir.net/?a=b&c=d#e')),
extractURLParameterNames(materialize('//bigmir.net/?a=b&c=d#e&g=h')),
extractURLParameterNames(materialize('//bigmir.net/?a=b&c=d#test?e=f&g=h'));
SELECT
extractURLParameter(materialize('http://yandex.ru/?a=b&c=d'), 'a'),
extractURLParameter(materialize('http://yandex.ru/?a=b&c=d'), 'c'),
extractURLParameter(materialize('http://yandex.ru/?a=b&c=d#e=f'), 'e'),
extractURLParameter(materialize('http://yandex.ru/?a&c=d#e=f'), 'a'),
extractURLParameter(materialize('http://yandex.ru/?a&c=d#e=f'), 'c'),
extractURLParameter(materialize('http://yandex.ru/?a&c=d#e=f'), 'e'),
extractURLParameter(materialize('http://yandex.ru/?a=b&c=d#e=f&g=h'), 'g'),
extractURLParameter(materialize('http://yandex.ru/?a=b&c=d#e'), 'a'),
extractURLParameter(materialize('http://yandex.ru/?a=b&c=d#e'), 'c'),
extractURLParameter(materialize('http://yandex.ru/?a=b&c=d#e'), 'e'),
extractURLParameter(materialize('http://yandex.ru/?a=b&c=d#e&g=h'), 'c'),
extractURLParameter(materialize('http://yandex.ru/?a=b&c=d#e&g=h'), 'e'),
extractURLParameter(materialize('http://yandex.ru/?a=b&c=d#e&g=h'), 'g'),
extractURLParameter(materialize('http://yandex.ru/?a=b&c=d#test?e=f&g=h'), 'test'),
extractURLParameter(materialize('http://yandex.ru/?a=b&c=d#test?e=f&g=h'), 'e'),
extractURLParameter(materialize('http://yandex.ru/?a=b&c=d#test?e=f&g=h'), 'g'),
extractURLParameter(materialize('//yandex.ru/?a=b&c=d'), 'a'),
extractURLParameter(materialize('//yandex.ru/?a=b&c=d'), 'c'),
extractURLParameter(materialize('//yandex.ru/?a=b&c=d#e=f'), 'e'),
extractURLParameter(materialize('//yandex.ru/?a&c=d#e=f'), 'a'),
extractURLParameter(materialize('//yandex.ru/?a&c=d#e=f'), 'c'),
extractURLParameter(materialize('//yandex.ru/?a&c=d#e=f'), 'e'),
extractURLParameter(materialize('//yandex.ru/?a=b&c=d#e=f&g=h'), 'g'),
extractURLParameter(materialize('//yandex.ru/?a=b&c=d#e'), 'a'),
extractURLParameter(materialize('//yandex.ru/?a=b&c=d#e'), 'c'),
extractURLParameter(materialize('//yandex.ru/?a=b&c=d#e'), 'e'),
extractURLParameter(materialize('//yandex.ru/?a=b&c=d#e&g=h'), 'c'),
extractURLParameter(materialize('//yandex.ru/?a=b&c=d#e&g=h'), 'e'),
extractURLParameter(materialize('//yandex.ru/?a=b&c=d#e&g=h'), 'g'),
extractURLParameter(materialize('//yandex.ru/?a=b&c=d#test?e=f&g=h'), 'test'),
extractURLParameter(materialize('//yandex.ru/?a=b&c=d#test?e=f&g=h'), 'e'),
extractURLParameter(materialize('//yandex.ru/?a=b&c=d#test?e=f&g=h'), 'g');
extractURLParameter(materialize('http://bigmir.net/?a=b&c=d'), 'a'),
extractURLParameter(materialize('http://bigmir.net/?a=b&c=d'), 'c'),
extractURLParameter(materialize('http://bigmir.net/?a=b&c=d#e=f'), 'e'),
extractURLParameter(materialize('http://bigmir.net/?a&c=d#e=f'), 'a'),
extractURLParameter(materialize('http://bigmir.net/?a&c=d#e=f'), 'c'),
extractURLParameter(materialize('http://bigmir.net/?a&c=d#e=f'), 'e'),
extractURLParameter(materialize('http://bigmir.net/?a=b&c=d#e=f&g=h'), 'g'),
extractURLParameter(materialize('http://bigmir.net/?a=b&c=d#e'), 'a'),
extractURLParameter(materialize('http://bigmir.net/?a=b&c=d#e'), 'c'),
extractURLParameter(materialize('http://bigmir.net/?a=b&c=d#e'), 'e'),
extractURLParameter(materialize('http://bigmir.net/?a=b&c=d#e&g=h'), 'c'),
extractURLParameter(materialize('http://bigmir.net/?a=b&c=d#e&g=h'), 'e'),
extractURLParameter(materialize('http://bigmir.net/?a=b&c=d#e&g=h'), 'g'),
extractURLParameter(materialize('http://bigmir.net/?a=b&c=d#test?e=f&g=h'), 'test'),
extractURLParameter(materialize('http://bigmir.net/?a=b&c=d#test?e=f&g=h'), 'e'),
extractURLParameter(materialize('http://bigmir.net/?a=b&c=d#test?e=f&g=h'), 'g'),
extractURLParameter(materialize('//bigmir.net/?a=b&c=d'), 'a'),
extractURLParameter(materialize('//bigmir.net/?a=b&c=d'), 'c'),
extractURLParameter(materialize('//bigmir.net/?a=b&c=d#e=f'), 'e'),
extractURLParameter(materialize('//bigmir.net/?a&c=d#e=f'), 'a'),
extractURLParameter(materialize('//bigmir.net/?a&c=d#e=f'), 'c'),
extractURLParameter(materialize('//bigmir.net/?a&c=d#e=f'), 'e'),
extractURLParameter(materialize('//bigmir.net/?a=b&c=d#e=f&g=h'), 'g'),
extractURLParameter(materialize('//bigmir.net/?a=b&c=d#e'), 'a'),
extractURLParameter(materialize('//bigmir.net/?a=b&c=d#e'), 'c'),
extractURLParameter(materialize('//bigmir.net/?a=b&c=d#e'), 'e'),
extractURLParameter(materialize('//bigmir.net/?a=b&c=d#e&g=h'), 'c'),
extractURLParameter(materialize('//bigmir.net/?a=b&c=d#e&g=h'), 'e'),
extractURLParameter(materialize('//bigmir.net/?a=b&c=d#e&g=h'), 'g'),
extractURLParameter(materialize('//bigmir.net/?a=b&c=d#test?e=f&g=h'), 'test'),
extractURLParameter(materialize('//bigmir.net/?a=b&c=d#test?e=f&g=h'), 'e'),
extractURLParameter(materialize('//bigmir.net/?a=b&c=d#test?e=f&g=h'), 'g');
SELECT
cutURLParameter(materialize('http://yandex.ru/?a=b&c=d'), 'a'),
cutURLParameter(materialize('http://yandex.ru/?a=b&c=d'), 'c'),
cutURLParameter(materialize('http://yandex.ru/?a=b&c=d#e=f'), 'e'),
cutURLParameter(materialize('http://yandex.ru/?a&c=d#e=f'), 'a'),
cutURLParameter(materialize('http://yandex.ru/?a&c=d#e=f'), 'c'),
cutURLParameter(materialize('http://yandex.ru/?a&c=d#e=f'), 'e'),
cutURLParameter(materialize('http://yandex.ru/?a=b&c=d#e=f&g=h'), 'g'),
cutURLParameter(materialize('http://yandex.ru/?a=b&c=d#e'), 'a'),
cutURLParameter(materialize('http://yandex.ru/?a=b&c=d#e'), 'c'),
cutURLParameter(materialize('http://yandex.ru/?a=b&c=d#e'), 'e'),
cutURLParameter(materialize('http://yandex.ru/?a=b&c=d#e&g=h'), 'c'),
cutURLParameter(materialize('http://yandex.ru/?a=b&c=d#e&g=h'), 'e'),
cutURLParameter(materialize('http://yandex.ru/?a=b&c=d#e&g=h'), 'g'),
cutURLParameter(materialize('http://yandex.ru/?a=b&c=d#test?e=f&g=h'), 'test'),
cutURLParameter(materialize('http://yandex.ru/?a=b&c=d#test?e=f&g=h'), 'e'),
cutURLParameter(materialize('http://yandex.ru/?a=b&c=d#test?e=f&g=h'), 'g'),
cutURLParameter(materialize('//yandex.ru/?a=b&c=d'), 'a'),
cutURLParameter(materialize('//yandex.ru/?a=b&c=d'), 'c'),
cutURLParameter(materialize('//yandex.ru/?a=b&c=d#e=f'), 'e'),
cutURLParameter(materialize('//yandex.ru/?a&c=d#e=f'), 'a'),
cutURLParameter(materialize('//yandex.ru/?a&c=d#e=f'), 'c'),
cutURLParameter(materialize('//yandex.ru/?a&c=d#e=f'), 'e'),
cutURLParameter(materialize('//yandex.ru/?a=b&c=d#e=f&g=h'), 'g'),
cutURLParameter(materialize('//yandex.ru/?a=b&c=d#e'), 'a'),
cutURLParameter(materialize('//yandex.ru/?a=b&c=d#e'), 'c'),
cutURLParameter(materialize('//yandex.ru/?a=b&c=d#e'), 'e'),
cutURLParameter(materialize('//yandex.ru/?a=b&c=d#e&g=h'), 'c'),
cutURLParameter(materialize('//yandex.ru/?a=b&c=d#e&g=h'), 'e'),
cutURLParameter(materialize('//yandex.ru/?a=b&c=d#e&g=h'), 'g'),
cutURLParameter(materialize('//yandex.ru/?a=b&c=d#test?e=f&g=h'), 'test'),
cutURLParameter(materialize('//yandex.ru/?a=b&c=d#test?e=f&g=h'), 'e'),
cutURLParameter(materialize('//yandex.ru/?a=b&c=d#test?e=f&g=h'), 'g');
cutURLParameter(materialize('http://bigmir.net/?a=b&c=d'), 'a'),
cutURLParameter(materialize('http://bigmir.net/?a=b&c=d'), 'c'),
cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#e=f'), 'e'),
cutURLParameter(materialize('http://bigmir.net/?a&c=d#e=f'), 'a'),
cutURLParameter(materialize('http://bigmir.net/?a&c=d#e=f'), 'c'),
cutURLParameter(materialize('http://bigmir.net/?a&c=d#e=f'), 'e'),
cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#e=f&g=h'), 'g'),
cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#e'), 'a'),
cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#e'), 'c'),
cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#e'), 'e'),
cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#e&g=h'), 'c'),
cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#e&g=h'), 'e'),
cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#e&g=h'), 'g'),
cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#test?e=f&g=h'), 'test'),
cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#test?e=f&g=h'), 'e'),
cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#test?e=f&g=h'), 'g'),
cutURLParameter(materialize('//bigmir.net/?a=b&c=d'), 'a'),
cutURLParameter(materialize('//bigmir.net/?a=b&c=d'), 'c'),
cutURLParameter(materialize('//bigmir.net/?a=b&c=d#e=f'), 'e'),
cutURLParameter(materialize('//bigmir.net/?a&c=d#e=f'), 'a'),
cutURLParameter(materialize('//bigmir.net/?a&c=d#e=f'), 'c'),
cutURLParameter(materialize('//bigmir.net/?a&c=d#e=f'), 'e'),
cutURLParameter(materialize('//bigmir.net/?a=b&c=d#e=f&g=h'), 'g'),
cutURLParameter(materialize('//bigmir.net/?a=b&c=d#e'), 'a'),
cutURLParameter(materialize('//bigmir.net/?a=b&c=d#e'), 'c'),
cutURLParameter(materialize('//bigmir.net/?a=b&c=d#e'), 'e'),
cutURLParameter(materialize('//bigmir.net/?a=b&c=d#e&g=h'), 'c'),
cutURLParameter(materialize('//bigmir.net/?a=b&c=d#e&g=h'), 'e'),
cutURLParameter(materialize('//bigmir.net/?a=b&c=d#e&g=h'), 'g'),
cutURLParameter(materialize('//bigmir.net/?a=b&c=d#test?e=f&g=h'), 'test'),
cutURLParameter(materialize('//bigmir.net/?a=b&c=d#test?e=f&g=h'), 'e'),
cutURLParameter(materialize('//bigmir.net/?a=b&c=d#test?e=f&g=h'), 'g');

View File

@ -1,3 +1,3 @@
canada congo net-domena
yandex yandex yandex яндекс yandex
meta bigmir yahoo гугл meta
canada hello hello canada

View File

@ -4,12 +4,12 @@ SELECT
firstSignificantSubdomain('http://pochemu.net-domena.ru') AS why;
SELECT
firstSignificantSubdomain('ftp://www.yandex.com.tr/news.html'),
firstSignificantSubdomain('https://www.yandex.ua/news.html'),
firstSignificantSubdomain('magnet:yandex.abc'),
firstSignificantSubdomain('ftp://www.yandex.co.uk/news.html'),
firstSignificantSubdomain('https://api.www3.static.dev.ввв.яндекс.рф'),
firstSignificantSubdomain('//www.yandex.com.tr/news.html');
firstSignificantSubdomain('ftp://www.meta.com.ua/news.html'),
firstSignificantSubdomain('https://www.bigmir.net/news.html'),
firstSignificantSubdomain('magnet:ukr.abc'),
firstSignificantSubdomain('ftp://www.yahoo.co.jp/news.html'),
firstSignificantSubdomain('https://api.www3.static.dev.ввв.гугл.ком'),
firstSignificantSubdomain('//www.meta.com.ua/news.html');
SELECT
firstSignificantSubdomain('http://hello.canada.c'),

View File

@ -1,5 +1,5 @@
#!/usr/bin/env bash
# Tags: no-fasttest
# Tags: no-fasttest, no-random-settings
# set -x

View File

@ -1,6 +1,6 @@
-- Tags: no-fasttest
SELECT jumpConsistentHash(1, 1), jumpConsistentHash(42, 57), jumpConsistentHash(256, 1024), jumpConsistentHash(3735883980, 1), jumpConsistentHash(3735883980, 666), jumpConsistentHash(16045690984833335023, 255);
SELECT yandexConsistentHash(16045690984833335023, 1), yandexConsistentHash(16045690984833335023, 2), yandexConsistentHash(16045690984833335023, 3), yandexConsistentHash(16045690984833335023, 4), yandexConsistentHash(16045690984833335023, 173), yandexConsistentHash(16045690984833335023, 255);
SELECT kostikConsistentHash(16045690984833335023, 1), kostikConsistentHash(16045690984833335023, 2), kostikConsistentHash(16045690984833335023, 3), kostikConsistentHash(16045690984833335023, 4), kostikConsistentHash(16045690984833335023, 173), kostikConsistentHash(16045690984833335023, 255);
SELECT jumpConsistentHash(intHash64(number), 787) FROM system.numbers LIMIT 1000000, 2;
SELECT yandexConsistentHash(16045690984833335023+number-number, 120) FROM system.numbers LIMIT 1000000, 2;
SELECT kostikConsistentHash(16045690984833335023+number-number, 120) FROM system.numbers LIMIT 1000000, 2;

View File

@ -1,23 +1,23 @@
{"total":"1","domain":"baidu.com"}
{"total":"2","domain":"facebook.com"}
{"total":"1","domain":"google.com"}
{"total":"2","domain":"yandex.ru"}
{"total":"2","domain":"meta.ua"}
{"total":"1","domain":"baidu.com"}
{"total":"2","domain":"facebook.com"}
{"total":"1","domain":"google.com"}
{"total":"2","domain":"yandex.ru"}
{"total":"2","domain":"meta.ua"}
1 baidu.com
2 facebook.com
1 google.com
2 yandex.ru
2 meta.ua
1 baidu.com
2 facebook.com
1 google.com
2 yandex.ru
2 meta.ua
1 baidu.com
1 google.com
2 facebook.com
2 yandex.ru
2 meta.ua
1
1
2
@ -25,4 +25,4 @@
baidu.com
google.com
facebook.com
yandex.ru
meta.ua

View File

@ -4,8 +4,8 @@ DROP TABLE IF EXISTS transactions;
CREATE TABLE clicks (domain String) ENGINE = Memory;
CREATE TABLE transactions (domain String) ENGINE = Memory;
INSERT INTO clicks VALUES ('facebook.com'), ('yandex.ru'), ('google.com');
INSERT INTO transactions VALUES ('facebook.com'), ('yandex.ru'), ('baidu.com');
INSERT INTO clicks VALUES ('facebook.com'), ('meta.ua'), ('google.com');
INSERT INTO transactions VALUES ('facebook.com'), ('meta.ua'), ('baidu.com');
SELECT

Some files were not shown because too many files have changed in this diff.