fix conflicts

taiyang-li 2024-04-12 15:22:28 +08:00
commit 2e4d31270e
620 changed files with 10277 additions and 2501 deletions

View File

@ -119,7 +119,6 @@ Checks: [
'-readability-named-parameter',
'-readability-redundant-declaration',
'-readability-simplify-boolean-expr',
'-readability-static-accessed-through-instance',
'-readability-suspicious-call-argument',
'-readability-uppercase-literal-suffix',
'-readability-use-anyofallof',

View File

@ -23,6 +23,10 @@ jobs:
clear-repository: true # to ensure correct digests
fetch-depth: 0 # to get version
filter: tree:0
- name: Merge sync PR
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 sync_pr.py --merge || :
- name: Python unit tests
run: |
cd "$GITHUB_WORKSPACE/tests/ci"

View File

@ -158,15 +158,24 @@ jobs:
#
FinishCheck:
if: ${{ !failure() && !cancelled() }}
needs: [Tests_1, Tests_2]
needs: [Tests_1, Tests_2, Builds_1_Report, Builds_2_Report]
runs-on: [self-hosted, style-checker]
steps:
- name: Check out repository code
uses: ClickHouse/checkout@v1
- name: Check sync status
if: ${{ github.event_name == 'merge_group' }}
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 sync_pr.py --status
- name: Finish label
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 finish_check.py
- name: Auto merge if approved
if: ${{ github.event_name != 'merge_group' }}
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 merge_pr.py --check-approved

View File

@ -29,11 +29,13 @@ public:
requires std::is_convertible_v<G, F>
constexpr BasicScopeGuard & operator=(BasicScopeGuard<G> && src) // NOLINT(cppcoreguidelines-rvalue-reference-param-not-moved, cppcoreguidelines-noexcept-move-operations)
{
if (this != &src)
if constexpr (std::is_same_v<G, F>)
{
invoke();
function = src.release();
if (this == &src)
return *this;
}
invoke();
function = src.release();
return *this;
}
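The hunk above narrows the self-move check in `BasicScopeGuard::operator=` to the case where both guards share the same callable type: for `G != F` the operands can never alias, and the pointer comparison would not even compile. A minimal standalone sketch of the pattern (a hypothetical `ScopeGuard`, not the real ClickHouse header):

```cpp
#include <functional>
#include <iostream>
#include <type_traits>
#include <utility>

// Minimal stand-in for BasicScopeGuard (a sketch, not the real header).
// `if constexpr` keeps the self-assignment check out of cross-type
// instantiations, where `this == &src` would compare unrelated pointer types.
template <class F>
class ScopeGuard
{
public:
    explicit ScopeGuard(F f) : function(std::move(f)) {}
    ~ScopeGuard() { invoke(); }

    template <class G>
    requires std::is_convertible_v<G, F>
    ScopeGuard & operator=(ScopeGuard<G> && src)
    {
        if constexpr (std::is_same_v<G, F>)
        {
            if (this == &src)
                return *this;        // self-move: keep the callback pending
        }
        invoke();                    // fire the callback currently held
        function = src.release();    // adopt the source's callback
        active = true;
        return *this;
    }

    F release()
    {
        active = false;
        return std::move(function);
    }

private:
    void invoke()
    {
        if (active)
        {
            active = false;
            function();
        }
    }

    F function;
    bool active = true;
};

int main()
{
    using Fn = std::function<void()>;
    ScopeGuard<Fn> g1{Fn([] { std::cout << "cleanup #1\n"; })};
    ScopeGuard<Fn> g2{Fn([] { std::cout << "cleanup #2\n"; })};
    g2 = std::move(g1); // prints "cleanup #2" now; g2 takes over cleanup #1
    return 0;
}                       // g2's destructor prints "cleanup #1"; g1 does nothing
```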

View File

@ -4835,7 +4835,7 @@ for (;; ptr++)
If the class contains characters outside the 0-255 range, a different
opcode is compiled. It may optionally have a bit map for characters < 256,
but those above are are explicitly listed afterwards. A flag byte tells
but those above are explicitly listed afterwards. A flag byte tells
whether the bitmap is present, and whether this is a negated class or not.
In JavaScript compatibility mode, an isolated ']' causes an error. In

View File

@ -213,6 +213,19 @@ namespace Net
Poco::Timespan getKeepAliveTimeout() const;
/// Returns the connection timeout for HTTP connections.
void setKeepAliveMaxRequests(int max_requests);
int getKeepAliveMaxRequests() const;
int getKeepAliveRequest() const;
bool isKeepAliveExpired(double reliability = 1.0) const;
/// Returns whether the connection has expired, with a safety margin given as a fraction (`reliability`) of the timeout
double getKeepAliveReliability() const;
/// Returns the fraction of the keep-alive timeout during which a connection is considered safe to use.
/// This helps to avoid the situation where a client uses a nearly expired connection and receives NoMessageException
virtual std::ostream & sendRequest(HTTPRequest & request);
/// Sends the header for the given HTTP request to
/// the server.
@ -345,6 +358,8 @@ namespace Net
void assign(HTTPClientSession & session);
void setKeepAliveRequest(int request);
HTTPSessionFactory _proxySessionFactory;
/// Factory to create HTTPClientSession to proxy.
private:
@ -353,6 +368,8 @@ namespace Net
Poco::UInt16 _port;
ProxyConfig _proxyConfig;
Poco::Timespan _keepAliveTimeout;
int _keepAliveCurrentRequest = 0;
int _keepAliveMaxRequests = 1000;
Poco::Timestamp _lastRequest;
bool _reconnect;
bool _mustReconnect;
@ -361,6 +378,7 @@ namespace Net
Poco::SharedPtr<std::ostream> _pRequestStream;
Poco::SharedPtr<std::istream> _pResponseStream;
static const double _defaultKeepAliveReliabilityLevel;
static ProxyConfig _globalProxyConfig;
HTTPClientSession(const HTTPClientSession &);
@ -450,9 +468,19 @@ namespace Net
return _lastRequest;
}
inline void HTTPClientSession::setLastRequest(Poco::Timestamp time)
inline double HTTPClientSession::getKeepAliveReliability() const
{
_lastRequest = time;
return _defaultKeepAliveReliabilityLevel;
}
inline int HTTPClientSession::getKeepAliveMaxRequests() const
{
return _keepAliveMaxRequests;
}
inline int HTTPClientSession::getKeepAliveRequest() const
{
return _keepAliveCurrentRequest;
}
}
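The centerpiece of the new API is `isKeepAliveExpired()`: a connection counts as expired once a `reliability` fraction of the keep-alive timeout has elapsed since the last request, or once its request budget is used up (the implementation appears in HTTPClientSession.cpp below). A standalone sketch of that rule, with all names local to the example:

```cpp
#include <cstdint>
#include <iostream>

// Sketch of the expiry rule behind isKeepAliveExpired(); names here are
// local to the example, not the Poco API.
bool isExpired(int64_t idle_us, int64_t timeout_us, double reliability,
               int current_request, int max_requests)
{
    // Expired once `reliability * timeout` has elapsed since the last request,
    // or once more requests than allowed have been issued.
    return static_cast<int64_t>(reliability * timeout_us) <= idle_us
        || current_request > max_requests;
}

int main()
{
    const int64_t timeout_us = 10'000'000; // 10 s keep-alive timeout
    std::cout << isExpired(8'900'000, timeout_us, 0.9, 5, 1000) << '\n'    // 0: only 8.9 s idle
              << isExpired(9'100'000, timeout_us, 0.9, 5, 1000) << '\n'    // 1: past the 9 s safety margin
              << isExpired(1'000'000, timeout_us, 0.9, 1001, 1000) << '\n'; // 1: request budget spent
}
```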

View File

@ -120,6 +120,10 @@ namespace Net
/// The value is set to "Keep-Alive" if keepAlive is
/// true, or to "Close" otherwise.
void setKeepAliveTimeout(int timeout, int max_requests);
int getKeepAliveTimeout() const;
int getKeepAliveMaxRequests() const;
bool getKeepAlive() const;
/// Returns true if
/// * the message has a Connection header field and its value is "Keep-Alive"

View File

@ -44,7 +44,7 @@ namespace Net
/// - timeout: 60 seconds
/// - keepAlive: true
/// - maxKeepAliveRequests: 0
/// - keepAliveTimeout: 10 seconds
/// - keepAliveTimeout: 15 seconds
void setServerName(const std::string & serverName);
/// Sets the name and port (name:port) that the server uses to identify itself.

View File

@ -56,6 +56,8 @@ namespace Net
SocketAddress serverAddress();
/// Returns the server's address.
void setKeepAliveTimeout(Poco::Timespan keepAliveTimeout);
private:
bool _firstRequest;
Poco::Timespan _keepAliveTimeout;

View File

@ -37,6 +37,7 @@ namespace Net {
HTTPClientSession::ProxyConfig HTTPClientSession::_globalProxyConfig;
const double HTTPClientSession::_defaultKeepAliveReliabilityLevel = 0.9;
HTTPClientSession::HTTPClientSession():
@ -220,7 +221,41 @@ void HTTPClientSession::setGlobalProxyConfig(const ProxyConfig& config)
void HTTPClientSession::setKeepAliveTimeout(const Poco::Timespan& timeout)
{
_keepAliveTimeout = timeout;
if (connected())
{
throw Poco::IllegalStateException("cannot change keep alive timeout on initiated connection, "
"That value is managed privately after connection is established.");
}
_keepAliveTimeout = timeout;
}
void HTTPClientSession::setKeepAliveMaxRequests(int max_requests)
{
if (connected())
{
throw Poco::IllegalStateException("cannot change keep alive max requests on initiated connection, "
"That value is managed privately after connection is established.");
}
_keepAliveMaxRequests = max_requests;
}
void HTTPClientSession::setKeepAliveRequest(int request)
{
_keepAliveCurrentRequest = request;
}
void HTTPClientSession::setLastRequest(Poco::Timestamp time)
{
if (connected())
{
throw Poco::IllegalStateException("cannot change last request on initiated connection, "
"That value is managed privately after connection is established.");
}
_lastRequest = time;
}
@ -231,6 +266,8 @@ std::ostream& HTTPClientSession::sendRequest(HTTPRequest& request)
clearException();
_responseReceived = false;
_keepAliveCurrentRequest += 1;
bool keepAlive = getKeepAlive();
if (((connected() && !keepAlive) || mustReconnect()) && !_host.empty())
{
@ -241,8 +278,10 @@ std::ostream& HTTPClientSession::sendRequest(HTTPRequest& request)
{
if (!connected())
reconnect();
if (!keepAlive)
request.setKeepAlive(false);
if (!request.has(HTTPMessage::CONNECTION))
request.setKeepAlive(keepAlive);
if (keepAlive && !request.has(HTTPMessage::CONNECTION_KEEP_ALIVE) && _keepAliveTimeout.totalSeconds() > 0)
request.setKeepAliveTimeout(_keepAliveTimeout.totalSeconds(), _keepAliveMaxRequests);
if (!request.has(HTTPRequest::HOST) && !_host.empty())
request.setHost(_host, _port);
if (!_proxyConfig.host.empty() && !bypassProxy())
@ -324,6 +363,17 @@ std::istream& HTTPClientSession::receiveResponse(HTTPResponse& response)
_mustReconnect = getKeepAlive() && !response.getKeepAlive();
if (!_mustReconnect)
{
/// When the server sends its keep-alive timeout, the client has to follow that value
auto timeout = response.getKeepAliveTimeout();
if (timeout > 0)
_keepAliveTimeout = std::min(_keepAliveTimeout, Poco::Timespan(timeout, 0));
auto max_requests = response.getKeepAliveMaxRequests();
if (max_requests > 0)
_keepAliveMaxRequests = std::min(_keepAliveMaxRequests, max_requests);
}
if (!_expectResponseBody || response.getStatus() < 200 || response.getStatus() == HTTPResponse::HTTP_NO_CONTENT || response.getStatus() == HTTPResponse::HTTP_NOT_MODIFIED)
_pResponseStream = new HTTPFixedLengthInputStream(*this, 0);
else if (response.getChunkedTransferEncoding())
@ -430,15 +480,18 @@ std::string HTTPClientSession::proxyRequestPrefix() const
return result;
}
bool HTTPClientSession::isKeepAliveExpired(double reliability) const
{
Poco::Timestamp now;
return Timespan(Timestamp::TimeDiff(reliability * _keepAliveTimeout.totalMicroseconds())) <= now - _lastRequest
|| _keepAliveCurrentRequest > _keepAliveMaxRequests;
}
bool HTTPClientSession::mustReconnect() const
{
if (!_mustReconnect)
{
Poco::Timestamp now;
return _keepAliveTimeout <= now - _lastRequest;
}
else return true;
return isKeepAliveExpired(_defaultKeepAliveReliabilityLevel);
return true;
}
@ -511,14 +564,21 @@ void HTTPClientSession::assign(Poco::Net::HTTPClientSession & session)
if (buffered())
throw Poco::LogicException("assign to a session with not empty buffered data");
attachSocket(session.detachSocket());
setLastRequest(session.getLastRequest());
poco_assert(!connected());
setResolvedHost(session.getResolvedHost());
setKeepAlive(session.getKeepAlive());
setProxyConfig(session.getProxyConfig());
setTimeout(session.getConnectionTimeout(), session.getSendTimeout(), session.getReceiveTimeout());
setKeepAlive(session.getKeepAlive());
setLastRequest(session.getLastRequest());
setKeepAliveTimeout(session.getKeepAliveTimeout());
setProxyConfig(session.getProxyConfig());
_keepAliveMaxRequests = session._keepAliveMaxRequests;
_keepAliveCurrentRequest = session._keepAliveCurrentRequest;
attachSocket(session.detachSocket());
session.reset();
}
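Taken together, these changes let a pool-style caller configure both limits before connecting and check a connection before reuse. A hedged usage sketch (the host and the pooling decision are illustrative; the methods are the ones added in this diff):

```cpp
#include <Poco/Net/HTTPClientSession.h>
#include <Poco/Net/HTTPRequest.h>
#include <Poco/Net/HTTPResponse.h>
#include <Poco/NullStream.h>
#include <Poco/StreamCopier.h>
#include <iostream>

int main()
{
    Poco::Net::HTTPClientSession session("example.com", 80); // placeholder host

    // Both limits must be set before the connection is initiated;
    // after that, the setters above throw IllegalStateException.
    session.setKeepAliveTimeout(Poco::Timespan(10, 0)); // 10 s
    session.setKeepAliveMaxRequests(100);
    session.setKeepAlive(true);

    Poco::Net::HTTPRequest request(Poco::Net::HTTPRequest::HTTP_GET, "/",
                                   Poco::Net::HTTPMessage::HTTP_1_1);
    // Per the sendRequest() hunk above, this also emits
    // "Keep-Alive: timeout=10, max=100" on a keep-alive connection.
    session.sendRequest(request);

    Poco::Net::HTTPResponse response;
    Poco::NullOutputStream null;
    // Drain the body before reuse; receiveResponse() may have lowered the
    // timeout / max requests to the values the server sent back.
    Poco::StreamCopier::copyStream(session.receiveResponse(response), null);

    // Before reusing the pooled session: with the default reliability of 0.9,
    // a 10 s timeout means "expired" after 9 s idle (or after 100 requests).
    if (session.isKeepAliveExpired(session.getKeepAliveReliability()))
        std::cout << "connection (nearly) expired, open a fresh one\n";
    return 0;
}
```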

View File

@ -17,6 +17,7 @@
#include "Poco/NumberFormatter.h"
#include "Poco/NumberParser.h"
#include "Poco/String.h"
#include <format>
using Poco::NumberFormatter;
@ -179,4 +180,51 @@ bool HTTPMessage::getKeepAlive() const
}
void HTTPMessage::setKeepAliveTimeout(int timeout, int max_requests)
{
add(HTTPMessage::CONNECTION_KEEP_ALIVE, std::format("timeout={}, max={}", timeout, max_requests));
}
int parseFromHeaderValues(const std::string_view header_value, const std::string_view param_name)
{
auto param_value_pos = header_value.find(param_name);
if (param_value_pos == std::string::npos)
param_value_pos = header_value.size();
if (param_value_pos != header_value.size())
param_value_pos += param_name.size();
auto param_value_end = header_value.find(',', param_value_pos);
if (param_value_end == std::string::npos)
param_value_end = header_value.size();
auto timeout_value_substr = header_value.substr(param_value_pos, param_value_end - param_value_pos);
if (timeout_value_substr.empty())
return -1;
int value = 0;
auto [ptr, ec] = std::from_chars(timeout_value_substr.begin(), timeout_value_substr.end(), value);
if (ec == std::errc())
return value;
return -1;
}
int HTTPMessage::getKeepAliveTimeout() const
{
const std::string& ka_header = get(HTTPMessage::CONNECTION_KEEP_ALIVE, HTTPMessage::EMPTY);
static const std::string_view timeout_param = "timeout=";
return parseFromHeaderValues(ka_header, timeout_param);
}
int HTTPMessage::getKeepAliveMaxRequests() const
{
const std::string& ka_header = get(HTTPMessage::CONNECTION_KEEP_ALIVE, HTTPMessage::EMPTY);
static const std::string_view timeout_param = "max=";
return parseFromHeaderValues(ka_header, timeout_param);
}
} } // namespace Poco::Net
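The negotiation above rides on the standard `Keep-Alive: timeout=<seconds>, max=<requests>` header. A small sketch of what the new getters extract, using a locally constructed response instead of a real exchange:

```cpp
#include <Poco/Net/HTTPResponse.h>
#include <iostream>

int main()
{
    Poco::Net::HTTPResponse response;
    response.set("Connection", "Keep-Alive");
    response.set("Keep-Alive", "timeout=10, max=1000");

    // parseFromHeaderValues() above scans the header value for "timeout=" /
    // "max=" and returns -1 when the parameter is absent.
    std::cout << response.getKeepAliveTimeout() << '\n';     // 10
    std::cout << response.getKeepAliveMaxRequests() << '\n'; // 1000
}
```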

View File

@ -88,7 +88,18 @@ void HTTPServerConnection::run()
pHandler->handleRequest(request, response);
session.setKeepAlive(_pParams->getKeepAlive() && response.getKeepAlive() && session.canKeepAlive());
}
/// All of this fuss is to make the session close with a shorter timeout than the 15 s set in the HTTPServerParams constructor
if (_pParams->getKeepAlive() && response.getKeepAlive() && session.canKeepAlive())
{
int value = response.getKeepAliveTimeout();
if (value < 0)
value = request.getKeepAliveTimeout();
if (value > 0)
session.setKeepAliveTimeout(Poco::Timespan(value, 0));
}
}
else sendErrorResponse(session, HTTPResponse::HTTP_NOT_IMPLEMENTED);
}
catch (Poco::Exception&)

View File

@ -33,6 +33,12 @@ HTTPServerSession::~HTTPServerSession()
{
}
void HTTPServerSession::setKeepAliveTimeout(Poco::Timespan keepAliveTimeout)
{
_keepAliveTimeout = keepAliveTimeout;
}
bool HTTPServerSession::hasMoreRequests()
{

contrib/arrow vendored

@ -1 +1 @@
Subproject commit ba5c67934e8274d649befcffab56731632dc5253
Subproject commit 8f36d71d18587f1f315ec832f424183cb6519cbb

View File

@ -59,12 +59,3 @@ target_link_libraries (_avrocpp PRIVATE boost::headers_only boost::iostreams)
target_compile_definitions (_avrocpp PUBLIC SNAPPY_CODEC_AVAILABLE)
target_include_directories (_avrocpp PRIVATE ${SNAPPY_INCLUDE_DIR})
target_link_libraries (_avrocpp PRIVATE ch_contrib::snappy)
# create a symlink to include headers with <avro/...>
set(AVRO_INCLUDE_DIR "${CMAKE_CURRENT_BINARY_DIR}/include")
ADD_CUSTOM_TARGET(avro_symlink_headers ALL
COMMAND ${CMAKE_COMMAND} -E make_directory "${AVRO_INCLUDE_DIR}"
COMMAND ${CMAKE_COMMAND} -E create_symlink "${AVROCPP_ROOT_DIR}/api" "${AVRO_INCLUDE_DIR}/avro"
)
add_dependencies(_avrocpp avro_symlink_headers)
target_include_directories(_avrocpp SYSTEM BEFORE PUBLIC "${AVRO_INCLUDE_DIR}")

View File

@ -1,26 +1,18 @@
option (ENABLE_SSH "Enable support for SSH keys and protocol" ${ENABLE_LIBRARIES})
option (ENABLE_SSH "Enable support for libssh" ${ENABLE_LIBRARIES})
if (NOT ENABLE_SSH)
message(STATUS "Not using SSH")
message(STATUS "Not using libssh")
return()
endif()
# CMake variables needed by libssh_version.h.cmake, update them when you update libssh
set(libssh_VERSION_MAJOR 0)
set(libssh_VERSION_MINOR 9)
set(libssh_VERSION_PATCH 8)
set(LIB_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libssh")
set(LIB_BINARY_DIR "${ClickHouse_BINARY_DIR}/contrib/libssh")
# Set CMake variables which are used in libssh_version.h.cmake
project(libssh VERSION 0.9.8 LANGUAGES C)
set(LIBRARY_VERSION "4.8.8")
set(LIBRARY_SOVERSION "4")
set(CMAKE_THREAD_PREFER_PTHREADS ON)
set(THREADS_PREFER_PTHREAD_FLAG ON)
set(WITH_ZLIB OFF)
set(WITH_SYMBOL_VERSIONING OFF)
set(WITH_SERVER ON)
set(libssh_SRCS
${LIB_SOURCE_DIR}/src/agent.c
${LIB_SOURCE_DIR}/src/auth.c
@ -28,15 +20,21 @@ set(libssh_SRCS
${LIB_SOURCE_DIR}/src/bignum.c
${LIB_SOURCE_DIR}/src/buffer.c
${LIB_SOURCE_DIR}/src/callbacks.c
${LIB_SOURCE_DIR}/src/chachapoly.c
${LIB_SOURCE_DIR}/src/channels.c
${LIB_SOURCE_DIR}/src/client.c
${LIB_SOURCE_DIR}/src/config.c
${LIB_SOURCE_DIR}/src/config_parser.c
${LIB_SOURCE_DIR}/src/connect.c
${LIB_SOURCE_DIR}/src/connector.c
${LIB_SOURCE_DIR}/src/curve25519.c
${LIB_SOURCE_DIR}/src/dh.c
${LIB_SOURCE_DIR}/src/ecdh.c
${LIB_SOURCE_DIR}/src/error.c
${LIB_SOURCE_DIR}/src/external/bcrypt_pbkdf.c
${LIB_SOURCE_DIR}/src/external/blowfish.c
${LIB_SOURCE_DIR}/src/external/chacha.c
${LIB_SOURCE_DIR}/src/external/poly1305.c
${LIB_SOURCE_DIR}/src/getpass.c
${LIB_SOURCE_DIR}/src/init.c
${LIB_SOURCE_DIR}/src/kdf.c
@ -55,37 +53,32 @@ set(libssh_SRCS
${LIB_SOURCE_DIR}/src/pcap.c
${LIB_SOURCE_DIR}/src/pki.c
${LIB_SOURCE_DIR}/src/pki_container_openssh.c
${LIB_SOURCE_DIR}/src/pki_ed25519_common.c
${LIB_SOURCE_DIR}/src/poll.c
${LIB_SOURCE_DIR}/src/session.c
${LIB_SOURCE_DIR}/src/scp.c
${LIB_SOURCE_DIR}/src/session.c
${LIB_SOURCE_DIR}/src/socket.c
${LIB_SOURCE_DIR}/src/string.c
${LIB_SOURCE_DIR}/src/threads.c
${LIB_SOURCE_DIR}/src/wrapper.c
${LIB_SOURCE_DIR}/src/external/bcrypt_pbkdf.c
${LIB_SOURCE_DIR}/src/external/blowfish.c
${LIB_SOURCE_DIR}/src/external/chacha.c
${LIB_SOURCE_DIR}/src/external/poly1305.c
${LIB_SOURCE_DIR}/src/chachapoly.c
${LIB_SOURCE_DIR}/src/config_parser.c
${LIB_SOURCE_DIR}/src/token.c
${LIB_SOURCE_DIR}/src/pki_ed25519_common.c
${LIB_SOURCE_DIR}/src/wrapper.c
# some files of libssh/src/ are missing - why?
${LIB_SOURCE_DIR}/src/threads/noop.c
${LIB_SOURCE_DIR}/src/threads/pthread.c
# files missing - why?
# LIBCRYPT specific
${libssh_SRCS}
${LIB_SOURCE_DIR}/src/threads/libcrypto.c
${LIB_SOURCE_DIR}/src/pki_crypto.c
${LIB_SOURCE_DIR}/src/dh_crypto.c
${LIB_SOURCE_DIR}/src/ecdh_crypto.c
${LIB_SOURCE_DIR}/src/libcrypto.c
${LIB_SOURCE_DIR}/src/dh_crypto.c
${LIB_SOURCE_DIR}/src/pki_crypto.c
${LIB_SOURCE_DIR}/src/threads/libcrypto.c
${LIB_SOURCE_DIR}/src/options.c
${LIB_SOURCE_DIR}/src/server.c
${LIB_SOURCE_DIR}/src/bind.c
${LIB_SOURCE_DIR}/src/bind_config.c
${LIB_SOURCE_DIR}/src/options.c
${LIB_SOURCE_DIR}/src/server.c
)
if (NOT (ENABLE_OPENSSL OR ENABLE_OPENSSL_DYNAMIC))
@ -94,7 +87,7 @@ endif()
configure_file(${LIB_SOURCE_DIR}/include/libssh/libssh_version.h.cmake ${LIB_BINARY_DIR}/include/libssh/libssh_version.h @ONLY)
add_library(_ssh STATIC ${libssh_SRCS})
add_library(_ssh ${libssh_SRCS})
add_library(ch_contrib::ssh ALIAS _ssh)
target_link_libraries(_ssh PRIVATE OpenSSL::Crypto)

View File

@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="24.3.1.2672"
ARG VERSION="24.3.2.23"
ARG PACKAGES="clickhouse-keeper"
ARG DIRECT_DOWNLOAD_URLS=""

View File

@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="24.3.1.2672"
ARG VERSION="24.3.2.23"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
ARG DIRECT_DOWNLOAD_URLS=""

View File

@ -27,7 +27,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
ARG VERSION="24.3.1.2672"
ARG VERSION="24.3.2.23"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
# set non-empty deb_location_url url to create a docker image

View File

@ -26,6 +26,11 @@
<table_function_remote_max_addresses>
<max>200</max>
</table_function_remote_max_addresses>
<!-- Don't waste cycles testing the old interpreter. Spend time in the new analyzer instead -->
<allow_experimental_analyzer>
<readonly/>
</allow_experimental_analyzer>
</constraints>
</default>
</profiles>

View File

@ -43,6 +43,8 @@ source /utils.lib
if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
echo "Azure is disabled"
elif [[ -n "$USE_SHARED_CATALOG" ]] && [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then
echo "Azure is disabled"
else
azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log &
fi
@ -139,6 +141,32 @@ if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]
MAX_RUN_TIME=$((MAX_RUN_TIME != 0 ? MAX_RUN_TIME : 9000)) # set to 2.5 hours if 0 (unlimited)
fi
if [[ -n "$USE_SHARED_CATALOG" ]] && [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then
sudo cat /etc/clickhouse-server1/config.d/filesystem_caches_path.xml \
| sed "s|<filesystem_caches_path>/var/lib/clickhouse/filesystem_caches/</filesystem_caches_path>|<filesystem_caches_path>/var/lib/clickhouse/filesystem_caches_1/</filesystem_caches_path>|" \
> /etc/clickhouse-server1/config.d/filesystem_caches_path.xml.tmp
mv /etc/clickhouse-server1/config.d/filesystem_caches_path.xml.tmp /etc/clickhouse-server1/config.d/filesystem_caches_path.xml
sudo cat /etc/clickhouse-server1/config.d/filesystem_caches_path.xml \
| sed "s|<custom_cached_disks_base_directory replace=\"replace\">/var/lib/clickhouse/filesystem_caches/</custom_cached_disks_base_directory>|<custom_cached_disks_base_directory replace=\"replace\">/var/lib/clickhouse/filesystem_caches_1/</custom_cached_disks_base_directory>|" \
> /etc/clickhouse-server1/config.d/filesystem_caches_path.xml.tmp
mv /etc/clickhouse-server1/config.d/filesystem_caches_path.xml.tmp /etc/clickhouse-server1/config.d/filesystem_caches_path.xml
mkdir -p /var/run/clickhouse-server1
sudo chown clickhouse:clickhouse /var/run/clickhouse-server1
sudo -E -u clickhouse /usr/bin/clickhouse server --config /etc/clickhouse-server1/config.xml --daemon \
--pid-file /var/run/clickhouse-server1/clickhouse-server.pid \
-- --path /var/lib/clickhouse1/ --logger.stderr /var/log/clickhouse-server/stderr1.log \
--logger.log /var/log/clickhouse-server/clickhouse-server1.log --logger.errorlog /var/log/clickhouse-server/clickhouse-server1.err.log \
--tcp_port 19000 --tcp_port_secure 19440 --http_port 18123 --https_port 18443 --interserver_http_port 19009 --tcp_with_proxy_port 19010 \
--mysql_port 19004 --postgresql_port 19005 \
--keeper_server.tcp_port 19181 --keeper_server.server_id 2 \
--prometheus.port 19988 \
--macros.replica r2 # It doesn't work :(
MAX_RUN_TIME=$((MAX_RUN_TIME < 9000 ? MAX_RUN_TIME : 9000)) # min(MAX_RUN_TIME, 2.5 hours)
MAX_RUN_TIME=$((MAX_RUN_TIME != 0 ? MAX_RUN_TIME : 9000)) # set to 2.5 hours if 0 (unlimited)
fi
# Wait for the server to start, but not for too long.
for _ in {1..100}
@ -185,6 +213,10 @@ function run_tests()
ADDITIONAL_OPTIONS+=('--s3-storage')
fi
if [[ -n "$USE_SHARED_CATALOG" ]] && [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then
ADDITIONAL_OPTIONS+=('--shared-catalog')
fi
if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
ADDITIONAL_OPTIONS+=('--replicated-database')
# Too many tests fail for DatabaseReplicated in parallel.
@ -266,6 +298,12 @@ do
echo "$err"
[[ "0" != "${#err}" ]] && failed_to_save_logs=1
fi
if [[ -n "$USE_SHARED_CATALOG" ]] && [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then
err=$( { clickhouse-client --port 19000 -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst; } 2>&1 )
echo "$err"
[[ "0" != "${#err}" ]] && failed_to_save_logs=1
fi
done
# Stop server so we can safely read data with clickhouse-local.
@ -277,6 +315,10 @@ if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]
sudo clickhouse stop --pid-path /var/run/clickhouse-server2 ||:
fi
if [[ -n "$USE_SHARED_CATALOG" ]] && [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then
sudo clickhouse stop --pid-path /var/run/clickhouse-server1 ||:
fi
rg -Fa "<Fatal>" /var/log/clickhouse-server/clickhouse-server.log ||:
rg -A50 -Fa "============" /var/log/clickhouse-server/stderr.log ||:
zstd --threads=0 < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log.zst &
@ -304,6 +346,10 @@ if [ $failed_to_save_logs -ne 0 ]; then
clickhouse-local --path /var/lib/clickhouse1/ --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst ||:
clickhouse-local --path /var/lib/clickhouse2/ --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.2.tsv.zst ||:
fi
if [[ -n "$USE_SHARED_CATALOG" ]] && [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then
clickhouse-local --path /var/lib/clickhouse1/ --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst ||:
fi
done
fi
@ -343,3 +389,10 @@ if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]
tar -chf /test_output/coordination1.tar /var/lib/clickhouse1/coordination ||:
tar -chf /test_output/coordination2.tar /var/lib/clickhouse2/coordination ||:
fi
if [[ -n "$USE_SHARED_CATALOG" ]] && [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then
rg -Fa "<Fatal>" /var/log/clickhouse-server/clickhouse-server1.log ||:
zstd --threads=0 < /var/log/clickhouse-server/clickhouse-server1.log > /test_output/clickhouse-server1.log.zst ||:
mv /var/log/clickhouse-server/stderr1.log /test_output/ ||:
tar -chf /test_output/coordination1.tar /var/lib/clickhouse1/coordination ||:
fi

View File

@ -0,0 +1,29 @@
---
sidebar_position: 1
sidebar_label: 2024
---
# 2024 Changelog
### ClickHouse release v24.3.2.23-lts (8b7d910960c) FIXME as compared to v24.3.1.2672-lts (2c5c589a882)
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fix logical error in group_by_use_nulls + grouping set + analyzer + materialize/constant [#61567](https://github.com/ClickHouse/ClickHouse/pull/61567) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix external table cannot parse data type Bool [#62115](https://github.com/ClickHouse/ClickHouse/pull/62115) ([Duc Canh Le](https://github.com/canhld94)).
* Revert "Merge pull request [#61564](https://github.com/ClickHouse/ClickHouse/issues/61564) from liuneng1994/optimize_in_single_value" [#62135](https://github.com/ClickHouse/ClickHouse/pull/62135) ([Raúl Marín](https://github.com/Algunenano)).
#### CI Fix or Improvement (changelog entry is not required)
* Backported in [#62030](https://github.com/ClickHouse/ClickHouse/issues/62030):. [#61869](https://github.com/ClickHouse/ClickHouse/pull/61869) ([Nikita Fomichev](https://github.com/fm4v)).
* Backported in [#62057](https://github.com/ClickHouse/ClickHouse/issues/62057): ... [#62044](https://github.com/ClickHouse/ClickHouse/pull/62044) ([Max K.](https://github.com/maxknv)).
* Backported in [#62204](https://github.com/ClickHouse/ClickHouse/issues/62204):. [#62190](https://github.com/ClickHouse/ClickHouse/pull/62190) ([Konstantin Bogdanov](https://github.com/thevar1able)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Fix some crashes with analyzer and group_by_use_nulls. [#61933](https://github.com/ClickHouse/ClickHouse/pull/61933) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix scalars create as select [#61998](https://github.com/ClickHouse/ClickHouse/pull/61998) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Ignore IfChainToMultiIfPass if returned type changed. [#62059](https://github.com/ClickHouse/ClickHouse/pull/62059) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix type for ConvertInToEqualPass [#62066](https://github.com/ClickHouse/ClickHouse/pull/62066) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Revert output Pretty in tty [#62090](https://github.com/ClickHouse/ClickHouse/pull/62090) ([Alexey Milovidov](https://github.com/alexey-milovidov)).

View File

@ -68,6 +68,12 @@ In the results of `SELECT` query, the values of `AggregateFunction` type have im
## Example of an Aggregated Materialized View {#example-of-an-aggregated-materialized-view}
The following examples assume that you have a database named `test`, so create it if it doesn't already exist:
```sql
CREATE DATABASE test;
```
We will create the table `test.visits` that contains the raw data:
``` sql
@ -80,17 +86,24 @@ CREATE TABLE test.visits
) ENGINE = MergeTree ORDER BY (StartDate, CounterID);
```
Next, we need to create an `AggregatingMergeTree` table that will store `AggregationFunction`s that keep track of the total number of visits and the number of unique users.
`AggregatingMergeTree` materialized view that watches the `test.visits` table, and use the `AggregateFunction` type:
``` sql
CREATE MATERIALIZED VIEW test.mv_visits
(
CREATE TABLE test.agg_visits (
StartDate DateTime64 NOT NULL,
CounterID UInt64,
Visits AggregateFunction(sum, Nullable(Int32)),
Users AggregateFunction(uniq, Nullable(Int32))
)
ENGINE = AggregatingMergeTree() ORDER BY (StartDate, CounterID)
ENGINE = AggregatingMergeTree() ORDER BY (StartDate, CounterID);
```
And then let's create a materialized view that populates `test.agg_visits` from `test.visits`:
```sql
CREATE MATERIALIZED VIEW test.visits_mv TO test.agg_visits
AS SELECT
StartDate,
CounterID,
@ -104,25 +117,45 @@ Inserting data into the `test.visits` table.
``` sql
INSERT INTO test.visits (StartDate, CounterID, Sign, UserID)
VALUES (1667446031, 1, 3, 4)
INSERT INTO test.visits (StartDate, CounterID, Sign, UserID)
VALUES (1667446031, 1, 6, 3)
VALUES (1667446031000, 1, 3, 4), (1667446031000, 1, 6, 3);
```
The data is inserted in both the table and the materialized view `test.mv_visits`.
The data is inserted in both `test.visits` and `test.agg_visits`.
To get the aggregated data, we need to execute a query such as `SELECT ... GROUP BY ...` from `test.agg_visits`:
``` sql
```sql
SELECT
StartDate,
sumMerge(Visits) AS Visits,
uniqMerge(Users) AS Users
FROM test.mv_visits
FROM test.agg_visits
GROUP BY StartDate
ORDER BY StartDate;
```
```text
┌───────────────StartDate─┬─Visits─┬─Users─┐
│ 2022-11-03 03:27:11.000 │ 9 │ 2 │
└─────────────────────────┴────────┴───────┘
```
Now let's add another couple of records to `test.visits`, this time using a different timestamp for one of the records:
```sql
INSERT INTO test.visits (StartDate, CounterID, Sign, UserID)
VALUES (1669446031000, 2, 5, 10), (1667446031000, 3, 7, 5);
```
If we then run the `SELECT` query again, we'll see the following output:
```text
┌───────────────StartDate─┬─Visits─┬─Users─┐
│ 2022-11-03 03:27:11.000 │ 16 │ 3 │
│ 2022-11-26 07:00:31.000 │ 5 │ 1 │
└─────────────────────────┴────────┴───────┘
```
## Related Content
- Blog: [Using Aggregate Combinators in ClickHouse](https://clickhouse.com/blog/aggregate-functions-combinators-in-clickhouse-for-arrays-maps-and-states)

View File

@ -45,6 +45,11 @@ Upper and lower bounds can be specified to limit Memory engine table size, effec
CREATE TABLE memory (i UInt32) ENGINE = Memory SETTINGS min_rows_to_keep = 100, max_rows_to_keep = 1000;
```
**Modify settings**
```sql
ALTER TABLE memory MODIFY SETTING min_rows_to_keep = 100, max_rows_to_keep = 1000;
```
**Note:** Both `bytes` and `rows` capping parameters can be set at the same time, however, the lower bounds of `max` and `min` will be adhered to.
## Examples {#examples}
@ -97,3 +102,4 @@ SELECT total_bytes, total_rows FROM system.tables WHERE name = 'memory' and data
│ 65536 │ 10000 │
└─────────────┴────────────┘
```

View File

@ -18,6 +18,9 @@ Run the command:
```bash
wget https://s3.amazonaws.com/menusdata.nypl.org/gzips/2021_08_01_07_01_17_data.tgz
# Option: Validate the checksum
md5sum 2021_08_01_07_01_17_data.tgz
# Checksum should be equal to: db6126724de939a5481e3160a2d67d15
```
If needed, replace the link with an up-to-date one from http://menus.nypl.org/data.

View File

@ -7,7 +7,7 @@ title: "Crowdsourced air traffic data from The OpenSky Network 2020"
The data in this dataset is derived and cleaned from the full OpenSky dataset to illustrate the development of air traffic during the COVID-19 pandemic. It spans all flights seen by the network's more than 2500 members since 1 January 2019. More data will be periodically included in the dataset until the end of the COVID-19 pandemic.
Source: https://zenodo.org/record/5092942#.YRBCyTpRXYd
Source: https://zenodo.org/records/5092942
Martin Strohmeier, Xavier Olive, Jannis Luebbe, Matthias Schaefer, and Vincent Lenders
"Crowdsourced air traffic data from the OpenSky Network 20192020"
@ -19,7 +19,7 @@ https://doi.org/10.5194/essd-13-357-2021
Run the command:
```bash
wget -O- https://zenodo.org/record/5092942 | grep -oP 'https://zenodo.org/record/5092942/files/flightlist_\d+_\d+\.csv\.gz' | xargs wget
wget -O- https://zenodo.org/records/5092942 | grep -oE 'https://zenodo.org/records/5092942/files/flightlist_[0-9]+_[0-9]+\.csv\.gz' | xargs wget
```
The download takes about 2 minutes with a good internet connection. There are 30 files with a total size of 4.3 GB.
@ -127,15 +127,15 @@ Average flight distance is around 1000 km.
Query:
```sql
SELECT avg(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2)) FROM opensky;
SELECT round(avg(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2)), 2) FROM opensky;
```
Result:
```text
┌─avg(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2))─┐
│ 1041090.6465708319
└────────────────────────────────────────────────────────────────────┘
┌─round(avg(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2)), 2)─┐
1. 1041090.67 │ -- 1.04 million
└──────────────────────────────────────────────────────────────────────────────┘
```
### Most busy origin airports and the average distance seen {#busy-airports-average-distance}

View File

@ -507,16 +507,18 @@ Example:
``` xml
<http_handlers>
<rule>
<url><![CDATA[/query_param_with_url/\w+/(?P<name_1>[^/]+)(/(?P<name_2>[^/]+))?]]></url>
<url><![CDATA[regex:/query_param_with_url/(?P<name_1>[^/]+)]]></url>
<methods>GET</methods>
<headers>
<XXX>TEST_HEADER_VALUE</XXX>
<PARAMS_XXX><![CDATA[(?P<name_1>[^/]+)(/(?P<name_2>[^/]+))?]]></PARAMS_XXX>
<PARAMS_XXX><![CDATA[regex:(?P<name_2>[^/]+)]]></PARAMS_XXX>
</headers>
<handler>
<type>predefined_query_handler</type>
<query>SELECT value FROM system.settings WHERE name = {name_1:String}</query>
<query>SELECT name, value FROM system.settings WHERE name = {name_2:String}</query>
<query>
SELECT name, value FROM system.settings
WHERE name IN ({name_1:String}, {name_2:String})
</query>
</handler>
</rule>
<defaults/>
@ -524,13 +526,13 @@ Example:
```
``` bash
$ curl -H 'XXX:TEST_HEADER_VALUE' -H 'PARAMS_XXX:max_threads' 'http://localhost:8123/query_param_with_url/1/max_threads/max_final_threads?max_threads=1&max_final_threads=2'
1
max_final_threads 2
$ curl -H 'XXX:TEST_HEADER_VALUE' -H 'PARAMS_XXX:max_final_threads' 'http://localhost:8123/query_param_with_url/max_threads?max_threads=1&max_final_threads=2'
max_final_threads 2
max_threads 1
```
:::note
In one `predefined_query_handler` only supports one `query` of an insert type.
In one `predefined_query_handler` only one `query` is supported.
:::
### dynamic_query_handler {#dynamic_query_handler}

View File

@ -67,8 +67,7 @@ SETTINGS use_query_cache = true, enable_writes_to_query_cache = false;
For maximum control, it is generally recommended to provide settings `use_query_cache`, `enable_writes_to_query_cache` and
`enable_reads_from_query_cache` only with specific queries. It is also possible to enable caching at user or profile level (e.g. via `SET
use_query_cache = true`) but one should keep in mind that all `SELECT` queries including monitoring or debugging queries to system tables
may return cached results then.
use_query_cache = true`) but one should keep in mind that all `SELECT` queries may return cached results then.
The query cache can be cleared using statement `SYSTEM DROP QUERY CACHE`. The content of the query cache is displayed in system table
[system.query_cache](system-tables/query_cache.md). The number of query cache hits and misses since database start are shown as events
@ -175,6 +174,10 @@ Also, results of queries with non-deterministic functions are not cached by defa
To force caching of results of queries with non-deterministic functions regardless, use setting
[query_cache_nondeterministic_function_handling](settings/settings.md#query-cache-nondeterministic-function-handling).
Results of queries that involve system tables, e.g. `system.processes` or `information_schema.tables`, are not cached by default. To force
caching of results of queries with system tables regardless, use setting
[query_cache_system_table_handling](settings/settings.md#query-cache-system-table-handling).
:::note
Prior to ClickHouse v23.11, setting 'query_cache_store_results_of_queries_with_nondeterministic_functions = 0 / 1' controlled whether
results of queries with non-deterministic results were cached. In newer ClickHouse versions, this setting is obsolete and has no effect.

View File

@ -42,6 +42,19 @@ Type: UInt32
Default: 1
## auth_use_forwarded_address
Use originating address for authentication for clients connected through proxy.
:::note
This setting should be used with extra caution since a forwarded address can easily be spoofed: a server accepting such authentication should not be accessed directly, but rather exclusively through a trusted proxy.
:::
Type: Bool
Default: 0
## background_buffer_flush_schedule_pool_size
The maximum number of threads that will be used for performing flush operations for Buffer-engine tables in the background.

View File

@ -0,0 +1,155 @@
---
slug: /en/operations/settings/composable-protocols
sidebar_position: 64
sidebar_label: Composable Protocols
---
# Composable Protocols
Composable protocols allow more flexible configuration of TCP access to the ClickHouse server. This configuration can coexist with or replace conventional configuration.
## The composable protocols section is denoted as `protocols` in the configuration XML
**Example:**
``` xml
<protocols>
</protocols>
```
## Basic modules define protocol layers
**Example:**
``` xml
<protocols>
<!-- plain_http module -->
<plain_http>
<type>http</type>
</plain_http>
</protocols>
```
where:
- `plain_http` - a name that can be referenced by another layer
- `type` - denotes the protocol handler that will be instantiated to process data; the set of protocol handlers is predefined:
* `tcp` - native ClickHouse protocol handler
* `http` - HTTP ClickHouse protocol handler
* `tls` - TLS encryption layer
* `proxy1` - PROXYv1 layer
* `mysql` - MySQL compatibility protocol handler
* `postgres` - PostgreSQL compatibility protocol handler
* `prometheus` - Prometheus protocol handler
* `interserver` - ClickHouse interserver handler
:::note
`gRPC` protocol handler is not implemented for `Composable protocols`
:::
## Endpoint (i.e. listening port) is denoted by `<port>` and (optional) `<host>` tags
**Example:**
``` xml
<protocols>
<plain_http>
<type>http</type>
<!-- endpoint -->
<host>127.0.0.1</host>
<port>8123</port>
</plain_http>
</protocols>
```
If `<host>` is omitted, then `<listen_host>` from root config is used.
## The layer sequence is defined by the `<impl>` tag, referencing another module
**Example:** definition for HTTPS protocol
``` xml
<protocols>
<!-- http module -->
<plain_http>
<type>http</type>
</plain_http>
<!-- https module configured as a tls layer on top of plain_http module -->
<https>
<type>tls</type>
<impl>plain_http</impl>
<host>127.0.0.1</host>
<port>8443</port>
</https>
</protocols>
```
## Endpoint can be attached to any layer
**Example:** definition for HTTP (port 8123) and HTTPS (port 8443) endpoints
``` xml
<protocols>
<plain_http>
<type>http</type>
<host>127.0.0.1</host>
<port>8123</port>
</plain_http>
<https>
<type>tls</type>
<impl>plain_http</impl>
<host>127.0.0.1</host>
<port>8443</port>
</https>
</protocols>
```
## Additional endpoints can be defined by referencing any module and omitting `<type>` tag
**Example:** `another_http` endpoint is defined for `plain_http` module
``` xml
<protocols>
<plain_http>
<type>http</type>
<host>127.0.0.1</host>
<port>8123</port>
</plain_http>
<https>
<type>tls</type>
<impl>plain_http</impl>
<host>127.0.0.1</host>
<port>8443</port>
</https>
<another_http>
<impl>plain_http</impl>
<host>127.0.0.1</host>
<port>8223</port>
</another_http>
</protocols>
```
## Some modules can contain parameters specific to their layer
**Example:** for the TLS layer, a private key (`privateKeyFile`) and certificate file (`certificateFile`) can be specified
``` xml
<protocols>
<plain_http>
<type>http</type>
<host>127.0.0.1</host>
<port>8123</port>
</plain_http>
<https>
<type>tls</type>
<impl>plain_http</impl>
<host>127.0.0.1</host>
<port>8443</port>
<privateKeyFile>another_server.key</privateKeyFile>
<certificateFile>another_server.crt</certificateFile>
</https>
</protocols>
```

View File

@ -287,7 +287,7 @@ Default value: 0 (seconds)
## remote_fs_execute_merges_on_single_replica_time_threshold
When this setting has a value greater than than zero only a single replica starts the merge immediately if merged part on shared storage and `allow_remote_fs_zero_copy_replication` is enabled.
When this setting has a value greater than zero, only a single replica starts the merge immediately if the merged part is on shared storage and `allow_remote_fs_zero_copy_replication` is enabled.
:::note Zero-copy replication is not ready for production
Zero-copy replication is disabled by default in ClickHouse version 22.8 and higher. This feature is not recommended for production use.

View File

@ -1689,6 +1689,18 @@ Possible values:
Default value: `throw`.
## query_cache_system_table_handling {#query-cache-system-table-handling}
Controls how the [query cache](../query-cache.md) handles `SELECT` queries against system tables, i.e. tables in databases `system.*` and `information_schema.*`.
Possible values:
- `'throw'` - Throw an exception and don't cache the query result.
- `'save'` - Cache the query result.
- `'ignore'` - Don't cache the query result and don't throw an exception.
Default value: `throw`.
## query_cache_min_query_runs {#query-cache-min-query-runs}
Minimum number of times a `SELECT` query must run before its result is stored in the [query cache](../query-cache.md).
@ -5302,7 +5314,7 @@ SETTINGS(dictionary_use_async_executor=1, max_threads=8);
## storage_metadata_write_full_object_key {#storage_metadata_write_full_object_key}
When set to `true` the metadata files are written with `VERSION_FULL_OBJECT_KEY` format version. With that format full object storage key names are written to the metadata files.
When set to `false` the metadata files are written with the previous format version, `VERSION_INLINE_DATA`. With that format only suffixes of object storage key names are are written to the metadata files. The prefix for all of object storage key names is set in configurations files at `storage_configuration.disks` section.
When set to `false`, the metadata files are written with the previous format version, `VERSION_INLINE_DATA`. With that format, only suffixes of object storage key names are written to the metadata files. The prefix for all object storage key names is set in configuration files in the `storage_configuration.disks` section.
Default value: `false`.

View File

@ -9,6 +9,7 @@ Columns:
- `hostname` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) — Hostname of the server executing the query.
- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Date of the entry.
- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The date and time of the entry.
- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Time of the entry with microseconds precision.
- `id` ([String](../../sql-reference/data-types/string.md)) — Identifier of the backup or restore operation.
- `name` ([String](../../sql-reference/data-types/string.md)) — Name of the backup storage (the contents of the `FROM` or `TO` clause).
@ -67,6 +68,7 @@ Row 2:
──────
hostname: clickhouse.eu-central1.internal
event_date: 2023-08-19
event_time: 2023-08-19 11:08:56
event_time_microseconds: 2023-08-19 11:08:56.916192
id: e5b74ecb-f6f1-426a-80be-872f90043885
name: Disk('backups_disk', '1.zip')

View File

@ -7,6 +7,7 @@ Contains logging entries with information about various blob storage operations
Columns:
- `hostname` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) — Hostname of the server executing the query.
- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Date of the event.
- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Time of the event.
- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Time of the event with microseconds precision.
@ -38,6 +39,7 @@ SELECT * FROM system.blob_storage_log WHERE query_id = '7afe0450-504d-4e4b-9a80-
```text
Row 1:
──────
hostname: clickhouse.eu-central1.internal
event_date: 2023-10-31
event_time: 2023-10-31 16:03:40
event_time_microseconds: 2023-10-31 16:03:40.481437

View File

@ -7,26 +7,33 @@ sidebar_position: 351
[Cramer's V](https://en.wikipedia.org/wiki/Cram%C3%A9r%27s_V) (sometimes referred to as Cramer's phi) is a measure of association between two columns in a table. The result of the `cramersV` function ranges from 0 (corresponding to no association between the variables) to 1 and can reach 1 only when each value is completely determined by the other. It may be viewed as the association between two variables as a percentage of their maximum possible variation.
:::note
For a bias corrected version of Cramer's V see: [cramersVBiasCorrected](./cramersvbiascorrected.md)
:::
**Syntax**
``` sql
cramersV(column1, column2)
```
**Arguments**
**Parameters**
- `column1` and `column2` are the columns to be compared
- `column1`: first column to be compared.
- `column2`: second column to be compared.
**Returned value**
- a value between 0 (corresponding to no association between the columns' values) and 1 (complete association).
**Return type** is always [Float64](../../../sql-reference/data-types/float.md).
Type: always [Float64](../../../sql-reference/data-types/float.md).
**Example**
The two columns compared below have no association with each other, so the result of `cramersV` is 0:
Query:
``` sql
SELECT
cramersV(a, b)

View File

@ -5,31 +5,31 @@ sidebar_position: 352
# cramersVBiasCorrected
Cramer's V is a measure of association between two columns in a table. The result of the [`cramersV` function](./cramersv.md) ranges from 0 (corresponding to no association between the variables) to 1 and can reach 1 only when each value is completely determined by the other. The function can be heavily biased, so this version of Cramer's V uses the [bias correction](https://en.wikipedia.org/wiki/Cram%C3%A9r%27s_V#Bias_correction).
**Syntax**
``` sql
cramersVBiasCorrected(column1, column2)
```
**Arguments**
**Parameters**
- `column1` and `column2` are the columns to be compared
- `column1`: first column to be compared.
- `column2`: second column to be compared.
**Returned value**
- a value between 0 (corresponding to no association between the columns' values) and 1 (complete association).
**Return type** is always [Float64](../../../sql-reference/data-types/float.md).
Type: always [Float64](../../../sql-reference/data-types/float.md).
**Example**
The two columns compared below have a small association with each other. Notice that the result of `cramersVBiasCorrected` is smaller than the result of `cramersV`:
Query:
``` sql
SELECT
cramersV(a, b),

View File

@ -15,9 +15,9 @@ The `uniqCombined` function is a good choice for calculating the number of diffe
**Arguments**
The function takes a variable number of parameters. Parameters can be `Tuple`, `Array`, `Date`, `DateTime`, `String`, or numeric types.
- `HLL_precision`: The base-2 logarithm of the number of cells in [HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog). Optional, you can use the function as `uniqCombined(x[, ...])`. The default value for `HLL_precision` is 17, which is effectively 96 KiB of space (2^17 cells, 6 bits each).
- `X`: A variable number of parameters. Parameters can be `Tuple`, `Array`, `Date`, `DateTime`, `String`, or numeric types.
`HLL_precision` is the base-2 logarithm of the number of cells in [HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog). Optional, you can use the function as `uniqCombined(x[, ...])`. The default value for `HLL_precision` is 17, which is effectively 96 KiB of space (2^17 cells, 6 bits each).
**Returned value**
@ -25,26 +25,43 @@ The function takes a variable number of parameters. Parameters can be `Tuple`, `
**Implementation details**
Function:
The `uniqCombined` function:
- Calculates a hash (64-bit hash for `String` and 32-bit otherwise) for all parameters in the aggregate, then uses it in calculations.
- Uses a combination of three algorithms: array, hash table, and HyperLogLog with an error correction table.
For a small number of distinct elements, an array is used. When the set size is larger, a hash table is used. For a larger number of elements, HyperLogLog is used, which will occupy a fixed amount of memory.
- For a small number of distinct elements, an array is used.
- When the set size is larger, a hash table is used.
- For a larger number of elements, HyperLogLog is used, which will occupy a fixed amount of memory.
- Provides the result deterministically (it does not depend on the query processing order).
:::note
Since it uses 32-bit hash for non-`String` type, the result will have very high error for cardinalities significantly larger than `UINT_MAX` (error will raise quickly after a few tens of billions of distinct values), hence in this case you should use [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md#agg_function-uniqcombined64)
Since it uses a 32-bit hash for non-`String` types, the result will have very high error for cardinalities significantly larger than `UINT_MAX` (error will raise quickly after a few tens of billions of distinct values), hence in this case you should use [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md#agg_function-uniqcombined64).
:::
Compared to the [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) function, the `uniqCombined`:
Compared to the [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) function, the `uniqCombined` function:
- Consumes several times less memory.
- Calculates with several times higher accuracy.
- Usually has slightly lower performance. In some scenarios, `uniqCombined` can perform better than `uniq`, for example, with distributed queries that transmit a large number of aggregation states over the network.
**Example**
Query:
```sql
SELECT uniqCombined(number) FROM numbers(1e6);
```
Result:
```response
┌─uniqCombined(number)─┐
│ 1001148 │ -- 1.00 million
└──────────────────────┘
```
See the example section of [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md#agg_function-uniqcombined64) for an example of the difference between `uniqCombined` and `uniqCombined64` for much larger inputs.
**See Also**
- [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq)

View File

@ -5,4 +5,78 @@ sidebar_position: 193
# uniqCombined64
Same as [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniqcombined.md#agg_function-uniqcombined), but uses 64-bit hash for all data types.
Calculates the approximate number of different argument values. It is the same as [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniqcombined.md#agg_function-uniqcombined), but uses a 64-bit hash for all data types rather than just for the String data type.
``` sql
uniqCombined64(HLL_precision)(x[, ...])
```
**Parameters**
- `HLL_precision`: The base-2 logarithm of the number of cells in [HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog). Optionally, you can use the function as `uniqCombined64(x[, ...])`. The default value for `HLL_precision` is 17, which is effectively 96 KiB of space (2^17 cells, 6 bits each).
- `X`: A variable number of parameters. Parameters can be `Tuple`, `Array`, `Date`, `DateTime`, `String`, or numeric types.
**Returned value**
- A [UInt64](../../../sql-reference/data-types/int-uint.md)-type number.
**Implementation details**
The `uniqCombined64` function:
- Calculates a hash (64-bit hash for all data types) for all parameters in the aggregate, then uses it in calculations.
- Uses a combination of three algorithms: array, hash table, and HyperLogLog with an error correction table.
- For a small number of distinct elements, an array is used.
- When the set size is larger, a hash table is used.
- For a larger number of elements, HyperLogLog is used, which will occupy a fixed amount of memory.
- Provides the result deterministically (it does not depend on the query processing order).
:::note
Since it uses a 64-bit hash for all types, the result does not suffer from very high error for cardinalities significantly larger than `UINT_MAX`, unlike [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniqcombined.md), which uses a 32-bit hash for non-`String` types.
:::
Compared to the [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) function, the `uniqCombined64` function:
- Consumes several times less memory.
- Calculates with several times higher accuracy.
**Example**
In the example below `uniqCombined64` is run on `1e10` different numbers returning a very close approximation of the number of different argument values.
Query:
```sql
SELECT uniqCombined64(number) FROM numbers(1e10);
```
Result:
```response
┌─uniqCombined64(number)─┐
│ 9998568925 │ -- 10.00 billion
└────────────────────────┘
```
By comparison the `uniqCombined` function returns a rather poor approximation for an input this size.
Query:
```sql
SELECT uniqCombined(number) FROM numbers(1e10);
```
Result:
```response
┌─uniqCombined(number)─┐
│ 5545308725 │ -- 5.55 billion
└──────────────────────┘
```
**See Also**
- [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq)
- [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniqcombined.md)
- [uniqHLL12](../../../sql-reference/aggregate-functions/reference/uniqhll12.md#agg_function-uniqhll12)
- [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md#agg_function-uniqexact)
- [uniqTheta](../../../sql-reference/aggregate-functions/reference/uniqthetasketch.md#agg_function-uniqthetasketch)

View File

@ -1,6 +1,6 @@
---
slug: /en/sql-reference/data-types/aggregatefunction
sidebar_position: 53
sidebar_position: 46
sidebar_label: AggregateFunction
---

View File

@ -1,6 +1,6 @@
---
slug: /en/sql-reference/data-types/array
sidebar_position: 52
sidebar_position: 32
sidebar_label: Array(T)
---

View File

@ -1,6 +1,6 @@
---
slug: /en/sql-reference/data-types/boolean
sidebar_position: 43
sidebar_position: 22
sidebar_label: Boolean
---

View File

@ -1,6 +1,6 @@
---
slug: /en/sql-reference/data-types/date
sidebar_position: 47
sidebar_position: 12
sidebar_label: Date
---

View File

@ -1,6 +1,6 @@
---
slug: /en/sql-reference/data-types/date32
sidebar_position: 48
sidebar_position: 14
sidebar_label: Date32
---

View File

@ -1,6 +1,6 @@
---
slug: /en/sql-reference/data-types/datetime
sidebar_position: 48
sidebar_position: 16
sidebar_label: DateTime
---

View File

@ -1,6 +1,6 @@
---
slug: /en/sql-reference/data-types/datetime64
sidebar_position: 49
sidebar_position: 18
sidebar_label: DateTime64
---

View File

@ -1,6 +1,6 @@
---
slug: /en/sql-reference/data-types/decimal
sidebar_position: 42
sidebar_position: 6
sidebar_label: Decimal
---

View File

@ -1,6 +1,6 @@
---
slug: /en/sql-reference/data-types/enum
sidebar_position: 50
sidebar_position: 20
sidebar_label: Enum
---

View File

@ -1,10 +1,10 @@
---
slug: /en/sql-reference/data-types/fixedstring
sidebar_position: 45
sidebar_position: 10
sidebar_label: FixedString(N)
---
# FixedString
# FixedString(N)
A fixed-length string of `N` bytes (neither characters nor code points).

View File

@ -1,6 +1,6 @@
---
slug: /en/sql-reference/data-types/float
sidebar_position: 41
sidebar_position: 4
sidebar_label: Float32, Float64
---

View File

@ -1,8 +1,8 @@
---
slug: /en/sql-reference/data-types/geo
sidebar_position: 62
sidebar_position: 54
sidebar_label: Geo
title: "Geo Data Types"
title: "Geometric"
---
ClickHouse supports data types for representing geographical objects — locations, lands, etc.

View File

@ -1,10 +1,10 @@
---
slug: /en/sql-reference/data-types/
sidebar_label: List of data types
sidebar_position: 37
sidebar_position: 1
---
# ClickHouse Data Types
# Data Types in ClickHouse
ClickHouse can store various kinds of data in table cells. This section describes the supported data types and special considerations for using and/or implementing them if any.

View File

@ -1,6 +1,6 @@
---
slug: /en/sql-reference/data-types/int-uint
sidebar_position: 40
sidebar_position: 2
sidebar_label: UInt8, UInt16, UInt32, UInt64, UInt128, UInt256, Int8, Int16, Int32, Int64, Int128, Int256
---

View File

@ -1,6 +1,6 @@
---
slug: /en/sql-reference/data-types/ipv4
sidebar_position: 59
sidebar_position: 28
sidebar_label: IPv4
---

View File

@ -1,6 +1,6 @@
---
slug: /en/sql-reference/data-types/ipv6
sidebar_position: 60
sidebar_position: 30
sidebar_label: IPv6
---

View File

@ -1,6 +1,6 @@
---
slug: /en/sql-reference/data-types/json
sidebar_position: 54
sidebar_position: 26
sidebar_label: JSON
---

View File

@ -1,10 +1,10 @@
---
slug: /en/sql-reference/data-types/lowcardinality
sidebar_position: 51
sidebar_label: LowCardinality
sidebar_position: 42
sidebar_label: LowCardinality(T)
---
# LowCardinality
# LowCardinality(T)
Changes the internal representation of other data types to be dictionary-encoded.

View File

@ -1,12 +1,12 @@
---
slug: /en/sql-reference/data-types/map
sidebar_position: 65
sidebar_label: Map(key, value)
sidebar_position: 36
sidebar_label: Map(K, V)
---
# Map(key, value)
# Map(K, V)
`Map(key, value)` data type stores `key:value` pairs.
`Map(K, V)` data type stores `key:value` pairs.
**Parameters**

View File

@ -1,27 +0,0 @@
---
slug: /en/sql-reference/data-types/multiword-types
sidebar_position: 61
sidebar_label: Multiword Type Names
title: "Multiword Types"
---
When creating tables, you can use data types with a name consisting of several words. This is implemented for better SQL compatibility.
## Multiword Types Support
| Multiword types | Simple types |
|----------------------------------|--------------------------------------------------------------|
| DOUBLE PRECISION | [Float64](../../sql-reference/data-types/float.md) |
| CHAR LARGE OBJECT | [String](../../sql-reference/data-types/string.md) |
| CHAR VARYING | [String](../../sql-reference/data-types/string.md) |
| CHARACTER LARGE OBJECT | [String](../../sql-reference/data-types/string.md) |
| CHARACTER VARYING | [String](../../sql-reference/data-types/string.md) |
| NCHAR LARGE OBJECT | [String](../../sql-reference/data-types/string.md) |
| NCHAR VARYING | [String](../../sql-reference/data-types/string.md) |
| NATIONAL CHARACTER LARGE OBJECT | [String](../../sql-reference/data-types/string.md) |
| NATIONAL CHARACTER VARYING | [String](../../sql-reference/data-types/string.md) |
| NATIONAL CHAR VARYING | [String](../../sql-reference/data-types/string.md) |
| NATIONAL CHARACTER | [String](../../sql-reference/data-types/string.md) |
| NATIONAL CHAR | [String](../../sql-reference/data-types/string.md) |
| BINARY LARGE OBJECT | [String](../../sql-reference/data-types/string.md) |
| BINARY VARYING | [String](../../sql-reference/data-types/string.md) |

View File

@ -1,7 +1,7 @@
---
slug: /en/sql-reference/data-types/nullable
sidebar_position: 55
sidebar_label: Nullable
sidebar_position: 44
sidebar_label: Nullable(T)
---
# Nullable(T)

View File

@ -1,5 +1,7 @@
---
slug: /en/sql-reference/data-types/simpleaggregatefunction
sidebar_position: 48
sidebar_label: SimpleAggregateFunction
---
# SimpleAggregateFunction

View File

@ -1,6 +1,6 @@
---
slug: /en/sql-reference/data-types/string
sidebar_position: 44
sidebar_position: 8
sidebar_label: String
---
@ -13,7 +13,7 @@ When creating tables, numeric parameters for string fields can be set (e.g. `VAR
Aliases:
- `String` — `LONGTEXT`, `MEDIUMTEXT`, `TINYTEXT`, `TEXT`, `LONGBLOB`, `MEDIUMBLOB`, `TINYBLOB`, `BLOB`, `VARCHAR`, `CHAR`.
- `String` — `LONGTEXT`, `MEDIUMTEXT`, `TINYTEXT`, `TEXT`, `LONGBLOB`, `MEDIUMBLOB`, `TINYBLOB`, `BLOB`, `VARCHAR`, `CHAR`, `CHAR LARGE OBJECT`, `CHAR VARYING`, `CHARACTER LARGE OBJECT`, `CHARACTER VARYING`, `NCHAR LARGE OBJECT`, `NCHAR VARYING`, `NATIONAL CHARACTER LARGE OBJECT`, `NATIONAL CHARACTER VARYING`, `NATIONAL CHAR VARYING`, `NATIONAL CHARACTER`, `NATIONAL CHAR`, `BINARY LARGE OBJECT`, `BINARY VARYING`.
## Encodings

View File

@ -1,10 +1,10 @@
---
slug: /en/sql-reference/data-types/tuple
sidebar_position: 54
sidebar_position: 34
sidebar_label: Tuple(T1, T2, ...)
---
# Tuple(T1, T2, )
# Tuple(T1, T2, ...)
A tuple of elements, each having an individual [type](../../sql-reference/data-types/index.md#data_types). Tuple must contain at least one element.

View File

@ -1,6 +1,6 @@
---
slug: /en/sql-reference/data-types/uuid
sidebar_position: 46
sidebar_position: 24
sidebar_label: UUID
---

View File

@ -1,10 +1,10 @@
---
slug: /en/sql-reference/data-types/variant
sidebar_position: 55
sidebar_label: Variant
sidebar_position: 40
sidebar_label: Variant(T1, T2, ...)
---
# Variant(T1, T2, T3, ...)
# Variant(T1, T2, ...)
This type represents a union of other data types. Type `Variant(T1, T2, ..., TN)` means that each row of this type
has a value of either type `T1` or `T2` or ... or `TN` or none of them (`NULL` value).

View File

@ -774,6 +774,59 @@ Returns the number of elements for which `func(arr1[i], …, arrN[i])` returns s
Note that the `arrayCount` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument.
## arrayDotProduct
Returns the dot product of two arrays.
**Syntax**
```sql
arrayDotProduct(vector1, vector2)
```
Aliases: `scalarProduct`, `dotProduct`
**Parameters**
- `vector1`: First vector. [Array](../data-types/array.md) or [Tuple](../data-types/tuple.md) of numeric values.
- `vector2`: Second vector. [Array](../data-types/array.md) or [Tuple](../data-types/tuple.md) of numeric values.
:::note
The sizes of the two vectors must be equal. Arrays and Tuples may also contain mixed element types.
:::
**Returned value**
- The dot product of the two vectors.
Type: numeric - determined by the type of the arguments. If Arrays or Tuples contain mixed element types then the result type is the supertype.
**Examples**
Query:
```sql
SELECT arrayDotProduct([1, 2, 3], [4, 5, 6]) AS res, toTypeName(res);
```
Result:
```response
32 UInt16
```
Query:
```sql
SELECT dotProduct((1::UInt16, 2::UInt8, 3::Float32),(4::Int16, 5::Float32, 6::UInt8)) AS res, toTypeName(res);
```
Result:
```response
32 Float64
```
## countEqual(arr, x)
Returns the number of elements in the array equal to x. Equivalent to arrayCount (elem -\> elem = x, arr).
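For illustration, a minimal query (the input values are arbitrary):

Query:

```sql
SELECT countEqual([1, 2, 3, 3], 3);
```

Result:

```response
2
```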
@ -888,6 +941,66 @@ SELECT arrayEnumerateUniq([1, 1, 1, 2, 2, 2], [1, 1, 2, 1, 1, 2]) AS res
This is necessary when using ARRAY JOIN with a nested data structure and further aggregation across multiple elements in this structure.
## arrayEnumerateUniqRanked
Returns an array the same size as the source array, indicating for each element what its position is among elements with the same value. It allows for enumeration of a multidimensional array with the ability to specify how deep to look inside the array.
**Syntax**
```sql
arrayEnumerateUniqRanked(clear_depth, arr, max_array_depth)
```
**Parameters**
- `clear_depth`: Enumerate elements at the specified level separately. Positive [Integer](../data-types/int-uint.md) less than or equal to `max_array_depth`.
- `arr`: N-dimensional array to enumerate. [Array](../data-types/array.md).
- `max_array_depth`: The maximum effective depth. Positive [Integer](../data-types/int-uint.md) less than or equal to the depth of `arr`.
**Example**
With `clear_depth=1` and `max_array_depth=1`, the result of `arrayEnumerateUniqRanked` is identical to that which [`arrayEnumerateUniq`](#arrayenumerateuniqarr) would give for the same array.
Query:
``` sql
SELECT arrayEnumerateUniqRanked(1, [1,2,1], 1);
```
Result:
``` text
[1,1,2]
```
In this example, `arrayEnumerateUniqRanked` is used to obtain an array indicating, for each element of the multidimensional array, what its position is among elements of the same value. For the first row of the passed array, `[1,2,3]`, the corresponding result is `[1,1,1]`, indicating that this is the first time `1`, `2` and `3` are encountered. For the second row of the provided array, `[2,2,1]`, the corresponding result is `[2,3,3]`, indicating that `2` is encountered for a second and third time, and `1` is encountered for the second time. Likewise, for the third row of the provided array, `[3]`, the corresponding result is `[2]`, indicating that `3` is encountered for the second time.
Query:
``` sql
SELECT arrayEnumerateUniqRanked(1, [[1,2,3],[2,2,1],[3]], 2);
```
Result:
``` text
[[1,1,1],[2,3,2],[2]]
```
Changing `clear_depth=2`, results in elements being enumerated separately for each row.
Query:
``` sql
SELECT arrayEnumerateUniqRanked(2, [[1,2,3],[2,2,1],[3]], 2);
```
Result:
``` text
[[1,1,1],[1,2,1],[1]]
```
## arrayPopBack
Removes the last item from the array.
@ -1303,6 +1416,125 @@ SELECT arrayReverseSort((x, y) -> -y, [4, 3, 5], [1, 2, 3]) AS res;
Same as `arrayReverseSort` with additional `limit` argument allowing partial sorting. Returns an array of the same size as the original array where elements in range `[1..limit]` are sorted in descending order. Remaining elements `(limit..N]` shall contain elements in unspecified order.
## arrayShuffle
Returns an array of the same size as the original array containing the elements in shuffled order.
Elements are reordered in such a way that each possible permutation of those elements has equal probability of appearance.
**Syntax**
```sql
arrayShuffle(arr[, seed])
```
**Parameters**
- `arr`: The array to shuffle. [Array](../data-types/array.md).
- `seed` (optional): seed to be used with random number generation. If not provided a random one is used. [UInt or Int](../data-types/int-uint.md).
**Returned value**
- Array with elements shuffled.
**Implementation details**
:::note
This function will not materialize constants.
:::
**Examples**
In this example, `arrayShuffle` is used without providing a `seed` and will therefore generate one randomly itself.
Query:
```sql
SELECT arrayShuffle([1, 2, 3, 4]);
```
Note: when using [ClickHouse Fiddle](https://fiddle.clickhouse.com/), the exact response may differ due to random nature of the function.
Result:
```response
[1,4,2,3]
```
In this example, `arrayShuffle` is provided a `seed` and will produce stable results.
Query:
```sql
SELECT arrayShuffle([1, 2, 3, 4], 41);
```
Result:
```response
[3,2,1,4]
```
## arrayPartialShuffle
Given an input array of cardinality `N`, returns an array of size `N` where elements in the range `[1...limit]` are shuffled and the remaining elements in the range `(limit...N]` are unshuffled.
**Syntax**
```sql
arrayPartialShuffle(arr[, limit[, seed]])
```
**Parameters**
- `arr`: The array of size `N` to partially shuffle. [Array](../data-types/array.md).
- `limit` (optional): The number to limit element swaps to, in the range `[1..N]`. [UInt or Int](../data-types/int-uint.md).
- `seed` (optional): The seed value to be used with random number generation. If not provided a random one is used. [UInt or Int](../data-types/int-uint.md)
**Returned value**
- Array with elements partially shuffled.
**Implementation details**
:::note
This function will not materialize constants.
The value of `limit` should be in the range `[1..N]`. Values outside of that range are equivalent to performing full [arrayShuffle](#arrayshuffle).
:::
**Examples**
Note: when using [ClickHouse Fiddle](https://fiddle.clickhouse.com/), the exact response may differ due to random nature of the function.
Query:
```sql
SELECT arrayPartialShuffle([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 1)
```
Result:
The order of elements is preserved (`[2,3,4,5], [7,8,9,10]`) except for the two shuffled elements `[1, 6]`. No `seed` is provided so the function selects its own randomly.
```response
[6,2,3,4,5,1,7,8,9,10]
```
In this example, the `limit` is increased to `2`.
Query:
```sql
SELECT arrayPartialShuffle([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 2);
```
The order of elements is preserved (`[4, 5, 6, 7, 8], [10]`) except for the four shuffled elements `[1, 2, 3, 9]`.
Result:
```response
[3,9,1,4,5,6,7,8,2,10]
```
## arrayUniq(arr, …)
If one argument is passed, it counts the number of different elements in the array.
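For example, counting the distinct elements of a small array (the values are arbitrary):

Query:

```sql
SELECT arrayUniq([1, 1, 2, 3]);
```

Result:

```response
3
```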
@ -1400,21 +1632,91 @@ Result:
└────────────────────────────────┘
```
## arrayEnumerateDense(arr)
## arrayEnumerateDense
Returns an array of the same size as the source array, indicating where each element first appears in the source array.
Example:
**Syntax**
```sql
arrayEnumerateDense(arr)
```
**Example**
Query:
``` sql
SELECT arrayEnumerateDense([10, 20, 10, 30])
```
Result:
``` text
┌─arrayEnumerateDense([10, 20, 10, 30])─┐
│ [1,2,1,3] │
└───────────────────────────────────────┘
```
## arrayEnumerateDenseRanked
Returns an array the same size as the source array, indicating where each element first appears in the source array. It allows for enumeration of a multidimensional array with the ability to specify how deep to look inside the array.
**Syntax**
```sql
arrayEnumerateDenseRanked(clear_depth, arr, max_array_depth)
```
**Parameters**
- `clear_depth`: Enumerate elements at the specified level separately. Positive [Integer](../data-types/int-uint.md) less than or equal to `max_array_depth`.
- `arr`: N-dimensional array to enumerate. [Array](../data-types/array.md).
- `max_array_depth`: The maximum effective depth. Positive [Integer](../data-types/int-uint.md) less than or equal to the depth of `arr`.
**Example**
With `clear_depth=1` and `max_array_depth=1`, the result is identical to what [arrayEnumerateDense](#arrayenumeratedense) would give.
Query:
``` sql
SELECT arrayEnumerateDenseRanked(1,[10, 20, 10, 30],1);
```
Result:
``` text
[1,2,1,3]
```
In this example, `arrayEnumerateDenseRanked` is used to obtain an array indicating, for each element of the multidimensional array, what its position is among elements of the same value. For the first row of the passed array, `[10,10,30,20]`, the corresponding first row of the result is `[1,1,2,3]`, indicating that `10` is the first number encountered in positions 1 and 2, `30` the second number encountered in position 3 and `20` the third number encountered in position 4. For the second row, `[40, 50, 10, 30]`, the corresponding second row of the result is `[4,5,1,2]`, indicating that `40` and `50` are the fourth and fifth numbers encountered in positions 1 and 2 of that row, that another `10` (the first encountered number) is in position 3 and `30` (the second number encountered) is in the last position.
Query:
``` sql
SELECT arrayEnumerateDenseRanked(1,[[10,10,30,20],[40,50,10,30]],2);
```
Result:
``` text
[[1,1,2,3],[4,5,1,2]]
```
Changing `clear_depth=2` results in the enumeration occurring separately for each row anew.
Query:
``` sql
SELECT arrayEnumerateDenseRanked(2,[[10,10,30,20],[40,50,10,30]],2);
```
Result:
``` text
[[1,1,2,3],[1,2,3,4]]
```
## arrayIntersect(arr)
@ -1652,7 +1954,7 @@ flatten(array_of_arrays)
Alias: `flatten`.
**Arguments**
**Parameters**
- `array_of_arrays` — [Array](../../sql-reference/data-types/array.md) of arrays. For example, `[[1,2,3], [4,5]]`.
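For example, flattening a small nested array (a sketch; the input is arbitrary):

Query:

```sql
SELECT flatten([[[1]], [[2], [3]]]);
```

Result:

```response
[1,2,3]
```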
@ -1928,7 +2230,67 @@ Note that the `arrayAll` is a [higher-order function](../../sql-reference/functi
Returns the first element in the `arr1` array for which `func(arr1[i], …, arrN[i])` returns something other than 0.
Note that `arrayFirst` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can't be omitted.
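A short sketch (the input values are arbitrary): the first element greater than 1 is returned.

Query:

```sql
SELECT arrayFirst(x -> x > 1, [1, 2, 3]);
```

Result:

```response
2
```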
## arrayFirstOrNull
Returns the first element in the `arr1` array for which `func(arr1[i], …, arrN[i])` returns something other than 0, otherwise it returns `NULL`.
**Syntax**
```sql
arrayFirstOrNull(func, arr1, …)
```
**Parameters**
- `func`: Lambda function. [Lambda function](../functions/#higher-order-functions---operator-and-lambdaparams-expr-function).
- `arr1`: Array to operate on. [Array](../data-types/array.md).
**Returned value**
- The first element in the passed array for which `func` returns something other than 0.
- Otherwise, returns `NULL`.
**Implementation details**
Note that `arrayFirstOrNull` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can't be omitted.
**Example**
Query:
```sql
SELECT arrayFirstOrNull(x -> x >= 2, [1, 2, 3]);
```
Result:
```response
2
```
Query:
```sql
SELECT arrayFirstOrNull(x -> x >= 2, emptyArrayUInt8());
```
Result:
```response
\N
```
Query:
```sql
SELECT arrayFirstOrNull((x,f) -> f, [1,2,3,NULL], [0,1,0,1]);
```
Result:
```response
2
```
## arrayLast(func, arr1, …)
@ -1936,6 +2298,56 @@ Returns the last element in the `arr1` array for which `func(arr1[i], …, arrN[
Note that `arrayLast` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can't be omitted.
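A short sketch (the input values are arbitrary): the last element greater than 1 is returned.

Query:

```sql
SELECT arrayLast(x -> x > 1, [1, 2, 3]);
```

Result:

```response
3
```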
## arrayLastOrNull
Returns the last element in the `arr1` array for which `func(arr1[i], …, arrN[i])` returns something other than 0, otherwise returns `NULL`.
**Syntax**
```sql
arrayLastOrNull(func, arr1, …)
```
**Parameters**
- `func`: Lambda function. [Lambda function](../functions/#higher-order-functions---operator-and-lambdaparams-expr-function).
- `arr1`: Array to operate on. [Array](../data-types/array.md).
**Returned value**
- The last element in the passed array for which `func` returns something other than 0.
- Otherwise, returns `NULL`.
**Implementation details**
Note that `arrayLastOrNull` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can't be omitted.
**Example**
Query:
```sql
SELECT arrayLastOrNull(x -> x >= 2, [1, 2, 3]);
```
Result:
```response
3
```
Query:
```sql
SELECT arrayLastOrNull(x -> x >= 2, emptyArrayUInt8());
```
Result:
```response
\N
```
## arrayFirstIndex(func, arr1, …)
Returns the index of the first element in the `arr1` array for which `func(arr1[i], …, arrN[i])` returns something other than 0.
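A short sketch (the input values are arbitrary): the first element not equal to 1 sits at index 2.

Query:

```sql
SELECT arrayFirstIndex(x -> x != 1, [1, 2, 3]);
```

Result:

```response
2
```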

View File

@ -1906,7 +1906,7 @@ Aliases: `dateAdd`, `DATE_ADD`.
**Arguments**
- `unit` — The type of interval to add. [String](../../sql-reference/data-types/string.md).
- `unit` — The type of interval to add. Note: This is not a [String](../../sql-reference/data-types/string.md) and must therefore not be quoted.
Possible values:
- `second`
@ -1961,7 +1961,7 @@ Aliases: `dateSub`, `DATE_SUB`.
**Arguments**
- `unit` — The type of interval to subtract. Note: The unit should be unquoted.
- `unit` — The type of interval to subtract. Note: This is not a [String](../../sql-reference/data-types/string.md) and must therefore not be quoted.
Possible values:

View File

@ -81,6 +81,43 @@ Result:
│ 2.23606797749979 │
└──────────────────┘
```
## L2SquaredNorm
Calculates the square of the [L2Norm](#l2norm), i.e. the sum of the squares of the vector values.
**Syntax**
```sql
L2SquaredNorm(vector)
```
Alias: `normL2Squared`.
**Arguments**
- `vector` — [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md).
**Returned value**
- L2-norm squared.
Type: [Float](../../sql-reference/data-types/float.md).
**Example**
Query:
```sql
SELECT L2SquaredNorm((1, 2));
```
Result:
```text
┌─L2SquaredNorm((1, 2))─┐
│ 5 │
└───────────────────────┘
```
## LinfNorm

View File

@ -594,6 +594,45 @@ Calculates JumpConsistentHash from a UInt64.
Accepts two arguments: a UInt64-type key and the number of buckets. Returns Int32.
For more information, see the link: [JumpConsistentHash](https://arxiv.org/pdf/1406.2294.pdf)
## kostikConsistentHash
An O(1) time and space consistent hash algorithm by Konstantin 'kostik' Oblakov. Previously `yandexConsistentHash`.
**Syntax**
```sql
kostikConsistentHash(input, n)
```
Alias: `yandexConsistentHash` (kept for backwards compatibility).
**Parameters**
- `input`: A UInt64-type key [UInt64](/docs/en/sql-reference/data-types/int-uint.md).
- `n`: Number of buckets. [UInt16](/docs/en/sql-reference/data-types/int-uint.md).
**Returned value**
- A [UInt16](/docs/en/sql-reference/data-types/int-uint.md) data type hash value.
**Implementation details**
It is efficient only if `n <= 32768`.
**Example**
Query:
```sql
SELECT kostikConsistentHash(16045690984833335023, 2);
```
```response
┌─kostikConsistentHash(16045690984833335023, 2)─┐
│ 1 │
└───────────────────────────────────────────────┘
```
## murmurHash2_32, murmurHash2_64
Produces a [MurmurHash2](https://github.com/aappleby/smhasher) hash value.
@ -1153,6 +1192,42 @@ Result:
└────────────┘
```
## wyHash64
Produces a 64-bit [wyHash64](https://github.com/wangyi-fudan/wyhash) hash value.
**Syntax**
```sql
wyHash64(string)
```
**Arguments**
- `string` — String. [String](/docs/en/sql-reference/data-types/string.md).
**Returned value**
- Hash value.
Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md).
**Example**
Query:
```sql
SELECT wyHash64('ClickHouse') AS Hash;
```
Result:
```response
┌─────────────────Hash─┐
│ 12336419557878201794 │
└──────────────────────┘
```
## ngramMinHash
Splits an ASCII string into n-grams of `ngramsize` symbols and calculates hash values for each n-gram. Uses `hashnum` minimum hashes to calculate the minimum hash and `hashnum` maximum hashes to calculate the maximum hash. Returns a tuple with these hashes. It is case-sensitive.
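A usage sketch with the default `ngramsize` and `hashnum` (the alias is arbitrary; the concrete hash values depend on the implementation, so no result is shown):

```sql
SELECT ngramMinHash('ClickHouse') AS min_max_hashes;
```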

View File

@ -916,6 +916,34 @@ Returns the larger value of a and b.
Returns the servers uptime in seconds.
If executed in the context of a distributed table, this function generates a normal column with values relevant to each shard. Otherwise it produces a constant value.
**Syntax**
``` sql
uptime()
```
**Returned value**
- Uptime in seconds.
Type: [UInt32](/docs/en/sql-reference/data-types/int-uint.md).
**Example**
Query:
``` sql
SELECT uptime() as Uptime;
```
Result:
``` response
┌─Uptime─┐
│ 55867 │
└────────┘
```
## version()
Returns the current version of ClickHouse as a string in the form of:

View File

@ -254,14 +254,70 @@ Result:
Converts the ASCII Latin symbols in a string to lowercase.
**Syntax**
``` sql
lower(input)
```
Alias: `lcase`
**Parameters**
- `input`: A string type [String](/docs/en/sql-reference/data-types/string.md).
**Returned value**
- A [String](/docs/en/sql-reference/data-types/string.md) data type value.
**Example**
Query:
```sql
SELECT lower('CLICKHOUSE');
```
```response
┌─lower('CLICKHOUSE')─┐
│ clickhouse │
└─────────────────────┘
```
## upper
Converts the ASCII Latin symbols in a string to uppercase.
**Syntax**
``` sql
upper(input)
```
Alias: `ucase`
**Parameters**
- `input`: A string type [String](/docs/en/sql-reference/data-types/string.md).
**Returned value**
- A [String](/docs/en/sql-reference/data-types/string.md) data type value.
**Examples**
Query:
``` sql
SELECT upper('clickhouse');
```
``` response
┌─upper('clickhouse')─┐
│ CLICKHOUSE │
└─────────────────────┘
```
## lowerUTF8
Converts a string to lowercase, assuming that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
@ -278,6 +334,34 @@ Does not detect the language, e.g. for Turkish the result might not be exactly c
If the length of the UTF-8 byte sequence is different for upper and lower case of a code point, the result may be incorrect for this code point.
**Syntax**
``` sql
upperUTF8(input)
```
**Parameters**
- `input`: A string type [String](/docs/en/sql-reference/data-types/string.md).
**Returned value**
- A [String](/docs/en/sql-reference/data-types/string.md) data type value.
**Example**
Query:
``` sql
SELECT upperUTF8('München') as Upperutf8;
```
``` response
┌─Upperutf8─┐
│ MÜNCHEN │
└───────────┘
```
## isValidUTF8
Returns 1, if the set of bytes constitutes valid UTF-8-encoded text, otherwise 0.
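A short sketch using `\xHH` string-literal escapes: `\xc3\xb1` is a valid two-byte UTF-8 sequence (`ñ`), while `\xc3\x28` is not:

Query:

```sql
SELECT isValidUTF8('\xc3\xb1') AS valid, isValidUTF8('\xc3\x28') AS not_valid;
```

Result:

```response
┌─valid─┬─not_valid─┐
│     1 │         0 │
└───────┴───────────┘
```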

View File

@ -193,3 +193,33 @@ Result:
## translateUTF8
Like [translate](#translate) but assumes `s`, `from` and `to` are UTF-8 encoded strings.
**Syntax**
``` sql
translateUTF8(s, from, to)
```
**Parameters**
- `s`: A string type [String](/docs/en/sql-reference/data-types/string.md).
- `from`: A string type [String](/docs/en/sql-reference/data-types/string.md).
- `to`: A string type [String](/docs/en/sql-reference/data-types/string.md).
**Returned value**
- A [String](/docs/en/sql-reference/data-types/string.md) data type value.
**Examples**
Query:
``` sql
SELECT translateUTF8('Münchener Straße', 'üß', 'us') AS res;
```
``` response
┌─res──────────────┐
│ Munchener Strase │
└──────────────────┘
```

View File

@ -6,14 +6,17 @@ sidebar_label: Searching in Strings
# Functions for Searching in Strings
All functions in this section search by default case-sensitively. Case-insensitive search is usually provided by separate function variants.
Note that case-insensitive search follows the lowercase-uppercase rules of the English language. E.g. Uppercased `i` in English language is
`I` whereas in Turkish language it is `İ` - results for languages other than English may be unexpected.
All functions in this section search case-sensitively by default. Case-insensitive search is usually provided by separate function variants.
Functions in this section also assume that the searched string and the search string are single-byte encoded text. If this assumption is
:::note
Case-insensitive search follows the lowercase-uppercase rules of the English language. E.g. Uppercased `i` in the English language is
`I` whereas in the Turkish language it is `İ` - results for languages other than English may be unexpected.
:::
Functions in this section also assume that the searched string (referred to in this section as `haystack`) and the search string (referred to in this section as `needle`) are single-byte encoded text. If this assumption is
violated, no exception is thrown and results are undefined. Search with UTF-8 encoded strings is usually provided by separate function
variants. Likewise, if a UTF-8 function variant is used and the input strings are not UTF-8 encoded text, no exception is thrown and the
results are undefined. Note that no automatic Unicode normalization is performed, you can use the
results are undefined. Note that no automatic Unicode normalization is performed, however you can use the
[normalizeUTF8*()](https://clickhouse.com/docs/en/sql-reference/functions/string-functions/) functions for that.
[General strings functions](string-functions.md) and [functions for replacing in strings](string-replace-functions.md) are described separately.
@ -54,6 +57,8 @@ Type: `Integer`.
**Examples**
Query:
``` sql
SELECT position('Hello, world!', '!');
```
@ -68,6 +73,8 @@ Result:
Example with `start_pos` argument:
Query:
``` sql
SELECT
position('Hello, world!', 'o', 1),
@ -84,6 +91,8 @@ Result:
Example for `needle IN haystack` syntax:
Query:
```sql
SELECT 6 = position('/' IN s) FROM (SELECT 'Hello/World' AS s);
```
@ -98,6 +107,8 @@ Result:
Examples with empty `needle` substring:
Query:
``` sql
SELECT
position('abc', ''),
@ -109,6 +120,8 @@ SELECT
position('abc', '', 5)
```
Result:
``` text
┌─position('abc', '')─┬─position('abc', '', 0)─┬─position('abc', '', 1)─┬─position('abc', '', 2)─┬─position('abc', '', 3)─┬─position('abc', '', 4)─┬─position('abc', '', 5)─┐
│ 1 │ 1 │ 1 │ 2 │ 3 │ 4 │ 0 │
@ -132,7 +145,23 @@ locate(needle, haystack[, start_pos])
## positionCaseInsensitive
Like [position](#position) but searches case-insensitively.
A case insensitive invariant of [position](#position).
**Example**
Query:
``` sql
SELECT positionCaseInsensitive('Hello, world!', 'hello');
```
Result:
``` text
┌─positionCaseInsensitive('Hello, world!', 'hello')─┐
│                                                  1 │
└────────────────────────────────────────────────────┘
```
## positionUTF8
@ -142,6 +171,8 @@ Like [position](#position) but assumes `haystack` and `needle` are UTF-8 encoded
Function `positionUTF8` correctly counts character `ö` (represented by two bytes) as a single Unicode codepoint:
Query:
``` sql
SELECT positionUTF8('Motörhead', 'r');
```
@ -175,14 +206,17 @@ multiSearchAllPositions(haystack, [needle1, needle2, ..., needleN])
**Arguments**
- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Substrings to be searched. Array
- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md).
**Returned values**
- Array of the starting position in bytes and counting from 1 (if the substring was found) or 0 (if the substring was not found)
- Array of the starting position in bytes and counting from 1, if the substring was found.
- 0, if the substring was not found.
**Example**
Query:
``` sql
SELECT multiSearchAllPositions('Hello, World!', ['hello', '!', 'world']);
```
@ -194,45 +228,535 @@ Result:
│ [0,13,0] │
└───────────────────────────────────────────────────────────────────┘
```
## multiSearchAllPositionsCaseInsensitive
## multiSearchAllPositionsUTF8
Like [multiSearchAllPositions](#multiSearchAllPositions) but assumes `haystack` and the `needle`-s are UTF-8 encoded strings.
## multiSearchFirstPosition
Like `position` but returns the leftmost offset in a `haystack` string which matches any of multiple `needle` strings.
Functions `multiSearchFirstPositionCaseInsensitive`, `multiSearchFirstPositionUTF8` and `multiSearchFirstPositionCaseInsensitiveUTF8` provide case-insensitive and/or UTF-8 variants of this function.
Like [multiSearchAllPositions](#multisearchallpositions) but ignores case.
**Syntax**
```sql
multiSearchFirstPosition(haystack, \[needle<sub>1</sub>, needle<sub>2</sub>, …, needle<sub>n</sub>\])
multiSearchAllPositionsCaseInsensitive(haystack, [needle1, needle2, ..., needleN])
```
**Parameters**
- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md).
**Returned value**
- Array of the starting position in bytes and counting from 1 (if the substring was found).
- 0 if the substring was not found.
**Example**
Query:
```sql
SELECT multiSearchAllPositionsCaseInsensitive('ClickHouse',['c','h']);
```
Result:
```response
["1","6"]
```
## multiSearchAllPositionsUTF8
Like [multiSearchAllPositions](#multiSearchAllPositions) but assumes `haystack` and the `needle` substrings are UTF-8 encoded strings.
**Syntax**
```sql
multiSearchAllPositionsUTF8(haystack, [needle1, needle2, ..., needleN])
```
**Parameters**
- `haystack` — UTF-8 encoded string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — UTF-8 encoded substrings to be searched. [Array](../../sql-reference/data-types/array.md).
**Returned value**
- Array of the starting position in bytes and counting from 1 (if the substring was found).
- 0 if the substring was not found.
**Example**
Given `ClickHouse` as a UTF-8 string, find the positions of `C` (`\x43`) and `H` (`\x48`).
Query:
```sql
SELECT multiSearchAllPositionsUTF8('\x43\x6c\x69\x63\x6b\x48\x6f\x75\x73\x65',['\x43','\x48']);
```
Result:
```response
["1","6"]
```
## multiSearchAllPositionsCaseInsensitiveUTF8
Like [multiSearchAllPositionsUTF8](#multisearchallpositionsutf8) but ignores case.
**Syntax**
```sql
multiSearchAllPositionsCaseInsensitiveUTF8(haystack, [needle1, needle2, ..., needleN])
```
**Parameters**
- `haystack` — UTF-8 encoded string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — UTF-8 encoded substrings to be searched. [Array](../../sql-reference/data-types/array.md).
**Returned value**
- Array of the starting position in bytes and counting from 1 (if the substring was found).
- 0 if the substring was not found.
**Example**
Given `ClickHouse` as a UTF-8 string, find the positions of `c` (`\x63`) and `h` (`\x68`).
Query:
```sql
SELECT multiSearchAllPositionsCaseInsensitiveUTF8('\x43\x6c\x69\x63\x6b\x48\x6f\x75\x73\x65',['\x63','\x68']);
```
Result:
```response
["1","6"]
```
## multiSearchFirstPosition
Like [`position`](#position) but returns the leftmost offset in a `haystack` string which matches any of multiple `needle` strings.
Functions [`multiSearchFirstPositionCaseInsensitive`](#multiSearchFirstPositionCaseInsensitive), [`multiSearchFirstPositionUTF8`](#multiSearchFirstPositionUTF8) and [`multiSearchFirstPositionCaseInsensitiveUTF8`](#multiSearchFirstPositionCaseInsensitiveUTF8) provide case-insensitive and/or UTF-8 variants of this function.
**Syntax**
```sql
multiSearchFirstPosition(haystack, [needle1, needle2, ..., needleN])
```
**Parameters**
- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md).
**Returned value**
- Leftmost offset in a `haystack` string which matches any of multiple `needle` strings.
- 0, if there was no match.
**Example**
Query:
```sql
SELECT multiSearchFirstPosition('Hello World',['llo', 'Wor', 'ld']);
```
Result:
```response
3
```
## multiSearchFirstPositionCaseInsensitive
Like [`multiSearchFirstPosition`](#multiSearchFirstPosition) but ignores case.
**Syntax**
```sql
multiSearchFirstPositionCaseInsensitive(haystack, [needle1, needle2, ..., needleN])
```
**Parameters**
- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Array of substrings to be searched. [Array](../../sql-reference/data-types/array.md).
**Returned value**
- Leftmost offset in a `haystack` string which matches any of multiple `needle` strings.
- 0, if there was no match.
**Example**
Query:
```sql
SELECT multiSearchFirstPositionCaseInsensitive('HELLO WORLD',['wor', 'ld', 'ello']);
```
Result:
```response
2
```
## multiSearchFirstPositionUTF8
Like [`multiSearchFirstPosition`](#multiSearchFirstPosition) but assumes `haystack` and `needle` to be UTF-8 strings.
**Syntax**
```sql
multiSearchFirstPositionUTF8(haystack, [needle1, needle2, ..., needleN])
```
**Parameters**
- `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Array of UTF-8 substrings to be searched. [Array](../../sql-reference/data-types/array.md).
**Returned value**
- Leftmost offset in a `haystack` string which matches any of multiple `needle` strings.
- 0, if there was no match.
**Example**
Find the leftmost offset in UTF-8 string `hello world` which matches any of the given needles.
Query:
```sql
SELECT multiSearchFirstPositionUTF8('\x68\x65\x6c\x6c\x6f\x20\x77\x6f\x72\x6c\x64',['wor', 'ld', 'ello']);
```
Result:
```response
2
```
## multiSearchFirstPositionCaseInsensitiveUTF8
Like [`multiSearchFirstPosition`](#multiSearchFirstPosition) but assumes `haystack` and `needle` to be UTF-8 strings and ignores case.
**Syntax**
```sql
multiSearchFirstPositionCaseInsensitiveUTF8(haystack, [needle1, needle2, ..., needleN])
```
**Parameters**
- `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Array of UTF-8 substrings to be searched. [Array](../../sql-reference/data-types/array.md)
**Returned value**
- Leftmost offset in a `haystack` string which matches any of multiple `needle` strings, ignoring case.
- 0, if there was no match.
**Example**
Find the leftmost offset in UTF-8 string `HELLO WORLD` which matches any of the given needles.
Query:
```sql
SELECT multiSearchFirstPositionCaseInsensitiveUTF8('\x48\x45\x4c\x4c\x4f\x20\x57\x4f\x52\x4c\x44',['wor', 'ld', 'ello']);
```
Result:
```response
2
```
## multiSearchFirstIndex
Returns the index `i` (starting from 1) of the leftmost found needle<sub>i</sub> in the string `haystack` and 0 otherwise.
Functions `multiSearchFirstIndexCaseInsensitive`, `multiSearchFirstIndexUTF8` and `multiSearchFirstIndexCaseInsensitiveUTF8` provide case-insensitive and/or UTF-8 variants of this function.
Functions [`multiSearchFirstIndexCaseInsensitive`](#multiSearchFirstIndexCaseInsensitive), [`multiSearchFirstIndexUTF8`](#multiSearchFirstIndexUTF8) and [`multiSearchFirstIndexCaseInsensitiveUTF8`](#multiSearchFirstIndexCaseInsensitiveUTF8) provide case-insensitive and/or UTF-8 variants of this function.
**Syntax**
```sql
multiSearchFirstIndex(haystack, \[needle<sub>1</sub>, needle<sub>2</sub>, …, needle<sub>n</sub>\])
multiSearchFirstIndex(haystack, [needle1, needle2, ..., needleN])
```
**Parameters**
- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md).
**Returned value**
- index (starting from 1) of the leftmost found needle.
- 0, if there was no match.
**Example**
Query:
```sql
SELECT multiSearchFirstIndex('Hello World',['World','Hello']);
```
## multiSearchAny {#multisearchany}
Result:
```response
1
```
## multiSearchFirstIndexCaseInsensitive
Returns the index `i` (starting from 1) of the leftmost found needle<sub>i</sub> in the string `haystack` and 0 otherwise. Ignores case.
**Syntax**
```sql
multiSearchFirstIndexCaseInsensitive(haystack, [needle1, needle2, ..., needleN])
```
**Parameters**
- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md).
**Returned value**
- index (starting from 1) of the leftmost found needle.
- 0, if there was no match.
**Example**
Query:
```sql
SELECT multiSearchFirstIndexCaseInsensitive('hElLo WoRlD',['World','Hello']);
```
Result:
```response
1
```
## multiSearchFirstIndexUTF8
Returns the index `i` (starting from 1) of the leftmost found needle<sub>i</sub> in the string `haystack` and 0 otherwise. Assumes `haystack` and `needle` are UTF-8 encoded strings.
**Syntax**
```sql
multiSearchFirstIndexUTF8(haystack, [needle1, needle2, ..., needleN])
```
**Parameters**
- `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Array of UTF-8 substrings to be searched. [Array](../../sql-reference/data-types/array.md)
**Returned value**
- index (starting from 1) of the leftmost found needle.
- 0, if there was no match.
**Example**
Given `Hello World` as a UTF-8 string, find the first index of UTF-8 strings `Hello` and `World`.
Query:
```sql
SELECT multiSearchFirstIndexUTF8('\x48\x65\x6c\x6c\x6f\x20\x57\x6f\x72\x6c\x64',['\x57\x6f\x72\x6c\x64','\x48\x65\x6c\x6c\x6f']);
```
Result:
```response
1
```
## multiSearchFirstIndexCaseInsensitiveUTF8
Returns the index `i` (starting from 1) of the leftmost found needle<sub>i</sub> in the string `haystack` and 0 otherwise. Assumes `haystack` and `needle` are UTF-8 encoded strings. Ignores case.
**Syntax**
```sql
multiSearchFirstIndexCaseInsensitiveUTF8(haystack, [needle1, needle2, ..., needleN])
```
**Parameters**
- `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Array of UTF-8 substrings to be searched. [Array](../../sql-reference/data-types/array.md).
**Returned value**
- index (starting from 1) of the leftmost found needle.
- 0, if there was no match.
**Example**
Given `HELLO WORLD` as a UTF-8 string, find the first index of UTF-8 strings `hello` and `world`.
Query:
```sql
SELECT multiSearchFirstIndexCaseInsensitiveUTF8('\x48\x45\x4c\x4c\x4f\x20\x57\x4f\x52\x4c\x44',['\x68\x65\x6c\x6c\x6f','\x77\x6f\x72\x6c\x64']);
```
Result:
```response
1
```
## multiSearchAny
Returns 1, if at least one string needle<sub>i</sub> matches the string `haystack` and 0 otherwise.
Functions `multiSearchAnyCaseInsensitive`, `multiSearchAnyUTF8` and `multiSearchAnyCaseInsensitiveUTF8` provide case-insensitive and/or UTF-8 variants of this function.
Functions [`multiSearchAnyCaseInsensitive`](#multiSearchAnyCaseInsensitive), [`multiSearchAnyUTF8`](#multiSearchAnyUTF8) and [`multiSearchAnyCaseInsensitiveUTF8`](#multiSearchAnyCaseInsensitiveUTF8) provide case-insensitive and/or UTF-8 variants of this function.
**Syntax**
```sql
multiSearchAny(haystack, \[needle<sub>1</sub>, needle<sub>2</sub>, …, needle<sub>n</sub>\])
multiSearchAny(haystack, [needle1, needle2, ..., needleN])
```
**Parameters**
- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md).
**Returned value**
- 1, if there was at least one match.
- 0, otherwise.
**Example**
Query:
```sql
SELECT multiSearchAny('ClickHouse',['C','H']);
```
Result:
```response
1
```
## multiSearchAnyCaseInsensitive
Like [multiSearchAny](#multisearchany) but ignores case.
**Syntax**
```sql
multiSearchAnyCaseInsensitive(haystack, [needle1, needle2, ..., needleN])
```
**Parameters**
- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md)
**Returned value**
- 1, if there was at least one case-insensitive match.
- 0, otherwise.
**Example**
Query:
```sql
SELECT multiSearchAnyCaseInsensitive('ClickHouse',['c','h']);
```
Result:
```response
1
```
## multiSearchAnyUTF8
Like [multiSearchAny](#multisearchany) but assumes `haystack` and the `needle` substrings are UTF-8 encoded strings.
**Syntax**
```sql
multiSearchAnyUTF8(haystack, [needle1, needle2, ..., needleN])
```
**Parameters**
- `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — UTF-8 substrings to be searched. [Array](../../sql-reference/data-types/array.md).
**Returned value**
- 1, if there was at least one match.
- 0, otherwise.
**Example**
Given `ClickHouse` as a UTF-8 string, check if there are any `C` (`\x43`) or `H` (`\x48`) letters in the word.
Query:
```sql
SELECT multiSearchAnyUTF8('\x43\x6c\x69\x63\x6b\x48\x6f\x75\x73\x65',['\x43','\x48']);
```
Result:
```response
1
```
## multiSearchAnyCaseInsensitiveUTF8
Like [multiSearchAnyUTF8](#multiSearchAnyUTF8) but ignores case.
**Syntax**
```sql
multiSearchAnyCaseInsensitiveUTF8(haystack, [needle1, needle2, ..., needleN])
```
**Parameters**
- `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — UTF-8 substrings to be searched. [Array](../../sql-reference/data-types/array.md)
**Returned value**
- 1, if there was at least one case-insensitive match.
- 0, otherwise.
**Example**
Given `ClickHouse` as a UTF-8 string, check if there is any letter `h` (`\x68`) in the word, ignoring case.
Query:
```sql
SELECT multiSearchAnyCaseInsensitiveUTF8('\x43\x6c\x69\x63\x6b\x48\x6f\x75\x73\x65',['\x68']);
```
Result:
```response
1
```
## match {#match}
@ -446,7 +970,7 @@ If the haystack or the LIKE expression are not valid UTF-8, the behavior is unde
No automatic Unicode normalization is performed, you can use the [normalizeUTF8*()](https://clickhouse.com/docs/en/sql-reference/functions/string-functions/) functions for that.
To match against literal `%`, `_` and `/` (which are LIKE metacharacters), prepend them with a backslash: `\%`, `\_` and `\\`.
To match against literal `%`, `_` and `\` (which are LIKE metacharacters), prepend them with a backslash: `\%`, `\_` and `\\`.
The backslash loses its special meaning (i.e. is interpreted literally) if it prepends a character different than `%`, `_` or `\`.
Note that ClickHouse requires backslashes in strings [to be quoted as well](../syntax.md#string), so you would actually need to write `\\%`, `\\_` and `\\\\`.
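For example, matching a literal underscore (a sketch; the strings are arbitrary):

Query:

```sql
SELECT 'a_c' LIKE 'a\\_c' AS literal_match, 'abc' LIKE 'a\\_c' AS no_match;
```

Result:

```response
┌─literal_match─┬─no_match─┐
│             1 │        0 │
└───────────────┴──────────┘
```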
@ -1244,4 +1768,4 @@ SELECT hasTokenCaseInsensitiveOrNull('Hello World','hello,world');
```response
null
```
```

View File

@ -521,45 +521,6 @@ Result:
└──────────────────────────────────┘
```
## dotProduct
Calculates the scalar product of two tuples of the same size.
**Syntax**
```sql
dotProduct(tuple1, tuple2)
```
Alias: `scalarProduct`.
**Arguments**
- `tuple1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md).
- `tuple2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md).
**Returned value**
- Scalar product.
Type: [Int/UInt](../../sql-reference/data-types/int-uint.md) or [Float](../../sql-reference/data-types/float.md).
**Example**
Query:
```sql
SELECT dotProduct((1, 2), (2, 3));
```
Result:
```text
┌─dotProduct((1, 2), (2, 3))─┐
│ 8 │
└────────────────────────────┘
```
## tupleConcat
Combines tuples passed as arguments.
@ -584,6 +545,278 @@ SELECT tupleConcat((1, 2), (3, 4), (true, false)) AS res
└──────────────────────┘
```
## tupleIntDiv
Does integer division of a tuple of numerators and a tuple of denominators, and returns a tuple of the quotients.
**Syntax**
```sql
tupleIntDiv(tuple_num, tuple_div)
```
**Parameters**
- `tuple_num`: Tuple of numerator values. [Tuple](../data-types/tuple) of numeric type.
- `tuple_div`: Tuple of divisor values. [Tuple](../data-types/tuple) of numeric type.
**Returned value**
- Tuple of the quotients of `tuple_num` and `tuple_div`. [Tuple](../data-types/tuple) of integer values.
**Implementation details**
- If either `tuple_num` or `tuple_div` contain non-integer values then the result is calculated by rounding to the nearest integer for each non-integer numerator or divisor.
- An error will be thrown for division by 0.
**Examples**
Query:
``` sql
SELECT tupleIntDiv((15, 10, 5), (5, 5, 5));
```
Result:
``` text
┌─tupleIntDiv((15, 10, 5), (5, 5, 5))─┐
│ (3,2,1) │
└─────────────────────────────────────┘
```
Query:
``` sql
SELECT tupleIntDiv((15, 10, 5), (5.5, 5.5, 5.5));
```
Result:
``` text
┌─tupleIntDiv((15, 10, 5), (5.5, 5.5, 5.5))─┐
│ (2,1,0) │
└───────────────────────────────────────────┘
```
## tupleIntDivOrZero
Like [tupleIntDiv](#tupleintdiv), it does integer division of a tuple of numerators and a tuple of denominators, and returns a tuple of the quotients. It does not throw an error for 0 divisors, but rather returns the quotient as 0.
**Syntax**
```sql
tupleIntDivOrZero(tuple_num, tuple_div)
```
**Parameters**
- `tuple_num`: Tuple of numerator values. [Tuple](../data-types/tuple) of numeric type.
- `tuple_div`: Tuple of divisor values. [Tuple](../data-types/tuple) of numeric type.
**Returned value**
- Tuple of the quotients of `tuple_num` and `tuple_div`. [Tuple](../data-types/tuple) of integer values.
- Returns 0 for quotients where the divisor is 0.
**Implementation details**
- If either `tuple_num` or `tuple_div` contain non-integer values then the result is calculated by rounding to the nearest integer for each non-integer numerator or divisor as in [tupleIntDiv](#tupleintdiv).
**Examples**
Query:
``` sql
SELECT tupleIntDivOrZero((5, 10, 15), (0, 0, 0));
```
Result:
``` text
┌─tupleIntDivOrZero((5, 10, 15), (0, 0, 0))─┐
│ (0,0,0) │
└───────────────────────────────────────────┘
```
## tupleIntDivByNumber
Does integer division of a tuple of numerators by a given denominator, and returns a tuple of the quotients.
**Syntax**
```sql
tupleIntDivByNumber(tuple_num, div)
```
**Parameters**
- `tuple_num`: Tuple of numerator values. [Tuple](../data-types/tuple) of numeric type.
- `div`: The divisor value. [Numeric](../data-types/int-uint.md) type.
**Returned value**
- Tuple of the quotients of `tuple_num` and `div`. [Tuple](../data-types/tuple) of integer values.
**Implementation details**
- If either `tuple_num` or `div` contain non-integer values then the result is calculated by rounding to the nearest integer for each non-integer numerator or divisor.
- An error will be thrown for division by 0.
**Examples**
Query:
``` sql
SELECT tupleIntDivByNumber((15, 10, 5), 5);
```
Result:
``` text
┌─tupleIntDivByNumber((15, 10, 5), 5)─┐
│ (3,2,1) │
└─────────────────────────────────────┘
```
Query:
``` sql
SELECT tupleIntDivByNumber((15.2, 10.7, 5.5), 5.8);
```
Result:
``` text
┌─tupleIntDivByNumber((15.2, 10.7, 5.5), 5.8)─┐
│ (2,1,0) │
└─────────────────────────────────────────────┘
```
## tupleIntDivOrZeroByNumber
Like [tupleIntDivByNumber](#tupleintdivbynumber), it does integer division of a tuple of numerators by a given denominator, and returns a tuple of the quotients. It does not throw an error for 0 divisors, but rather returns the quotient as 0.
**Syntax**
```sql
tupleIntDivOrZeroByNumber(tuple_num, div)
```
**Parameters**
- `tuple_num`: Tuple of numerator values. [Tuple](../data-types/tuple) of numeric type.
- `div`: The divisor value. [Numeric](../data-types/int-uint.md) type.
**Returned value**
- Tuple of the quotients of `tuple_num` and `div`. [Tuple](../data-types/tuple) of integer values.
- Returns 0 for quotients where the divisor is 0.
**Implementation details**
- If either `tuple_num` or `div` contain non-integer values then the result is calculated by rounding to the nearest integer for each non-integer numerator or divisor as in [tupleIntDivByNumber](#tupleintdivbynumber).
**Examples**
Query:
``` sql
SELECT tupleIntDivOrZeroByNumber((15, 10, 5), 5);
```
Result:
``` text
┌─tupleIntDivOrZeroByNumber((15, 10, 5), 5)─┐
│ (3,2,1) │
└───────────────────────────────────────────┘
```
Query:
``` sql
SELECT tupleIntDivOrZeroByNumber((15, 10, 5), 0)
```
Result:
``` text
┌─tupleIntDivOrZeroByNumber((15, 10, 5), 0)─┐
│ (0,0,0) │
└───────────────────────────────────────────┘
```
## tupleModulo
Returns a tuple of the moduli (remainders) of division operations of two tuples.
**Syntax**
```sql
tupleModulo(tuple_num, tuple_mod)
```
**Parameters**
- `tuple_num`: Tuple of numerator values. [Tuple](../data-types/tuple) of numeric type.
- `tuple_mod`: Tuple of modulus values. [Tuple](../data-types/tuple) of numeric type.
**Returned value**
- Tuple of the remainders of division of `tuple_num` by `tuple_mod`. [Tuple](../data-types/tuple) of integer values.
- An error is thrown for division by zero.
**Examples**
Query:
``` sql
SELECT tupleModulo((15, 10, 5), (5, 3, 2));
```
Result:
``` text
┌─tupleModulo((15, 10, 5), (5, 3, 2))─┐
│ (0,1,1) │
└─────────────────────────────────────┘
```
## tupleModuloByNumber
Returns a tuple of the moduli (remainders) of division operations of a tuple and a given divisor.
**Syntax**
```sql
tupleModuloByNumber(tuple_num, div)
```
**Parameters**
- `tuple_num`: Tuple of numerator values. [Tuple](../data-types/tuple) of numeric type.
- `div`: The divisor value. [Numeric](../data-types/int-uint.md) type.
**Returned value**
- Tuple of the remainders of division of `tuple_num` by `div`. [Tuple](../data-types/tuple) of integer values.
- An error is thrown for division by zero.
**Examples**
Query:
``` sql
SELECT tupleModuloByNumber((15, 10, 5), 2);
```
Result:
``` text
┌─tupleModuloByNumber((15, 10, 5), 2)─┐
│ (1,0,1) │
└─────────────────────────────────────┘
```
## Distance functions
All supported functions are described in [distance functions documentation](../../sql-reference/functions/distance-functions.md).

View File

@ -133,8 +133,6 @@ For the query to run successfully, the following conditions must be met:
- Both tables must have the same indices and projections.
- Both tables must have the same storage policy.
If both tables have the same storage policy, hardlinks are used to attach the partition. Otherwise, the data is copied.
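A usage sketch; the table names and the partition expression are hypothetical (assuming partitioning by `toYYYYMM`):

```sql
-- Attach the 2024-04 partition of table_source to table_dest
-- (hardlinked if the storage policies match, copied otherwise)
ALTER TABLE table_dest ATTACH PARTITION 202404 FROM table_source;
```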
## REPLACE PARTITION
``` sql

View File

@ -8,7 +8,7 @@ sidebar_label: VIEW
You can modify the `SELECT` query that was specified when a [materialized view](../create/view.md#materialized) was created with the `ALTER TABLE … MODIFY QUERY` statement without interrupting the ingestion process.
This command is created to change materialized view created with `TO [db.]name` clause. It does not change the structure of the underling storage table and it does not change the columns' definition of the materialized view, because of this the application of this command is very limited for materialized views are created without `TO [db.]name` clause.
This command is intended to change a materialized view created with the `TO [db.]name` clause. It does not change the structure of the underlying storage table, and it does not change the columns' definition of the materialized view. Because of this, the application of this command is very limited for materialized views created without the `TO [db.]name` clause.
**Example with TO table**

View File

@ -20,11 +20,10 @@ DROP DATABASE [IF EXISTS] db [ON CLUSTER cluster] [SYNC]
## DROP TABLE
Deletes the table.
In case when `IF EMPTY` clause is specified server will check if table is empty only on replica that received initial query.
Deletes one or more tables.
:::tip
Also see [UNDROP TABLE](/docs/en/sql-reference/statements/undrop.md)
To undo the deletion of a table, please see [UNDROP TABLE](/docs/en/sql-reference/statements/undrop.md)
:::
Syntax:
@ -33,7 +32,9 @@ Syntax:
DROP [TEMPORARY] TABLE [IF EXISTS] [IF EMPTY] [db1.]name_1[, [db2.]name_2, ...] [ON CLUSTER cluster] [SYNC]
```
Note that deleting multiple tables at the same time is a non-atomic deletion. If a table fails to be deleted, subsequent tables will not be deleted.
Limitations:
- If the clause `IF EMPTY` is specified, the server checks the emptiness of the table only on the replica which received the query.
- Deleting multiple tables at once is not an atomic operation, i.e. if the deletion of a table fails, subsequent tables will not be deleted.
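For example, dropping two hypothetical tables in one statement (as noted above, this is not atomic):

```sql
DROP TABLE IF EXISTS tab_1, tab_2;
```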
## DROP DICTIONARY

View File

@ -12,25 +12,23 @@ Some of the calculations that you can do are similar to those that can be done w
ClickHouse supports the standard grammar for defining windows and window functions. The table below indicates whether a feature is currently supported.
| Feature | Support or workaround |
| Feature | Supported? |
|------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| ad hoc window specification (`count(*) over (partition by id order by time desc)`) | supported |
| expressions involving window functions, e.g. `(count(*) over ()) / 2)` | supported |
| `WINDOW` clause (`select ... from table window w as (partition by id)`) | supported |
| `ROWS` frame | supported |
| `RANGE` frame | supported, the default |
| `INTERVAL` syntax for `DateTime` `RANGE OFFSET` frame | not supported, specify the number of seconds instead (`RANGE` works with any numeric type). |
| `GROUPS` frame | not supported |
| Calculating aggregate functions over a frame (`sum(value) over (order by time)`) | all aggregate functions are supported |
| `rank()`, `dense_rank()`, `row_number()` | supported |
| `lag/lead(value, offset)` | Not supported. Workarounds: |
| | 1) replace with `any(value) over (.... rows between <offset> preceding and <offset> preceding)`, or `following` for `lead` |
| | 2) use `lagInFrame/leadInFrame`, which are analogous, but respect the window frame. To get behavior identical to `lag/lead`, use `rows between unbounded preceding and unbounded following` |
| ntile(buckets) | Supported. Specify window like, (partition by x order by y rows between unbounded preceding and unrounded following). |
| ad hoc window specification (`count(*) over (partition by id order by time desc)`) | ✅ |
| expressions involving window functions, e.g. `(count(*) over ()) / 2)` | ✅ |
| `WINDOW` clause (`select ... from table window w as (partition by id)`) | ✅ |
| `ROWS` frame | ✅ |
| `RANGE` frame | ✅ (the default) |
| `INTERVAL` syntax for `DateTime` `RANGE OFFSET` frame | ❌ (specify the number of seconds instead; `RANGE` works with any numeric type) |
| `GROUPS` frame | ❌ |
| Calculating aggregate functions over a frame (`sum(value) over (order by time)`) | ✅ (All aggregate functions are supported) |
| `rank()`, `dense_rank()`, `row_number()` | ✅ |
| `lag/lead(value, offset)` | ❌ <br/> You can use one of the following workarounds:<br/> 1) `any(value) over (.... rows between <offset> preceding and <offset> preceding)`, or `following` for `lead` <br/> 2) `lagInFrame/leadInFrame`, which are analogous, but respect the window frame. To get behavior identical to `lag/lead`, use `rows between unbounded preceding and unbounded following` |
| ntile(buckets) | ✅ <br/> Specify the window as `(partition by x order by y rows between unbounded preceding and unbounded following)`. |
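As a worked sketch of the `lag`/`lead` workaround from the table above (the column names are hypothetical), `lag(value, 1)` can be emulated with `any` over a frame of exactly one preceding row:

```sql
SELECT
    t,
    value,
    -- the frame covers only the single preceding row; for the first row the
    -- frame is empty, so the aggregate returns the type's default value (0)
    any(value) OVER (ORDER BY t ASC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS lag_value
FROM values('t UInt32, value UInt32', (1, 10), (2, 20), (3, 30));
```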
## ClickHouse-specific Window Functions
There are also the following window function that's specific to ClickHouse:
There is also the following ClickHouse-specific window function:
### nonNegativeDerivative(metric_column, timestamp_column[, INTERVAL X UNITS])
@ -89,6 +87,102 @@ These functions can be used only as a window function.
Let's have a look at some examples of how window functions can be used.
### Numbering rows
```sql
CREATE TABLE salaries
(
`team` String,
`player` String,
`salary` UInt32,
`position` String
)
Engine = Memory;
INSERT INTO salaries FORMAT Values
('Port Elizabeth Barbarians', 'Gary Chen', 195000, 'F'),
('New Coreystad Archdukes', 'Charles Juarez', 190000, 'F'),
('Port Elizabeth Barbarians', 'Michael Stanley', 150000, 'D'),
('New Coreystad Archdukes', 'Scott Harrison', 150000, 'D'),
('Port Elizabeth Barbarians', 'Robert George', 195000, 'M');
```
```sql
SELECT player, salary,
row_number() OVER (ORDER BY salary) AS row
FROM salaries;
```
```text
┌─player──────────┬─salary─┬─row─┐
│ Michael Stanley │ 150000 │ 1 │
│ Scott Harrison │ 150000 │ 2 │
│ Charles Juarez │ 190000 │ 3 │
│ Gary Chen │ 195000 │ 4 │
│ Robert George │ 195000 │ 5 │
└─────────────────┴────────┴─────┘
```
```sql
SELECT player, salary,
row_number() OVER (ORDER BY salary) AS row,
rank() OVER (ORDER BY salary) AS rank,
dense_rank() OVER (ORDER BY salary) AS denseRank
FROM salaries;
```
```text
┌─player──────────┬─salary─┬─row─┬─rank─┬─denseRank─┐
│ Michael Stanley │ 150000 │ 1 │ 1 │ 1 │
│ Scott Harrison │ 150000 │ 2 │ 1 │ 1 │
│ Charles Juarez │ 190000 │ 3 │ 3 │ 2 │
│ Gary Chen │ 195000 │ 4 │ 4 │ 3 │
│ Robert George │ 195000 │ 5 │ 4 │ 3 │
└─────────────────┴────────┴─────┴──────┴───────────┘
```
### Aggregation functions
Compare each player's salary to the average for their team.
```sql
SELECT player, salary, team,
avg(salary) OVER (PARTITION BY team) AS teamAvg,
salary - teamAvg AS diff
FROM salaries;
```
```text
┌─player──────────┬─salary─┬─team──────────────────────┬─teamAvg─┬───diff─┐
│ Charles Juarez │ 190000 │ New Coreystad Archdukes │ 170000 │ 20000 │
│ Scott Harrison │ 150000 │ New Coreystad Archdukes │ 170000 │ -20000 │
│ Gary Chen │ 195000 │ Port Elizabeth Barbarians │ 180000 │ 15000 │
│ Michael Stanley │ 150000 │ Port Elizabeth Barbarians │ 180000 │ -30000 │
│ Robert George │ 195000 │ Port Elizabeth Barbarians │ 180000 │ 15000 │
└─────────────────┴────────┴───────────────────────────┴─────────┴────────┘
```
Compare each player's salary to the maximum for their team.
```sql
SELECT player, salary, team,
max(salary) OVER (PARTITION BY team) AS teamMax,
salary - teamMax AS diff
FROM salaries;
```
```text
┌─player──────────┬─salary─┬─team──────────────────────┬─teamMax─┬───diff─┐
│ Charles Juarez │ 190000 │ New Coreystad Archdukes │ 190000 │ 0 │
│ Scott Harrison │ 150000 │ New Coreystad Archdukes │ 190000 │ -40000 │
│ Gary Chen │ 195000 │ Port Elizabeth Barbarians │ 195000 │ 0 │
│ Michael Stanley │ 150000 │ Port Elizabeth Barbarians │ 195000 │ -45000 │
│ Robert George │ 195000 │ Port Elizabeth Barbarians │ 195000 │ 0 │
└─────────────────┴────────┴───────────────────────────┴─────────┴────────┘
```
### Partitioning by column
```sql
CREATE TABLE wf_partition
(
@ -120,6 +214,8 @@ ORDER BY
└──────────┴───────┴───────┴──────────────┘
```
### Frame bounding
```sql
CREATE TABLE wf_frame
(
@ -131,14 +227,19 @@ ENGINE = Memory;
INSERT INTO wf_frame FORMAT Values
(1,1,1), (1,2,2), (1,3,3), (1,4,4), (1,5,5);
```
```sql
-- Frame is bounded by bounds of a partition (BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)
SELECT
part_key,
value,
order,
groupArray(value) OVER (
PARTITION BY part_key
ORDER BY order ASC
Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
) AS frame_values
FROM wf_frame
ORDER BY
part_key ASC,
@ -151,7 +252,9 @@ ORDER BY
│ 1 │ 4 │ 4 │ [1,2,3,4,5] │
│ 1 │ 5 │ 5 │ [1,2,3,4,5] │
└──────────┴───────┴───────┴──────────────┘
```
```sql
-- short form - no bound expression, no order by
SELECT
part_key,
@ -169,14 +272,19 @@ ORDER BY
│ 1 │ 4 │ 4 │ [1,2,3,4,5] │
│ 1 │ 5 │ 5 │ [1,2,3,4,5] │
└──────────┴───────┴───────┴──────────────┘
```
```sql
-- frame is bounded by the beginning of a partition and the current row
SELECT
part_key,
value,
order,
groupArray(value) OVER (
PARTITION BY part_key
ORDER BY order ASC
Rows BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
) AS frame_values
FROM wf_frame
ORDER BY
part_key ASC,
@ -189,8 +297,10 @@ ORDER BY
│ 1 │ 4 │ 4 │ [1,2,3,4] │
│ 1 │ 5 │ 5 │ [1,2,3,4,5] │
└──────────┴───────┴───────┴──────────────┘
```
```sql
-- short form (frame is bounded by the beginning of a partition and the current row)
SELECT
part_key,
value,
@ -207,8 +317,10 @@ ORDER BY
│ 1 │ 4 │ 4 │ [1,2,3,4] │
│ 1 │ 5 │ 5 │ [1,2,3,4,5] │
└──────────┴───────┴───────┴──────────────┘
```
```sql
-- frame is bounded by the beginning of a partition and the current row, but order is backward
SELECT
part_key,
value,
@ -225,14 +337,19 @@ ORDER BY
│ 1 │ 4 │ 4 │ [5,4] │
│ 1 │ 5 │ 5 │ [5] │
└──────────┴───────┴───────┴──────────────┘
```
```sql
-- sliding frame - Rows BETWEEN 1 PRECEDING AND CURRENT ROW
SELECT
part_key,
value,
order,
groupArray(value) OVER (
PARTITION BY part_key
ORDER BY order ASC
Rows BETWEEN 1 PRECEDING AND CURRENT ROW
) AS frame_values
FROM wf_frame
ORDER BY
part_key ASC,
@ -245,14 +362,19 @@ ORDER BY
│ 1 │ 4 │ 4 │ [3,4] │
│ 1 │ 5 │ 5 │ [4,5] │
└──────────┴───────┴───────┴──────────────┘
```
```sql
-- sliding frame - Rows BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING
SELECT
part_key,
value,
order,
groupArray(value) OVER (
PARTITION BY part_key
ORDER BY order ASC
Rows BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING
) AS frame_values
FROM wf_frame
ORDER BY
part_key ASC,
@ -264,7 +386,9 @@ ORDER BY
│ 1 │ 4 │ 4 │ [3,4,5] │
│ 1 │ 5 │ 5 │ [4,5] │
└──────────┴───────┴───────┴──────────────┘
```
```sql
-- row_number does not respect the frame, so rn_1 = rn_2 = rn_3 != rn_4
SELECT
part_key,
@ -278,8 +402,11 @@ SELECT
FROM wf_frame
WINDOW
w1 AS (PARTITION BY part_key ORDER BY order DESC),
w2 AS (
PARTITION BY part_key
ORDER BY order DESC
Rows BETWEEN 1 PRECEDING AND CURRENT ROW
)
ORDER BY
part_key ASC,
value ASC;
@ -290,7 +417,9 @@ ORDER BY
│ 1 │ 4 │ 4 │ [5,4] │ 2 │ 2 │ 2 │ 2 │
│ 1 │ 5 │ 5 │ [5] │ 1 │ 1 │ 1 │ 1 │
└──────────┴───────┴───────┴──────────────┴──────┴──────┴──────┴──────┘
```
```sql
-- first_value and last_value respect the frame
SELECT
groupArray(value) OVER w1 AS frame_values_1,
@ -313,7 +442,9 @@ ORDER BY
│ [1,2,3,4] │ 1 │ 4 │ [3,4] │ 3 │ 4 │
│ [1,2,3,4,5] │ 1 │ 5 │ [4,5] │ 4 │ 5 │
└────────────────┴───────────────┴──────────────┴────────────────┴───────────────┴──────────────┘
```
```sql
-- second value within the frame
SELECT
groupArray(value) OVER w1 AS frame_values_1,
@ -330,7 +461,9 @@ ORDER BY
│ [1,2,3,4] │ 2 │
│ [2,3,4,5] │ 3 │
└────────────────┴──────────────┘
```
```sql
-- second value within the frame + Null for missing values
SELECT
groupArray(value) OVER w1 AS frame_values_1,
@ -351,7 +484,9 @@ ORDER BY
## Real world examples
The following examples solve common real-world problems.
### Maximum/total salary per department
```sql
CREATE TABLE employees
@ -369,7 +504,9 @@ INSERT INTO employees FORMAT Values
('IT', 'Tim', 200),
('IT', 'Anna', 300),
('IT', 'Elen', 500);
```
```sql
SELECT
department,
employee_name AS emp,
@ -386,8 +523,10 @@ FROM
max(salary) OVER wndw AS max_salary_per_dep,
sum(salary) OVER wndw AS total_salary_per_dep
FROM employees
WINDOW wndw AS (
PARTITION BY department
rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
)
ORDER BY
department ASC,
employee_name ASC
@ -403,7 +542,7 @@ FROM
└────────────┴──────┴────────┴────────────────────┴──────────────────────┴──────────────────┘
```
### Cumulative sum
```sql
CREATE TABLE warehouse
@ -421,7 +560,9 @@ INSERT INTO warehouse VALUES
('sku1', '2020-01-01', 1),
('sku1', '2020-02-01', 1),
('sku1', '2020-03-01', 1);
```
```sql
SELECT
item,
ts,
@ -461,13 +602,18 @@ insert into sensors values('cpu_temp', '2020-01-01 00:00:00', 87),
('cpu_temp', '2020-01-01 00:00:05', 87),
('cpu_temp', '2020-01-01 00:00:06', 87),
('cpu_temp', '2020-01-01 00:00:07', 87);
```
```sql
SELECT
metric,
ts,
value,
avg(value) OVER (
PARTITION BY metric
ORDER BY ts ASC
Rows BETWEEN 2 PRECEDING AND CURRENT ROW
) AS moving_avg_temp
FROM sensors
ORDER BY
metric ASC,
@ -536,7 +682,9 @@ insert into sensors values('ambient_temp', '2020-01-01 00:00:00', 16),
('ambient_temp', '2020-03-01 12:00:00', 16),
('ambient_temp', '2020-03-01 12:00:00', 16),
('ambient_temp', '2020-03-01 12:00:00', 16);
```
```sql
SELECT
metric,
ts,

View File

@ -434,16 +434,18 @@ $ curl -v 'http://localhost:8123/predefined_query'
``` xml
<http_handlers>
<rule>
<url><![CDATA[regex:/query_param_with_url/(?P<name_1>[^/]+)]]></url>
<methods>GET</methods>
<headers>
<XXX>TEST_HEADER_VALUE</XXX>
<PARAMS_XXX><![CDATA[regex:(?P<name_2>[^/]+)]]></PARAMS_XXX>
</headers>
<handler>
<type>predefined_query_handler</type>
<query>
SELECT name, value FROM system.settings
WHERE name IN ({name_1:String}, {name_2:String})
</query>
</handler>
</rule>
<defaults/>
@ -451,13 +453,13 @@ $ curl -v 'http://localhost:8123/predefined_query'
```
``` bash
$ curl -H 'XXX:TEST_HEADER_VALUE' -H 'PARAMS_XXX:max_final_threads' 'http://localhost:8123/query_param_with_url/max_threads?max_threads=1&max_final_threads=2'
max_final_threads 2
max_threads 1
```
:::note Warning
Only one query is supported per `predefined_query_handler`.
:::
### dynamic_query_handler {#dynamic_query_handler}

View File

@ -1,28 +0,0 @@
---
slug: /ru/sql-reference/data-types/multiword-types
sidebar_position: 61
sidebar_label: Multiword types
---
# Multiword Types {#multiword-types}
When creating tables, you can use data types with names consisting of several words. Such names are supported for better compatibility with SQL (see the sketch after the table below).
## Multiword type support {#multiword-types-support}
| Multiword types                     | Regular types                                              |
|-------------------------------------|-----------------------------------------------------------|
| DOUBLE PRECISION | [Float64](../../sql-reference/data-types/float.md) |
| CHAR LARGE OBJECT | [String](../../sql-reference/data-types/string.md) |
| CHAR VARYING | [String](../../sql-reference/data-types/string.md) |
| CHARACTER LARGE OBJECT | [String](../../sql-reference/data-types/string.md) |
| CHARACTER VARYING | [String](../../sql-reference/data-types/string.md) |
| NCHAR LARGE OBJECT | [String](../../sql-reference/data-types/string.md) |
| NCHAR VARYING | [String](../../sql-reference/data-types/string.md) |
| NATIONAL CHARACTER LARGE OBJECT | [String](../../sql-reference/data-types/string.md) |
| NATIONAL CHARACTER VARYING | [String](../../sql-reference/data-types/string.md) |
| NATIONAL CHAR VARYING | [String](../../sql-reference/data-types/string.md) |
| NATIONAL CHARACTER | [String](../../sql-reference/data-types/string.md) |
| NATIONAL CHAR | [String](../../sql-reference/data-types/string.md) |
| BINARY LARGE OBJECT | [String](../../sql-reference/data-types/string.md) |
| BINARY VARYING | [String](../../sql-reference/data-types/string.md) |
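For illustration, a minimal sketch of multiword type aliases in a table definition (the table name is hypothetical); each alias resolves to the regular type shown in the right-hand column above:
```sql
CREATE TABLE multiword_demo
(
    d DOUBLE PRECISION,   -- resolves to Float64
    s CHARACTER VARYING   -- resolves to String
)
ENGINE = Memory;
```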

View File

@ -427,29 +427,32 @@ $ curl -v 'http://localhost:8123/predefined_query'
``` xml
<http_handlers>
<rule>
<url><![CDATA[regex:/query_param_with_url/(?P<name_1>[^/]+)]]></url>
<methods>GET</methods>
<headers>
<XXX>TEST_HEADER_VALUE</XXX>
<PARAMS_XXX><![CDATA[regex:(?P<name_2>[^/]+)]]></PARAMS_XXX>
</headers>
<handler>
<type>predefined_query_handler</type>
<query>
SELECT name, value FROM system.settings
WHERE name IN ({name_1:String}, {name_2:String})
</query>
</handler>
</rule>
<defaults/>
</http_handlers>
```
``` bash
$ curl -H 'XXX:TEST_HEADER_VALUE' -H 'PARAMS_XXX:max_final_threads' 'http://localhost:8123/query_param_with_url/max_threads?max_threads=1&max_final_threads=2'
max_final_threads 2
max_threads 1
```
:::warning
Only one query is supported per `predefined_query_handler`.
:::
### Dynamic query {#dynamic_query_handler}

View File

@ -1,10 +0,0 @@
---
slug: /zh/sql-reference/data-types/multiword-types
sidebar_position: 61
sidebar_label: Multiword Type Names
title: "Multiword Types"
---
import Content from '@site/docs/en/sql-reference/data-types/multiword-types.md';
<Content />

View File

@ -3,7 +3,7 @@
function _clickhouse_get_utils()
{
local cmd=$1 && shift
"$cmd" --help |& awk '/^clickhouse.*args/ { print $2 }'
"$cmd" help |& awk '/^clickhouse.*args/ { print $2 }'
}
function _complete_for_clickhouse_entrypoint_bin()

View File

@ -482,6 +482,7 @@ void Client::connect()
server_version = toString(server_version_major) + "." + toString(server_version_minor) + "." + toString(server_version_patch);
load_suggestions = is_interactive && (server_revision >= Suggest::MIN_SERVER_REVISION) && !config().getBool("disable_suggestion", false);
wait_for_suggestions_to_load = config().getBool("wait_for_suggestions_to_load", false);
if (server_display_name = connection->getServerDisplayName(connection_parameters.timeouts); server_display_name.empty())
server_display_name = config().getString("host", "localhost");
@ -687,7 +688,11 @@ bool Client::processWithFuzzing(const String & full_query)
try
{
const char * begin = full_query.data();
orig_ast = parseQuery(begin, begin + full_query.size(),
global_context->getSettingsRef(),
/*allow_multi_statements=*/ true,
/*is_interactive=*/ is_interactive,
/*ignore_error=*/ ignore_error);
}
catch (const Exception & e)
{
@ -934,8 +939,8 @@ void Client::addOptions(OptionsDescription & options_description)
("user,u", po::value<std::string>()->default_value("default"), "user")
("password", po::value<std::string>(), "password")
("ask-password", "ask-password")
("ssh-key-file", po::value<std::string>(), "File containing ssh private key needed for authentication. If not set does password authentication.")
("ssh-key-passphrase", po::value<std::string>(), "Passphrase for imported ssh key.")
("ssh-key-file", po::value<std::string>(), "File containing the SSH private key for authenticate with the server.")
("ssh-key-passphrase", po::value<std::string>(), "Passphrase for the SSH private key specified by --ssh-key-file.")
("quota_key", po::value<std::string>(), "A string to differentiate quotas when the user have keyed quotas configured on server")
("max_client_network_bandwidth", po::value<int>(), "the maximum speed of data exchange over the network for the client in bytes per second.")
@ -950,6 +955,7 @@ void Client::addOptions(OptionsDescription & options_description)
("opentelemetry-tracestate", po::value<std::string>(), "OpenTelemetry tracestate header as described by W3C Trace Context recommendation")
("no-warnings", "disable warnings when client connects to server")
/// TODO: Left for compatibility as it's used in upgrade check, remove after next release and use server setting ignore_drop_queries_probability
("fake-drop", "Ignore all DROP queries, should be used only for testing")
("accept-invalid-certificate", "Ignore certificate verification errors, equal to config parameters openSSL.client.invalidCertificateHandler.name=AcceptCertificateHandler and openSSL.client.verificationMode=none")
;
@ -1093,7 +1099,7 @@ void Client::processOptions(const OptionsDescription & options_description,
if (options.count("no-warnings"))
config().setBool("no-warnings", true);
if (options.count("fake-drop"))
config().setString("ignore_drop_queries_probability", "1");
if (options.count("accept-invalid-certificate"))
{
config().setString("openSSL.client.invalidCertificateHandler.name", "AcceptCertificateHandler");

View File

@ -166,7 +166,7 @@ int DisksApp::main(const std::vector<String> & /*args*/)
{
String config_path = config().getString("config-file", getDefaultConfigFileName());
ConfigProcessor config_processor(config_path, false, false);
ConfigProcessor::setConfigPath(fs::path(config_path).parent_path());
auto loaded_config = config_processor.loadConfig();
config().add(loaded_config.configuration.duplicate(), false, false);
}

View File

@ -46,12 +46,12 @@ INCBIN(resource_users_xml, SOURCE_DIR "/programs/server/users.xml");
*
* The following steps are performed:
*
* - copying the binary to binary directory (/usr/bin/)
* - creation of symlinks for tools.
* - creation of clickhouse user and group.
* - creation of config directory (/etc/clickhouse-server/).
* - creation of default configuration files.
* - creation of a directory for logs (/var/log/clickhouse-server/).
* - creation of a data directory if not exists.
* - setting a password for default user.
* - choosing an option to listen for connections.
@ -226,7 +226,12 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
desc.add_options()
("help,h", "produce help message")
("prefix", po::value<std::string>()->default_value("/"), "prefix for all paths")
#if defined (OS_DARWIN)
/// https://stackoverflow.com/a/36734569/22422288
("binary-path", po::value<std::string>()->default_value("usr/local/bin"), "where to install binaries")
#else
("binary-path", po::value<std::string>()->default_value("usr/bin"), "where to install binaries")
#endif
("config-path", po::value<std::string>()->default_value("etc/clickhouse-server"), "where to install configs")
("log-path", po::value<std::string>()->default_value("var/log/clickhouse-server"), "where to create log directory")
("data-path", po::value<std::string>()->default_value("var/lib/clickhouse"), "directory for data")
@ -662,7 +667,6 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
" <server>\n"
" <certificateFile>" << (config_dir / "server.crt").string() << "</certificateFile>\n"
" <privateKeyFile>" << (config_dir / "server.key").string() << "</privateKeyFile>\n"
" <dhParamsFile>" << (config_dir / "dhparam.pem").string() << "</dhParamsFile>\n"
" </server>\n"
" </openSSL>\n"
"</clickhouse>\n";
@ -1217,7 +1221,12 @@ int mainEntryClickHouseStart(int argc, char ** argv)
desc.add_options()
("help,h", "produce help message")
("prefix", po::value<std::string>()->default_value("/"), "prefix for all paths")
#if defined (OS_DARWIN)
/// https://stackoverflow.com/a/36734569/22422288
("binary-path", po::value<std::string>()->default_value("usr/local/bin"), "directory with binary")
#else
("binary-path", po::value<std::string>()->default_value("usr/bin"), "directory with binary")
#endif
("config-path", po::value<std::string>()->default_value("etc/clickhouse-server"), "directory with configs")
("pid-path", po::value<std::string>()->default_value("var/run/clickhouse-server"), "directory for pid file")
("user", po::value<std::string>()->default_value(DEFAULT_CLICKHOUSE_SERVER_USER), "clickhouse user")
@ -1333,7 +1342,12 @@ int mainEntryClickHouseRestart(int argc, char ** argv)
desc.add_options()
("help,h", "produce help message")
("prefix", po::value<std::string>()->default_value("/"), "prefix for all paths")
#if defined (OS_DARWIN)
/// https://stackoverflow.com/a/36734569/22422288
("binary-path", po::value<std::string>()->default_value("usr/local/bin"), "directory with binary")
#else
("binary-path", po::value<std::string>()->default_value("usr/bin"), "directory with binary")
#endif
("config-path", po::value<std::string>()->default_value("etc/clickhouse-server"), "directory with configs")
("pid-path", po::value<std::string>()->default_value("var/run/clickhouse-server"), "directory for pid file")
("user", po::value<std::string>()->default_value(DEFAULT_CLICKHOUSE_SERVER_USER), "clickhouse user")

View File

@ -368,7 +368,7 @@ int KeeperClient::main(const std::vector<String> & /* args */)
DB::ConfigProcessor config_processor(config().getString("config-file", "config.xml"));
/// This will handle a situation when clickhouse is running on the embedded config, but config.d folder is also present.
ConfigProcessor::registerEmbeddedConfig("config.xml", "<clickhouse/>");
auto clickhouse_config = config_processor.loadConfig();
Poco::Util::AbstractConfiguration::Keys keys;

View File

@ -122,7 +122,7 @@ void LocalServer::initialize(Poco::Util::Application & self)
{
const auto config_path = config().getString("config-file", "config.xml");
ConfigProcessor config_processor(config_path, false, true);
ConfigProcessor::setConfigPath(fs::path(config_path).parent_path());
auto loaded_config = config_processor.loadConfig();
config().add(loaded_config.configuration.duplicate(), PRIO_DEFAULT, false);
}
@ -413,8 +413,20 @@ void LocalServer::setupUsers()
void LocalServer::connect()
{
connection_parameters = ConnectionParameters(config(), "localhost");
ReadBuffer * in;
auto table_file = config().getString("table-file", "-");
if (table_file == "-" || table_file == "stdin")
{
in = &std_in;
}
else
{
input = std::make_unique<ReadBufferFromFile>(table_file);
in = input.get();
}
connection = LocalConnection::createConnection(
connection_parameters, global_context, in, need_render_progress, need_render_profile_events, server_display_name);
}
@ -560,6 +572,7 @@ void LocalServer::processConfig()
const std::string clickhouse_dialect{"clickhouse"};
load_suggestions = (is_interactive || delayed_interactive) && !config().getBool("disable_suggestion", false)
&& config().getString("dialect", clickhouse_dialect) == clickhouse_dialect;
wait_for_suggestions_to_load = config().getBool("wait_for_suggestions_to_load", false);
auto logging = (config().has("logger.console")
|| config().has("logger.level")
@ -835,6 +848,8 @@ void LocalServer::processOptions(const OptionsDescription &, const CommandLineOp
config().setString("logger.level", options["logger.level"].as<std::string>());
if (options.count("send_logs_level"))
config().setString("send_logs_level", options["send_logs_level"].as<std::string>());
if (options.count("wait_for_suggestions_to_load"))
config().setBool("wait_for_suggestions_to_load", true);
}
void LocalServer::readArguments(int argc, char ** argv, Arguments & common_arguments, std::vector<Arguments> &, std::vector<Arguments> &)

View File

@ -65,6 +65,8 @@ private:
std::optional<StatusFile> status;
std::optional<std::filesystem::path> temporary_directory_to_delete;
std::unique_ptr<ReadBufferFromFile> input;
};
}

View File

@ -487,7 +487,7 @@ int main(int argc_, char ** argv_)
/// Interpret binary without argument or with arguments starts with dash
/// ('-') as clickhouse-local for better usability:
///
/// clickhouse help # dumps help
/// clickhouse -q 'select 1' # use local
/// clickhouse # spawn local
/// clickhouse local # spawn local

View File

@ -734,13 +734,17 @@ try
LOG_INFO(log, "Available CPU instruction sets: {}", cpu_info);
#endif
bool will_have_trace_collector = hasPHDRCache() && config().has("trace_log");
// Initialize global thread pool. Do it before we fetch configs from zookeeper
// nodes (`from_zk`), because ZooKeeper interface uses the pool. We will
// ignore `max_thread_pool_size` in configs we fetch from ZK, but oh well.
GlobalThreadPool::initialize(
server_settings.max_thread_pool_size,
server_settings.max_thread_pool_free_size,
server_settings.thread_pool_queue_size,
will_have_trace_collector ? server_settings.global_profiler_real_time_period_ns : 0,
will_have_trace_collector ? server_settings.global_profiler_cpu_time_period_ns : 0);
/// Wait for all threads to avoid possible use-after-free (for example logging objects can be already destroyed).
SCOPE_EXIT({
Stopwatch watch;

View File

@ -4,11 +4,12 @@
#include <Access/ExternalAuthenticators.h>
#include <Access/LDAPClient.h>
#include <Access/GSSAcceptor.h>
#include <Poco/SHA1Engine.h>
#include <Common/Exception.h>
#include <Common/SSHWrapper.h>
#include <Common/typeid_cast.h>
#include "config.h"
namespace DB
{
@ -74,7 +75,7 @@ namespace
}
#if USE_SSH
bool checkSshSignature(const std::vector<SSHKey> & keys, std::string_view signature, std::string_view original)
{
for (const auto & key: keys)
if (key.isPublic() && key.verifySignature(signature, original))
@ -114,7 +115,11 @@ bool Authentication::areCredentialsValid(
throw Authentication::Require<BasicCredentials>("ClickHouse X.509 Authentication");
case AuthenticationType::SSH_KEY:
throw Authentication::Require<SshCredentials>("Ssh Keys Authentication");
#if USE_SSH
throw Authentication::Require<SshCredentials>("SSH Keys Authentication");
#else
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without libssh");
#endif
case AuthenticationType::MAX:
break;
@ -145,7 +150,11 @@ bool Authentication::areCredentialsValid(
throw Authentication::Require<BasicCredentials>("ClickHouse X.509 Authentication");
case AuthenticationType::SSH_KEY:
throw Authentication::Require<SshCredentials>("Ssh Keys Authentication");
#if USE_SSH
throw Authentication::Require<SshCredentials>("SSH Keys Authentication");
#else
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without libssh");
#endif
case AuthenticationType::MAX:
break;
@ -178,7 +187,11 @@ bool Authentication::areCredentialsValid(
throw Authentication::Require<BasicCredentials>("ClickHouse X.509 Authentication");
case AuthenticationType::SSH_KEY:
throw Authentication::Require<SshCredentials>("Ssh Keys Authentication");
#if USE_SSH
throw Authentication::Require<SshCredentials>("SSH Keys Authentication");
#else
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without libssh");
#endif
case AuthenticationType::BCRYPT_PASSWORD:
return checkPasswordBcrypt(basic_credentials->getPassword(), auth_data.getPasswordHashBinary());
@ -216,13 +229,18 @@ bool Authentication::areCredentialsValid(
return auth_data.getSSLCertificateCommonNames().contains(ssl_certificate_credentials->getCommonName());
case AuthenticationType::SSH_KEY:
throw Authentication::Require<SshCredentials>("Ssh Keys Authentication");
#if USE_SSH
throw Authentication::Require<SshCredentials>("SSH Keys Authentication");
#else
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without libssh");
#endif
case AuthenticationType::MAX:
break;
}
}
#if USE_SSH
if (const auto * ssh_credentials = typeid_cast<const SshCredentials *>(&credentials))
{
switch (auth_data.getType())
@ -243,15 +261,12 @@ bool Authentication::areCredentialsValid(
throw Authentication::Require<SSLCertificateCredentials>("ClickHouse X.509 Authentication");
case AuthenticationType::SSH_KEY:
return checkSshSignature(auth_data.getSSHKeys(), ssh_credentials->getSignature(), ssh_credentials->getOriginal());
case AuthenticationType::MAX:
break;
}
}
#endif
if ([[maybe_unused]] const auto * always_allow_credentials = typeid_cast<const AlwaysAllowCredentials *>(&credentials))
return true;

View File

@ -105,7 +105,10 @@ bool operator ==(const AuthenticationData & lhs, const AuthenticationData & rhs)
return (lhs.type == rhs.type) && (lhs.password_hash == rhs.password_hash)
&& (lhs.ldap_server_name == rhs.ldap_server_name) && (lhs.kerberos_realm == rhs.kerberos_realm)
&& (lhs.ssl_certificate_common_names == rhs.ssl_certificate_common_names)
&& (lhs.ssh_keys == rhs.ssh_keys) && (lhs.http_auth_scheme == rhs.http_auth_scheme)
#if USE_SSH
&& (lhs.ssh_keys == rhs.ssh_keys)
#endif
&& (lhs.http_auth_scheme == rhs.http_auth_scheme)
&& (lhs.http_auth_server_name == rhs.http_auth_server_name);
}
@ -326,7 +329,7 @@ std::shared_ptr<ASTAuthenticationData> AuthenticationData::toAST() const
break;
#else
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without OpenSSL");
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without libssh");
#endif
}
case AuthenticationType::HTTP:
@ -355,7 +358,7 @@ AuthenticationData AuthenticationData::fromAST(const ASTAuthenticationData & que
{
#if USE_SSH
AuthenticationData auth_data(*query.type);
std::vector<SSHKey> keys;
size_t args_size = query.children.size();
for (size_t i = 0; i < args_size; ++i)
@ -366,7 +369,7 @@ AuthenticationData AuthenticationData::fromAST(const ASTAuthenticationData & que
try
{
keys.emplace_back(SSHKeyFactory::makePublicKeyFromBase64(key_base64, type));
}
catch (const std::invalid_argument &)
{
@ -377,7 +380,7 @@ AuthenticationData AuthenticationData::fromAST(const ASTAuthenticationData & que
auth_data.setSSHKeys(std::move(keys));
return auth_data;
#else
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without OpenSSL");
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without libssh");
#endif
}

View File

@ -2,14 +2,16 @@
#include <Access/Common/AuthenticationType.h>
#include <Access/Common/HTTPAuthenticationScheme.h>
#include <Common/SSHWrapper.h>
#include <Interpreters/Context_fwd.h>
#include <Parsers/Access/ASTAuthenticationData.h>
#include <vector>
#include <base/types.h>
#include <boost/container/flat_set.hpp>
#include "config.h"
namespace DB
{
@ -59,8 +61,10 @@ public:
const boost::container::flat_set<String> & getSSLCertificateCommonNames() const { return ssl_certificate_common_names; }
void setSSLCertificateCommonNames(boost::container::flat_set<String> common_names_);
#if USE_SSH
const std::vector<SSHKey> & getSSHKeys() const { return ssh_keys; }
void setSSHKeys(std::vector<SSHKey> && ssh_keys_) { ssh_keys = std::forward<std::vector<SSHKey>>(ssh_keys_); }
#endif
HTTPAuthenticationScheme getHTTPAuthenticationScheme() const { return http_auth_scheme; }
void setHTTPAuthenticationScheme(HTTPAuthenticationScheme scheme) { http_auth_scheme = scheme; }
@ -94,7 +98,9 @@ private:
String kerberos_realm;
boost::container::flat_set<String> ssl_certificate_common_names;
String salt;
#if USE_SSH
std::vector<SSHKey> ssh_keys;
#endif
/// HTTP authentication properties
String http_auth_server_name;
HTTPAuthenticationScheme http_auth_scheme = HTTPAuthenticationScheme::BASIC;

View File

@ -34,8 +34,8 @@ enum class AuthenticationType
/// Password is encrypted in bcrypt hash.
BCRYPT_PASSWORD,
/// Server sends a random string named `challenge` which client needs to encrypt with private key.
/// The check is performed on server side by decrypting the data and comparing with the original string.
/// Server sends a random string named `challenge` to the client. The client encrypts it with its SSH private key.
/// The server decrypts the result using the SSH public key registered for the user and compares with the original string.
SSH_KEY,
/// Authentication through HTTP protocol

View File

@ -3,6 +3,7 @@
#include <base/types.h>
#include <memory>
#include "config.h"
namespace DB
{
@ -86,10 +87,11 @@ class MySQLNative41Credentials : public CredentialsWithScramble
using CredentialsWithScramble::CredentialsWithScramble;
};
#if USE_SSH
class SshCredentials : public Credentials
{
public:
SshCredentials(const String & user_name_, const String & signature_, const String & original_)
: Credentials(user_name_), signature(signature_), original(original_)
{
is_ready = true;
@ -117,5 +119,6 @@ private:
String signature;
String original;
};
#endif
}

View File

@ -31,7 +31,7 @@ void User::setName(const String & name_)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "User name is empty");
if (name_ == EncodedUserInfo::USER_INTERSERVER_MARKER)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "User name '{}' is reserved", name_);
if (name_.starts_with(EncodedUserInfo::SSH_KEY_AUTHENTICAION_MARKER))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "User name '{}' is reserved", name_);
name = name_;
}

View File

@ -1,6 +1,5 @@
#include <Access/UsersConfigAccessStorage.h>
#include <Access/Quota.h>
#include <Access/RowPolicy.h>
#include <Access/User.h>
#include <Access/Role.h>
@ -10,6 +9,7 @@
#include <Access/AccessChangesNotifier.h>
#include <Dictionaries/IDictionary.h>
#include <Common/Config/ConfigReloader.h>
#include <Common/SSHWrapper.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/quoteString.h>
#include <Common/transformEndianness.h>
@ -214,7 +214,7 @@ namespace
Poco::Util::AbstractConfiguration::Keys entries;
config.keys(ssh_keys_config, entries);
std::vector<SSHKey> keys;
for (const String& entry : entries)
{
const auto conf_pref = ssh_keys_config + "." + entry + ".";
@ -237,7 +237,7 @@ namespace
try
{
keys.emplace_back(SSHKeyFactory::makePublicKeyFromBase64(base64_key, type));
}
catch (const std::invalid_argument &)
{
@ -249,7 +249,7 @@ namespace
}
user->auth_data.setSSHKeys(std::move(keys));
#else
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without OpenSSL");
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without libssh");
#endif
}
else if (has_http_auth)

View File

@ -54,30 +54,30 @@ public:
{
const auto & value = columns[0]->getFloat64(row_num);
const auto & time = columns[1]->getFloat64(row_num);
data(place).add(value, time, half_decay);
}
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
{
data(place).merge(data(rhs), half_decay);
}
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
{
writeBinary(data(place).value, buf);
writeBinary(data(place).time, buf);
}
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
{
readBinary(data(place).value, buf);
readBinary(data(place).time, buf);
}
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
{
auto & column = assert_cast<ColumnVector<Float64> &>(to);
column.getData().push_back(data(place).get(half_decay));
}
};

View File

@ -293,32 +293,32 @@ public:
Float64 value = columns[0]->getFloat64(row_num);
UInt8 is_second = columns[1]->getUInt(row_num);
if (is_second)
data(place).addY(value, arena);
else
data(place).addX(value, arena);
}
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
{
data(place).merge(data(rhs), arena);
}
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
{
data(place).write(buf);
}
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
{
data(place).read(buf, arena);
}
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
{
if (!data(place).size_x || !data(place).size_y)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Aggregate function {} requires both samples to be non-empty", getName());
auto [d_statistic, p_value] = data(place).getResult(alternative, method);
/// Because p-value is a probability.
p_value = std::min(1.0, std::max(0.0, p_value));

View File

@ -147,6 +147,8 @@ public:
negative_store->merge(other.negative_store.get());
}
/// NOLINTBEGIN(readability-static-accessed-through-instance)
void serialize(WriteBuffer& buf) const
{
// Write the mapping
@ -201,6 +203,8 @@ public:
count = static_cast<Float64>(negative_store->count + zero_count + store->count);
}
/// NOLINTEND(readability-static-accessed-through-instance)
private:
std::unique_ptr<DDSketchLogarithmicMapping> mapping;
std::unique_ptr<DDSketchDenseStore> store;

Some files were not shown because too many files have changed in this diff.