Merge remote-tracking branch 'upstream/master' into docker-image

Nikolay Degterinsky 2024-04-09 13:13:58 +00:00
commit 6f78e89044
864 changed files with 11355 additions and 3591 deletions

View File

@ -96,7 +96,6 @@ Checks: [
'-modernize-use-default-member-init',
'-modernize-use-emplace',
'-modernize-use-nodiscard',
'-modernize-use-override',
'-modernize-use-trailing-return-type',
'-performance-inefficient-string-concatenation',

View File

@ -157,7 +157,7 @@ jobs:
################################# Stage Final #################################
#
FinishCheck:
if: ${{ !failure() && !cancelled() }}
if: ${{ !failure() && !cancelled() && github.event_name != 'merge_group' }}
needs: [Tests_1, Tests_2]
runs-on: [self-hosted, style-checker]
steps:

View File

@ -123,7 +123,6 @@
* Something was wrong with Apache Hive, which is experimental and not supported. [#60262](https://github.com/ClickHouse/ClickHouse/pull/60262) ([shanfengp](https://github.com/Aed-p)).
* An improvement for experimental parallel replicas: force reanalysis if parallel replicas changed [#60362](https://github.com/ClickHouse/ClickHouse/pull/60362) ([Raúl Marín](https://github.com/Algunenano)).
* Fix usage of plain metadata type with new disks configuration option [#60396](https://github.com/ClickHouse/ClickHouse/pull/60396) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Don't allow to set max_parallel_replicas to 0 as it doesn't make sense [#60430](https://github.com/ClickHouse/ClickHouse/pull/60430) ([Kruglov Pavel](https://github.com/Avogar)).
* Try to fix logical error 'Cannot capture column because it has incompatible type' in mapContainsKeyLike [#60451](https://github.com/ClickHouse/ClickHouse/pull/60451) ([Kruglov Pavel](https://github.com/Avogar)).
* Avoid calculation of scalar subqueries for CREATE TABLE. [#60464](https://github.com/ClickHouse/ClickHouse/pull/60464) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix deadlock in parallel parsing when lots of rows are skipped due to errors [#60516](https://github.com/ClickHouse/ClickHouse/pull/60516) ([Kruglov Pavel](https://github.com/Avogar)).

View File

@ -13,8 +13,6 @@
#include <tuple>
#include <limits>
#include <boost/math/special_functions/fpclassify.hpp>
// NOLINTBEGIN(*)
/// Use same extended double for all platforms
@ -22,6 +20,7 @@
#define CONSTEXPR_FROM_DOUBLE constexpr
using FromDoubleIntermediateType = long double;
#else
#include <boost/math/special_functions/fpclassify.hpp>
#include <boost/multiprecision/cpp_bin_float.hpp>
/// `wide_integer_from_builtin` can't be constexpr with non-literal `cpp_bin_float_double_extended`
#define CONSTEXPR_FROM_DOUBLE
@ -309,6 +308,13 @@ struct integer<Bits, Signed>::_impl
constexpr uint64_t max_int = std::numeric_limits<uint64_t>::max();
static_assert(std::is_same_v<T, double> || std::is_same_v<T, FromDoubleIntermediateType>);
/// Implementation-specific behaviour on overflow (if we don't check here, a stack overflow will be triggered in bigint_cast).
#if (LDBL_MANT_DIG == 64)
if (!std::isfinite(t))
{
self = 0;
return;
}
#else
if constexpr (std::is_same_v<T, double>)
{
if (!std::isfinite(t))
@ -325,6 +331,7 @@ struct integer<Bits, Signed>::_impl
return;
}
}
#endif
const T alpha = t / static_cast<T>(max_int);
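For context, the guard added here follows the usual classify-before-convert pattern: reject NaN and infinities before the value reaches the integer conversion path. A minimal self-contained sketch of the same idea (the function name and the saturation policy are illustrative, not taken from the patch):

```cpp
#include <cmath>
#include <cstdint>
#include <limits>

// Illustrative only: convert a double to uint64_t with a defined result for
// values the conversion cannot represent, instead of undefined behaviour.
uint64_t fromDoubleChecked(double t)
{
    if (!std::isfinite(t)) // NaN, +inf, -inf: pick a defined result (0),
        return 0;          // mirroring the implementation-specific choice above
    if (t <= 0.0)
        return 0;
    if (t >= static_cast<double>(std::numeric_limits<uint64_t>::max()))
        return std::numeric_limits<uint64_t>::max();
    return static_cast<uint64_t>(t);
}
```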

View File

@ -314,13 +314,13 @@ static int read_unicode(json_stream *json)
if (l < 0xdc00 || l > 0xdfff) {
json_error(json, "invalid surrogate pair continuation \\u%04lx out "
"of range (dc00-dfff)", l);
"of range (dc00-dfff)", (unsigned long)l);
return -1;
}
cp = ((h - 0xd800) * 0x400) + ((l - 0xdc00) + 0x10000);
} else if (cp >= 0xdc00 && cp <= 0xdfff) {
json_error(json, "dangling surrogate \\u%04lx", cp);
json_error(json, "dangling surrogate \\u%04lx", (unsigned long)cp);
return -1;
}
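Both casts above are the same class of fix: the `%04lx` conversion requires an `unsigned long` argument, so passing a differently typed value is formally undefined behaviour even where the widths happen to match. A standalone illustration (the function name is hypothetical):

```cpp
#include <cstdio>

// The explicit cast makes the argument type agree with the %lx specifier.
void reportDanglingSurrogate(long cp)
{
    std::printf("dangling surrogate \\u%04lx\n", (unsigned long)cp);
}
```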

View File

@ -213,6 +213,19 @@ namespace Net
Poco::Timespan getKeepAliveTimeout() const;
/// Returns the keep-alive timeout for HTTP connections.
void setKeepAliveMaxRequests(int max_requests);
int getKeepAliveMaxRequests() const;
int getKeepAliveRequest() const;
bool isKeepAliveExpired(double reliability = 1.0) const;
/// Returns whether the connection has expired, with a safety margin given as a fraction of the timeout (reliability)
double getKeepAliveReliability() const;
/// Returns the fraction of the keep-alive timeout within which a connection is considered safe to use.
/// This helps to avoid the situation where a client uses a nearly expired connection and receives a NoMessageException.
virtual std::ostream & sendRequest(HTTPRequest & request);
/// Sends the header for the given HTTP request to
/// the server.
@ -345,6 +358,8 @@ namespace Net
void assign(HTTPClientSession & session);
void setKeepAliveRequest(int request);
HTTPSessionFactory _proxySessionFactory;
/// Factory to create HTTPClientSession to proxy.
private:
@ -353,6 +368,8 @@ namespace Net
Poco::UInt16 _port;
ProxyConfig _proxyConfig;
Poco::Timespan _keepAliveTimeout;
int _keepAliveCurrentRequest = 0;
int _keepAliveMaxRequests = 1000;
Poco::Timestamp _lastRequest;
bool _reconnect;
bool _mustReconnect;
@ -361,6 +378,7 @@ namespace Net
Poco::SharedPtr<std::ostream> _pRequestStream;
Poco::SharedPtr<std::istream> _pResponseStream;
static const double _defaultKeepAliveReliabilityLevel;
static ProxyConfig _globalProxyConfig;
HTTPClientSession(const HTTPClientSession &);
@ -450,9 +468,19 @@ namespace Net
return _lastRequest;
}
inline void HTTPClientSession::setLastRequest(Poco::Timestamp time)
inline double HTTPClientSession::getKeepAliveReliability() const
{
_lastRequest = time;
return _defaultKeepAliveReliabilityLevel;
}
inline int HTTPClientSession::getKeepAliveMaxRequests() const
{
return _keepAliveMaxRequests;
}
inline int HTTPClientSession::getKeepAliveRequest() const
{
return _keepAliveCurrentRequest;
}
}
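Taken together, these declarations let callers reuse pooled connections without racing the server-side close. A hedged usage sketch (the helper function is hypothetical; the accessors are the ones declared in this patch):

```cpp
#include <Poco/Net/HTTPClientSession.h>

// Reuse a pooled session only while it is safely inside its keep-alive
// window. getKeepAliveReliability() defaults to 0.9, so the session counts
// as expired after 90% of the negotiated timeout, or once the per-connection
// request counter passes the max-requests limit.
bool canReuse(const Poco::Net::HTTPClientSession & session)
{
    return !session.isKeepAliveExpired(session.getKeepAliveReliability());
}
```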

View File

@ -120,6 +120,10 @@ namespace Net
/// The value is set to "Keep-Alive" if keepAlive is
/// true, or to "Close" otherwise.
void setKeepAliveTimeout(int timeout, int max_requests);
int getKeepAliveTimeout() const;
int getKeepAliveMaxRequests() const;
bool getKeepAlive() const;
/// Returns true if
/// * the message has a Connection header field and its value is "Keep-Alive"

View File

@ -44,7 +44,7 @@ namespace Net
/// - timeout: 60 seconds
/// - keepAlive: true
/// - maxKeepAliveRequests: 0
/// - keepAliveTimeout: 10 seconds
/// - keepAliveTimeout: 15 seconds
void setServerName(const std::string & serverName);
/// Sets the name and port (name:port) that the server uses to identify itself.

View File

@ -56,6 +56,8 @@ namespace Net
SocketAddress serverAddress();
/// Returns the server's address.
void setKeepAliveTimeout(Poco::Timespan keepAliveTimeout);
private:
bool _firstRequest;
Poco::Timespan _keepAliveTimeout;

View File

@ -37,6 +37,7 @@ namespace Net {
HTTPClientSession::ProxyConfig HTTPClientSession::_globalProxyConfig;
const double HTTPClientSession::_defaultKeepAliveReliabilityLevel = 0.9;
HTTPClientSession::HTTPClientSession():
@ -220,7 +221,41 @@ void HTTPClientSession::setGlobalProxyConfig(const ProxyConfig& config)
void HTTPClientSession::setKeepAliveTimeout(const Poco::Timespan& timeout)
{
_keepAliveTimeout = timeout;
if (connected())
{
throw Poco::IllegalStateException("cannot change keep alive timeout on initiated connection, "
"That value is managed privately after connection is established.");
}
_keepAliveTimeout = timeout;
}
void HTTPClientSession::setKeepAliveMaxRequests(int max_requests)
{
if (connected())
{
throw Poco::IllegalStateException("cannot change keep alive max requests on initiated connection, "
"That value is managed privately after connection is established.");
}
_keepAliveMaxRequests = max_requests;
}
void HTTPClientSession::setKeepAliveRequest(int request)
{
_keepAliveCurrentRequest = request;
}
void HTTPClientSession::setLastRequest(Poco::Timestamp time)
{
if (connected())
{
throw Poco::IllegalStateException("cannot change last request on initiated connection, "
"That value is managed privately after connection is established.");
}
_lastRequest = time;
}
@ -231,6 +266,8 @@ std::ostream& HTTPClientSession::sendRequest(HTTPRequest& request)
clearException();
_responseReceived = false;
_keepAliveCurrentRequest += 1;
bool keepAlive = getKeepAlive();
if (((connected() && !keepAlive) || mustReconnect()) && !_host.empty())
{
@ -241,8 +278,10 @@ std::ostream& HTTPClientSession::sendRequest(HTTPRequest& request)
{
if (!connected())
reconnect();
if (!keepAlive)
request.setKeepAlive(false);
if (!request.has(HTTPMessage::CONNECTION))
request.setKeepAlive(keepAlive);
if (keepAlive && !request.has(HTTPMessage::CONNECTION_KEEP_ALIVE) && _keepAliveTimeout.totalSeconds() > 0)
request.setKeepAliveTimeout(_keepAliveTimeout.totalSeconds(), _keepAliveMaxRequests);
if (!request.has(HTTPRequest::HOST) && !_host.empty())
request.setHost(_host, _port);
if (!_proxyConfig.host.empty() && !bypassProxy())
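As a concrete example (values assumed): a session with a 10 second keep-alive timeout and the default limit of 1000 requests now sends `Connection: Keep-Alive` together with `Keep-Alive: timeout=10, max=1000` on outgoing requests, unless the caller has already set those headers; the header format comes from `HTTPMessage::setKeepAliveTimeout` shown further down.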
@ -324,6 +363,17 @@ std::istream& HTTPClientSession::receiveResponse(HTTPResponse& response)
_mustReconnect = getKeepAlive() && !response.getKeepAlive();
if (!_mustReconnect)
{
/// When the server sends its keep-alive timeout, the client has to follow that value
auto timeout = response.getKeepAliveTimeout();
if (timeout > 0)
_keepAliveTimeout = std::min(_keepAliveTimeout, Poco::Timespan(timeout, 0));
auto max_requests = response.getKeepAliveMaxRequests();
if (max_requests > 0)
_keepAliveMaxRequests = std::min(_keepAliveMaxRequests, max_requests);
}
if (!_expectResponseBody || response.getStatus() < 200 || response.getStatus() == HTTPResponse::HTTP_NO_CONTENT || response.getStatus() == HTTPResponse::HTTP_NOT_MODIFIED)
_pResponseStream = new HTTPFixedLengthInputStream(*this, 0);
else if (response.getChunkedTransferEncoding())
@ -430,15 +480,18 @@ std::string HTTPClientSession::proxyRequestPrefix() const
return result;
}
bool HTTPClientSession::isKeepAliveExpired(double reliability) const
{
Poco::Timestamp now;
return Timespan(Timestamp::TimeDiff(reliability * _keepAliveTimeout.totalMicroseconds())) <= now - _lastRequest
|| _keepAliveCurrentRequest > _keepAliveMaxRequests;
}
bool HTTPClientSession::mustReconnect() const
{
if (!_mustReconnect)
{
Poco::Timestamp now;
return _keepAliveTimeout <= now - _lastRequest;
}
else return true;
return isKeepAliveExpired(_defaultKeepAliveReliabilityLevel);
return true;
}
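To make the reliability margin concrete (values assumed): with the default level of 0.9 and a negotiated keep-alive timeout of 10 seconds, `mustReconnect` treats the connection as expired 9 seconds after the last request, so the client stops reusing it before the server's own 10 second deadline can close it mid-request.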
@ -511,14 +564,21 @@ void HTTPClientSession::assign(Poco::Net::HTTPClientSession & session)
if (buffered())
throw Poco::LogicException("assign to a session with not empty buffered data");
attachSocket(session.detachSocket());
setLastRequest(session.getLastRequest());
poco_assert(!connected());
setResolvedHost(session.getResolvedHost());
setKeepAlive(session.getKeepAlive());
setProxyConfig(session.getProxyConfig());
setTimeout(session.getConnectionTimeout(), session.getSendTimeout(), session.getReceiveTimeout());
setKeepAlive(session.getKeepAlive());
setLastRequest(session.getLastRequest());
setKeepAliveTimeout(session.getKeepAliveTimeout());
setProxyConfig(session.getProxyConfig());
_keepAliveMaxRequests = session._keepAliveMaxRequests;
_keepAliveCurrentRequest = session._keepAliveCurrentRequest;
attachSocket(session.detachSocket());
session.reset();
}

View File

@ -17,6 +17,7 @@
#include "Poco/NumberFormatter.h"
#include "Poco/NumberParser.h"
#include "Poco/String.h"
#include <format>
using Poco::NumberFormatter;
@ -179,4 +180,51 @@ bool HTTPMessage::getKeepAlive() const
}
void HTTPMessage::setKeepAliveTimeout(int timeout, int max_requests)
{
add(HTTPMessage::CONNECTION_KEEP_ALIVE, std::format("timeout={}, max={}", timeout, max_requests));
}
int parseFromHeaderValues(const std::string_view header_value, const std::string_view param_name)
{
auto param_value_pos = header_value.find(param_name);
if (param_value_pos == std::string::npos)
param_value_pos = header_value.size();
if (param_value_pos != header_value.size())
param_value_pos += param_name.size();
auto param_value_end = header_value.find(',', param_value_pos);
if (param_value_end == std::string::npos)
param_value_end = header_value.size();
auto timeout_value_substr = header_value.substr(param_value_pos, param_value_end - param_value_pos);
if (timeout_value_substr.empty())
return -1;
int value = 0;
auto [ptr, ec] = std::from_chars(timeout_value_substr.begin(), timeout_value_substr.end(), value);
if (ec == std::errc())
return value;
return -1;
}
int HTTPMessage::getKeepAliveTimeout() const
{
const std::string& ka_header = get(HTTPMessage::CONNECTION_KEEP_ALIVE, HTTPMessage::EMPTY);
static const std::string_view timeout_param = "timeout=";
return parseFromHeaderValues(ka_header, timeout_param);
}
int HTTPMessage::getKeepAliveMaxRequests() const
{
const std::string& ka_header = get(HTTPMessage::CONNECTION_KEEP_ALIVE, HTTPMessage::EMPTY);
static const std::string_view timeout_param = "max=";
return parseFromHeaderValues(ka_header, timeout_param);
}
} } // namespace Poco::Net
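A hedged check of the parser's contract, assuming `parseFromHeaderValues` above is visible in the translation unit (the header value is an invented example):

```cpp
#include <cassert>
#include <string_view>

void testKeepAliveParsing()
{
    std::string_view value = "timeout=10, max=1000";
    assert(parseFromHeaderValues(value, "timeout=") == 10);
    assert(parseFromHeaderValues(value, "max=") == 1000);
    // An absent parameter (or an unparsable number) yields -1.
    assert(parseFromHeaderValues(value, "foo=") == -1);
}
```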

View File

@ -88,7 +88,18 @@ void HTTPServerConnection::run()
pHandler->handleRequest(request, response);
session.setKeepAlive(_pParams->getKeepAlive() && response.getKeepAlive() && session.canKeepAlive());
}
/// All this fuss is about making the session close with a shorter timeout than the 15 s default set in the HTTPServerParams constructor
if (_pParams->getKeepAlive() && response.getKeepAlive() && session.canKeepAlive())
{
int value = response.getKeepAliveTimeout();
if (value < 0)
value = request.getKeepAliveTimeout();
if (value > 0)
session.setKeepAliveTimeout(Poco::Timespan(value, 0));
}
}
else sendErrorResponse(session, HTTPResponse::HTTP_NOT_IMPLEMENTED);
}
catch (Poco::Exception&)

View File

@ -33,6 +33,12 @@ HTTPServerSession::~HTTPServerSession()
{
}
void HTTPServerSession::setKeepAliveTimeout(Poco::Timespan keepAliveTimeout)
{
_keepAliveTimeout = keepAliveTimeout;
}
bool HTTPServerSession::hasMoreRequests()
{

contrib/NuRaft vendored

@ -1 +1 @@
Subproject commit 4a12f99dfc9d47c687ff7700b927cc76856225d1
Subproject commit cb5dc3c906e80f253e9ce9535807caef827cc2e0

View File

@ -32,6 +32,7 @@ set(SRCS
"${LIBRARY_DIR}/src/handle_custom_notification.cxx"
"${LIBRARY_DIR}/src/handle_vote.cxx"
"${LIBRARY_DIR}/src/launcher.cxx"
"${LIBRARY_DIR}/src/log_entry.cxx"
"${LIBRARY_DIR}/src/srv_config.cxx"
"${LIBRARY_DIR}/src/snapshot_sync_req.cxx"
"${LIBRARY_DIR}/src/snapshot_sync_ctx.cxx"
@ -50,6 +51,12 @@ else()
target_compile_definitions(_nuraft PRIVATE USE_BOOST_ASIO=1 BOOST_ASIO_STANDALONE=1)
endif()
target_link_libraries (_nuraft PRIVATE clickhouse_common_io)
# We must have it PUBLIC here because some headers that depend on it are
# directly included in clickhouse
target_compile_definitions(_nuraft PUBLIC USE_CLICKHOUSE_THREADS=1)
MESSAGE(STATUS "Will use clickhouse threads for NuRaft")
target_include_directories (_nuraft SYSTEM PRIVATE "${LIBRARY_DIR}/include/libnuraft")
# For some reason it includes "asio.h" directly, without the "boost/" prefix.
target_include_directories (_nuraft SYSTEM PRIVATE "${ClickHouse_SOURCE_DIR}/contrib/boost/boost")

View File

@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="24.3.1.2672"
ARG VERSION="24.3.2.23"
ARG PACKAGES="clickhouse-keeper"
ARG DIRECT_DOWNLOAD_URLS=""

View File

@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="24.3.1.2672"
ARG VERSION="24.3.2.23"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
ARG DIRECT_DOWNLOAD_URLS=""

View File

@ -27,7 +27,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
ARG VERSION="24.3.1.2672"
ARG VERSION="24.3.2.23"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
# set non-empty deb_location_url url to create a docker image

View File

@ -25,7 +25,7 @@ azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log &
config_logs_export_cluster /etc/clickhouse-server/config.d/system_logs_export.yaml
cache_policy=""
if [ $(( $(date +%-d) % 2 )) -eq 1 ]; then
if [ $(($RANDOM%2)) -eq 1 ]; then
cache_policy="SLRU"
else
cache_policy="LRU"

View File

@ -16,6 +16,8 @@ ln -snf "/usr/share/zoneinfo/$TZ" /etc/localtime && echo "$TZ" > /etc/timezone
dpkg -i package_folder/clickhouse-common-static_*.deb
dpkg -i package_folder/clickhouse-common-static-dbg_*.deb
dpkg -i package_folder/clickhouse-odbc-bridge_*.deb
dpkg -i package_folder/clickhouse-library-bridge_*.deb
dpkg -i package_folder/clickhouse-server_*.deb
dpkg -i package_folder/clickhouse-client_*.deb
@ -289,10 +291,10 @@ do
echo "$err"
[[ "0" != "${#err}" ]] && failed_to_save_logs=1
if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
err=$( { clickhouse-client -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst; } 2>&1 )
err=$( { clickhouse-client --port 19000 -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst; } 2>&1 )
echo "$err"
[[ "0" != "${#err}" ]] && failed_to_save_logs=1
err=$( { clickhouse-client -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.2.tsv.zst; } 2>&1 )
err=$( { clickhouse-client --port 29000 -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.2.tsv.zst; } 2>&1 )
echo "$err"
[[ "0" != "${#err}" ]] && failed_to_save_logs=1
fi

View File

@ -72,7 +72,7 @@ mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/c
# Randomize cache policies.
cache_policy=""
if [ $(( $(date +%-d) % 2 )) -eq 1 ]; then
if [ $(($RANDOM%2)) -eq 1 ]; then
cache_policy="SLRU"
else
cache_policy="LRU"
@ -87,6 +87,25 @@ if [ "$cache_policy" = "SLRU" ]; then
mv /etc/clickhouse-server/config.d/storage_conf.xml.tmp /etc/clickhouse-server/config.d/storage_conf.xml
fi
# Disable experimental WINDOW VIEW tests for stress tests, since they may be
# created with the old analyzer and then, after a server restart, the server
# will refuse to start.
# FIXME: remove once support for WINDOW VIEW is implemented in the analyzer.
sudo cat > /etc/clickhouse-server/users.d/stress_tests_overrides.xml <<EOL
<clickhouse>
<profiles>
<default>
<allow_experimental_window_view>false</allow_experimental_window_view>
<constraints>
<allow_experimental_window_view>
<readonly/>
</allow_experimental_window_view>
</constraints>
</default>
</profiles>
</clickhouse>
EOL
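The `<readonly/>` constraint pins `allow_experimental_window_view` at the profile value, so individual test queries cannot re-enable the setting for their session.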
start_server
clickhouse-client --query "SHOW TABLES FROM datasets"

View File

@ -0,0 +1,29 @@
---
sidebar_position: 1
sidebar_label: 2024
---
# 2024 Changelog
### ClickHouse release v24.3.2.23-lts (8b7d910960c) FIXME as compared to v24.3.1.2672-lts (2c5c589a882)
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fix logical error in group_by_use_nulls + grouping set + analyzer + materialize/constant [#61567](https://github.com/ClickHouse/ClickHouse/pull/61567) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix external table cannot parse data type Bool [#62115](https://github.com/ClickHouse/ClickHouse/pull/62115) ([Duc Canh Le](https://github.com/canhld94)).
* Revert "Merge pull request [#61564](https://github.com/ClickHouse/ClickHouse/issues/61564) from liuneng1994/optimize_in_single_value" [#62135](https://github.com/ClickHouse/ClickHouse/pull/62135) ([Raúl Marín](https://github.com/Algunenano)).
#### CI Fix or Improvement (changelog entry is not required)
* Backported in [#62030](https://github.com/ClickHouse/ClickHouse/issues/62030):. [#61869](https://github.com/ClickHouse/ClickHouse/pull/61869) ([Nikita Fomichev](https://github.com/fm4v)).
* Backported in [#62057](https://github.com/ClickHouse/ClickHouse/issues/62057): ... [#62044](https://github.com/ClickHouse/ClickHouse/pull/62044) ([Max K.](https://github.com/maxknv)).
* Backported in [#62204](https://github.com/ClickHouse/ClickHouse/issues/62204):. [#62190](https://github.com/ClickHouse/ClickHouse/pull/62190) ([Konstantin Bogdanov](https://github.com/thevar1able)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Fix some crashes with analyzer and group_by_use_nulls. [#61933](https://github.com/ClickHouse/ClickHouse/pull/61933) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix scalars create as select [#61998](https://github.com/ClickHouse/ClickHouse/pull/61998) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Ignore IfChainToMultiIfPass if returned type changed. [#62059](https://github.com/ClickHouse/ClickHouse/pull/62059) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix type for ConvertInToEqualPass [#62066](https://github.com/ClickHouse/ClickHouse/pull/62066) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Revert output Pretty in tty [#62090](https://github.com/ClickHouse/ClickHouse/pull/62090) ([Alexey Milovidov](https://github.com/alexey-milovidov)).

View File

@ -68,6 +68,12 @@ In the results of `SELECT` query, the values of `AggregateFunction` type have im
## Example of an Aggregated Materialized View {#example-of-an-aggregated-materialized-view}
The following examples assume that you have a database named `test`, so create it if it doesn't already exist:
```sql
CREATE DATABASE test;
```
We will create the table `test.visits` that contains the raw data:
``` sql
@ -80,17 +86,24 @@ CREATE TABLE test.visits
) ENGINE = MergeTree ORDER BY (StartDate, CounterID);
```
Next, we need to create an `AggregatingMergeTree` table that will store `AggregateFunction`s that keep track of the total number of visits and the number of unique users.
`AggregatingMergeTree` materialized view that watches the `test.visits` table, and use the `AggregateFunction` type:
``` sql
CREATE MATERIALIZED VIEW test.mv_visits
(
CREATE TABLE test.agg_visits (
StartDate DateTime64 NOT NULL,
CounterID UInt64,
Visits AggregateFunction(sum, Nullable(Int32)),
Users AggregateFunction(uniq, Nullable(Int32))
)
ENGINE = AggregatingMergeTree() ORDER BY (StartDate, CounterID)
ENGINE = AggregatingMergeTree() ORDER BY (StartDate, CounterID);
```
And then let's create a materialized view that populates `test.agg_visits` from `test.visits`:
```sql
CREATE MATERIALIZED VIEW test.visits_mv TO test.agg_visits
AS SELECT
StartDate,
CounterID,
@ -104,25 +117,45 @@ Inserting data into the `test.visits` table.
``` sql
INSERT INTO test.visits (StartDate, CounterID, Sign, UserID)
VALUES (1667446031, 1, 3, 4)
INSERT INTO test.visits (StartDate, CounterID, Sign, UserID)
VALUES (1667446031, 1, 6, 3)
VALUES (1667446031000, 1, 3, 4), (1667446031000, 1, 6, 3);
```
The data is inserted in both the table and the materialized view `test.mv_visits`.
The data is inserted in both `test.visits` and `test.agg_visits`.
To get the aggregated data, we need to execute a query such as `SELECT ... GROUP BY ...` from `test.agg_visits`:
``` sql
```sql
SELECT
StartDate,
sumMerge(Visits) AS Visits,
uniqMerge(Users) AS Users
FROM test.mv_visits
FROM test.agg_visits
GROUP BY StartDate
ORDER BY StartDate;
```
```text
┌───────────────StartDate─┬─Visits─┬─Users─┐
│ 2022-11-03 03:27:11.000 │ 9 │ 2 │
└─────────────────────────┴────────┴───────┘
```
Now let's add another couple of records to `test.visits`, but this time we'll use a different timestamp for one of the records:
```sql
INSERT INTO test.visits (StartDate, CounterID, Sign, UserID)
VALUES (1669446031000, 2, 5, 10), (1667446031000, 3, 7, 5);
```
If we then run the `SELECT` query again, we'll see the following output:
```text
┌───────────────StartDate─┬─Visits─┬─Users─┐
│ 2022-11-03 03:27:11.000 │ 16 │ 3 │
│ 2022-11-26 07:00:31.000 │ 5 │ 1 │
└─────────────────────────┴────────┴───────┘
```
## Related Content
- Blog: [Using Aggregate Combinators in ClickHouse](https://clickhouse.com/blog/aggregate-functions-combinators-in-clickhouse-for-arrays-maps-and-states)

View File

@ -18,6 +18,9 @@ Run the command:
```bash
wget https://s3.amazonaws.com/menusdata.nypl.org/gzips/2021_08_01_07_01_17_data.tgz
# Option: Validate the checksum
md5sum 2021_08_01_07_01_17_data.tgz
# Checksum should be equal to: db6126724de939a5481e3160a2d67d15
```
Replace the link with an up-to-date one from http://menus.nypl.org/data if needed.

View File

@ -79,7 +79,7 @@ The supported formats are:
| [RowBinary](#rowbinary) | ✔ | ✔ |
| [RowBinaryWithNames](#rowbinarywithnamesandtypes) | ✔ | ✔ |
| [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes) | ✔ | ✔ |
| [RowBinaryWithDefaults](#rowbinarywithdefaults) | ✔ | |
| [RowBinaryWithDefaults](#rowbinarywithdefaults) | ✔ | |
| [Native](#native) | ✔ | ✔ |
| [Null](#null) | ✗ | ✔ |
| [XML](#xml) | ✗ | ✔ |
@ -1270,12 +1270,13 @@ SELECT * FROM json_each_row_nested
- [input_format_json_read_arrays_as_strings](/docs/en/operations/settings/settings-formats.md/#input_format_json_read_arrays_as_strings) - allow to parse JSON arrays as strings in JSON input formats. Default value - `true`.
- [input_format_json_read_objects_as_strings](/docs/en/operations/settings/settings-formats.md/#input_format_json_read_objects_as_strings) - allow to parse JSON objects as strings in JSON input formats. Default value - `true`.
- [input_format_json_named_tuples_as_objects](/docs/en/operations/settings/settings-formats.md/#input_format_json_named_tuples_as_objects) - parse named tuple columns as JSON objects. Default value - `true`.
- [input_format_json_try_infer_numbers_from_strings](/docs/en/operations/settings/settings-formats.md/#input_format_json_try_infer_numbers_from_strings) - Try to infer numbers from string fields while schema inference. Default value - `false`.
- [input_format_json_try_infer_numbers_from_strings](/docs/en/operations/settings/settings-formats.md/#input_format_json_try_infer_numbers_from_strings) - try to infer numbers from string fields while schema inference. Default value - `false`.
- [input_format_json_try_infer_named_tuples_from_objects](/docs/en/operations/settings/settings-formats.md/#input_format_json_try_infer_named_tuples_from_objects) - try to infer named tuple from JSON objects during schema inference. Default value - `true`.
- [input_format_json_infer_incomplete_types_as_strings](/docs/en/operations/settings/settings-formats.md/#input_format_json_infer_incomplete_types_as_strings) - use type String for keys that contains only Nulls or empty objects/arrays during schema inference in JSON input formats. Default value - `true`.
- [input_format_json_defaults_for_missing_elements_in_named_tuple](/docs/en/operations/settings/settings-formats.md/#input_format_json_defaults_for_missing_elements_in_named_tuple) - insert default values for missing elements in JSON object while parsing named tuple. Default value - `true`.
- [input_format_json_ignore_unknown_keys_in_named_tuple](/docs/en/operations/settings/settings-formats.md/#input_format_json_ignore_unknown_keys_in_named_tuple) - Ignore unknown keys in json object for named tuples. Default value - `false`.
- [input_format_json_ignore_unknown_keys_in_named_tuple](/docs/en/operations/settings/settings-formats.md/#input_format_json_ignore_unknown_keys_in_named_tuple) - ignore unknown keys in json object for named tuples. Default value - `false`.
- [input_format_json_compact_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_json_compact_allow_variable_number_of_columns) - allow variable number of columns in JSONCompact/JSONCompactEachRow format, ignore extra columns and use default values on missing columns. Default value - `false`.
- [input_format_json_throw_on_bad_escape_sequence](/docs/en/operations/settings/settings-formats.md/#input_format_json_throw_on_bad_escape_sequence) - throw an exception if JSON string contains bad escape sequence. If disabled, bad escape sequences will remain as is in the data. Default value - `true`.
- [output_format_json_quote_64bit_integers](/docs/en/operations/settings/settings-formats.md/#output_format_json_quote_64bit_integers) - controls quoting of 64-bit integers in JSON output format. Default value - `true`.
- [output_format_json_quote_64bit_floats](/docs/en/operations/settings/settings-formats.md/#output_format_json_quote_64bit_floats) - controls quoting of 64-bit floats in JSON output format. Default value - `false`.
- [output_format_json_quote_denormals](/docs/en/operations/settings/settings-formats.md/#output_format_json_quote_denormals) - enables '+nan', '-nan', '+inf', '-inf' outputs in JSON output format. Default value - `false`.
@ -1486,7 +1487,7 @@ Differs from [PrettySpaceNoEscapes](#prettyspacenoescapes) in that up to 10,000
- [output_format_pretty_max_value_width](/docs/en/operations/settings/settings-formats.md/#output_format_pretty_max_value_width) - Maximum width of value to display in Pretty formats. If greater - it will be cut. Default value - `10000`.
- [output_format_pretty_color](/docs/en/operations/settings/settings-formats.md/#output_format_pretty_color) - use ANSI escape sequences to paint colors in Pretty formats. Default value - `true`.
- [output_format_pretty_grid_charset](/docs/en/operations/settings/settings-formats.md/#output_format_pretty_grid_charset) - Charset for printing grid borders. Available charsets: ASCII, UTF-8. Default value - `UTF-8`.
- [output_format_pretty_row_numbers](/docs/en/operations/settings/settings-formats.md/#output_format_pretty_row_numbers) - Add row numbers before each row for pretty output format. Default value - `false`.
- [output_format_pretty_row_numbers](/docs/en/operations/settings/settings-formats.md/#output_format_pretty_row_numbers) - Add row numbers before each row for pretty output format. Default value - `true`.
## RowBinary {#rowbinary}
@ -2464,7 +2465,7 @@ Result:
## Npy {#data-format-npy}
This function is designed to load a NumPy array from a .npy file into ClickHouse. The NumPy file format is a binary format used for efficiently storing arrays of numerical data. During import, ClickHouse treats top level dimension as an array of rows with single column. Supported Npy data types and their corresponding type in ClickHouse:
This function is designed to load a NumPy array from a .npy file into ClickHouse. The NumPy file format is a binary format used for efficiently storing arrays of numerical data. During import, ClickHouse treats top level dimension as an array of rows with single column. Supported Npy data types and their corresponding type in ClickHouse:
| Npy type | ClickHouse type |
|:--------:|:---------------:|
| b1 | UInt8 |

View File

@ -507,16 +507,18 @@ Example:
``` xml
<http_handlers>
<rule>
<url><![CDATA[/query_param_with_url/\w+/(?P<name_1>[^/]+)(/(?P<name_2>[^/]+))?]]></url>
<url><![CDATA[regex:/query_param_with_url/(?P<name_1>[^/]+)]]></url>
<methods>GET</methods>
<headers>
<XXX>TEST_HEADER_VALUE</XXX>
<PARAMS_XXX><![CDATA[(?P<name_1>[^/]+)(/(?P<name_2>[^/]+))?]]></PARAMS_XXX>
<PARAMS_XXX><![CDATA[regex:(?P<name_2>[^/]+)]]></PARAMS_XXX>
</headers>
<handler>
<type>predefined_query_handler</type>
<query>SELECT value FROM system.settings WHERE name = {name_1:String}</query>
<query>SELECT name, value FROM system.settings WHERE name = {name_2:String}</query>
<query>
SELECT name, value FROM system.settings
WHERE name IN ({name_1:String}, {name_2:String})
</query>
</handler>
</rule>
<defaults/>
@ -524,13 +526,13 @@ Example:
```
``` bash
$ curl -H 'XXX:TEST_HEADER_VALUE' -H 'PARAMS_XXX:max_threads' 'http://localhost:8123/query_param_with_url/1/max_threads/max_final_threads?max_threads=1&max_final_threads=2'
1
max_final_threads 2
$ curl -H 'XXX:TEST_HEADER_VALUE' -H 'PARAMS_XXX:max_final_threads' 'http://localhost:8123/query_param_with_url/max_threads?max_threads=1&max_final_threads=2'
max_final_threads 2
max_threads 1
```
:::note
In one `predefined_query_handler` only supports one `query` of an insert type.
In one `predefined_query_handler` only one `query` is supported.
:::
### dynamic_query_handler {#dynamic_query_handler}

View File

@ -436,7 +436,7 @@ Default: 0
Restriction on dropping partitions.
If the size of a [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table exceeds `max_partition_size_to_drop` (in bytes), you can't drop a partition using a [DROP PARTITION](../../sql-reference/statements/alter/partition.md#drop-partitionpart) query.
This setting does not require a restart of the Clickhouse server to apply. Another way to disable the restriction is to create the `<clickhouse-path>/flags/force_drop_table` file.
This setting does not require a restart of the ClickHouse server to apply. Another way to disable the restriction is to create the `<clickhouse-path>/flags/force_drop_table` file.
Default value: 50 GB.
The value 0 means that you can drop partitions without any restrictions.
@ -518,7 +518,7 @@ Restriction on deleting tables.
If the size of a [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table exceeds `max_table_size_to_drop` (in bytes), you can't delete it using a [DROP](../../sql-reference/statements/drop.md) query or [TRUNCATE](../../sql-reference/statements/truncate.md) query.
This setting does not require a restart of the Clickhouse server to apply. Another way to disable the restriction is to create the `<clickhouse-path>/flags/force_drop_table` file.
This setting does not require a restart of the ClickHouse server to apply. Another way to disable the restriction is to create the `<clickhouse-path>/flags/force_drop_table` file.
Default value: 50 GB.
The value 0 means that you can delete all tables without any restrictions.
@ -1570,7 +1570,7 @@ Restriction on deleting tables.
If the size of a [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table exceeds `max_table_size_to_drop` (in bytes), you can't delete it using a [DROP](../../sql-reference/statements/drop.md) query or [TRUNCATE](../../sql-reference/statements/truncate.md) query.
This setting does not require a restart of the Clickhouse server to apply. Another way to disable the restriction is to create the `<clickhouse-path>/flags/force_drop_table` file.
This setting does not require a restart of the ClickHouse server to apply. Another way to disable the restriction is to create the `<clickhouse-path>/flags/force_drop_table` file.
Default value: 50 GB.
@ -1588,7 +1588,7 @@ Restriction on dropping partitions.
If the size of a [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table exceeds `max_partition_size_to_drop` (in bytes), you can't drop a partition using a [DROP PARTITION](../../sql-reference/statements/alter/partition.md#drop-partitionpart) query.
This setting does not require a restart of the Clickhouse server to apply. Another way to disable the restriction is to create the `<clickhouse-path>/flags/force_drop_table` file.
This setting does not require a restart of the ClickHouse server to apply. Another way to disable the restriction is to create the `<clickhouse-path>/flags/force_drop_table` file.
Default value: 50 GB.

View File

@ -651,6 +651,12 @@ This setting works only when setting `input_format_json_named_tuples_as_objects`
Enabled by default.
## input_format_json_throw_on_bad_escape_sequence {#input_format_json_throw_on_bad_escape_sequence}
Throw an exception if JSON string contains bad escape sequence in JSON input formats. If disabled, bad escape sequences will remain as is in the data.
Enabled by default.
## output_format_json_array_of_rows {#output_format_json_array_of_rows}
Enables the ability to output all rows as a JSON array in the [JSONEachRow](../../interfaces/formats.md/#jsoneachrow) format.
@ -1636,7 +1642,7 @@ Possible values:
- 0 — Output without row numbers.
- 1 — Output with row numbers.
Default value: `0`.
Default value: `1`.
**Example**

View File

@ -36,7 +36,7 @@ E.g. configuration option
<s3>
<type>s3</type>
<endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
<use_invironment_credentials>1</use_invironment_credentials>
<use_environment_credentials>1</use_environment_credentials>
</s3>
```
@ -47,7 +47,7 @@ is equal to configuration (from `24.1`):
<object_storage_type>s3</object_storage_type>
<metadata_type>local</metadata_type>
<endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
<use_invironment_credentials>1</use_invironment_credentials>
<use_environment_credentials>1</use_environment_credentials>
</s3>
```
@ -56,7 +56,7 @@ Configuration
<s3_plain>
<type>s3_plain</type>
<endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
<use_invironment_credentials>1</use_invironment_credentials>
<use_environment_credentials>1</use_environment_credentials>
</s3_plain>
```
@ -67,7 +67,7 @@ is equal to
<object_storage_type>s3</object_storage_type>
<metadata_type>plain</metadata_type>
<endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
<use_invironment_credentials>1</use_invironment_credentials>
<use_environment_credentials>1</use_environment_credentials>
</s3_plain>
```
@ -79,7 +79,7 @@ Example of full storage configuration will look like:
<s3>
<type>s3</type>
<endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
<use_invironment_credentials>1</use_invironment_credentials>
<use_environment_credentials>1</use_environment_credentials>
</s3>
</disks>
<policies>
@ -105,7 +105,7 @@ Starting with 24.1 clickhouse version, it can also look like:
<object_storage_type>s3</object_storage_type>
<metadata_type>local</metadata_type>
<endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
<use_invironment_credentials>1</use_invironment_credentials>
<use_environment_credentials>1</use_environment_credentials>
</s3>
</disks>
<policies>
@ -324,7 +324,7 @@ Configuration:
<s3_plain>
<type>s3_plain</type>
<endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
<use_invironment_credentials>1</use_invironment_credentials>
<use_environment_credentials>1</use_environment_credentials>
</s3_plain>
```
@ -337,7 +337,7 @@ Configuration:
<object_storage_type>azure</object_storage_type>
<metadata_type>plain</metadata_type>
<endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
<use_invironment_credentials>1</use_invironment_credentials>
<use_environment_credentials>1</use_environment_credentials>
</s3_plain>
```
@ -520,13 +520,13 @@ Example of configuration for versions later or equal to 22.8:
</cache>
</disks>
<policies>
<s3-cache>
<s3_cache>
<volumes>
<main>
<disk>cache</disk>
</main>
</volumes>
</s3-cache>
</s3_cache>
<policies>
</storage_configuration>
```
@ -546,13 +546,13 @@ Example of configuration for versions earlier than 22.8:
</s3>
</disks>
<policies>
<s3-cache>
<s3_cache>
<volumes>
<main>
<disk>s3</disk>
</main>
</volumes>
</s3-cache>
</s3_cache>
<policies>
</storage_configuration>
```

View File

@ -47,7 +47,7 @@ An example:
<engine>ENGINE = MergeTree PARTITION BY toYYYYMM(event_date) ORDER BY (event_date, event_time) SETTINGS index_granularity = 1024</engine>
-->
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
<max_size_rows>1048576</max_size>
<max_size_rows>1048576</max_size_rows>
<reserved_size_rows>8192</reserved_size_rows>
<buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
<flush_on_crash>false</flush_on_crash>

View File

@ -483,7 +483,7 @@ Where:
- `r1`- the number of unique visitors who visited the site during 2020-01-01 (the `cond1` condition).
- `r2`- the number of unique visitors who visited the site during a specific time period between 2020-01-01 and 2020-01-02 (`cond1` and `cond2` conditions).
- `r3`- the number of unique visitors who visited the site during a specific time period between 2020-01-01 and 2020-01-03 (`cond1` and `cond3` conditions).
- `r3`- the number of unique visitors who visited the site during a specific time period on 2020-01-01 and 2020-01-03 (`cond1` and `cond3` conditions).
## uniqUpTo(N)(x)

View File

@ -7,26 +7,33 @@ sidebar_position: 351
[Cramer's V](https://en.wikipedia.org/wiki/Cram%C3%A9r%27s_V) (sometimes referred to as Cramer's phi) is a measure of association between two columns in a table. The result of the `cramersV` function ranges from 0 (corresponding to no association between the variables) to 1 and can reach 1 only when each value is completely determined by the other. It may be viewed as the association between two variables as a percentage of their maximum possible variation.
:::note
For a bias corrected version of Cramer's V see: [cramersVBiasCorrected](./cramersvbiascorrected.md)
:::
**Syntax**
``` sql
cramersV(column1, column2)
```
**Arguments**
**Parameters**
- `column1` and `column2` are the columns to be compared
- `column1`: first column to be compared.
- `column2`: second column to be compared.
**Returned value**
- a value between 0 (corresponding to no association between the columns' values) to 1 (complete association).
**Return type** is always [Float64](../../../sql-reference/data-types/float.md).
Type: always [Float64](../../../sql-reference/data-types/float.md).
**Example**
The following two columns being compared below have no association with each other, so the result of `cramersV` is 0:
Query:
``` sql
SELECT
cramersV(a, b)

View File

@ -5,31 +5,31 @@ sidebar_position: 352
# cramersVBiasCorrected
Cramer's V is a measure of association between two columns in a table. The result of the [`cramersV` function](./cramersv.md) ranges from 0 (corresponding to no association between the variables) to 1 and can reach 1 only when each value is completely determined by the other. The function can be heavily biased, so this version of Cramer's V uses the [bias correction](https://en.wikipedia.org/wiki/Cram%C3%A9r%27s_V#Bias_correction).
**Syntax**
``` sql
cramersVBiasCorrected(column1, column2)
```
**Arguments**
**Parameters**
- `column1` and `column2` are the columns to be compared
- `column1`: first column to be compared.
- `column2`: second column to be compared.
**Returned value**
- a value between 0 (corresponding to no association between the columns' values) to 1 (complete association).
**Return type** is always [Float64](../../../sql-reference/data-types/float.md).
Type: always [Float64](../../../sql-reference/data-types/float.md).
**Example**
The following two columns being compared below have a small association with each other. Notice the result of `cramersVBiasCorrected` is smaller than the result of `cramersV`:
Query:
``` sql
SELECT
cramersV(a, b),

View File

@ -1,6 +1,6 @@
---
slug: /en/sql-reference/data-types/aggregatefunction
sidebar_position: 53
sidebar_position: 46
sidebar_label: AggregateFunction
---

View File

@ -1,6 +1,6 @@
---
slug: /en/sql-reference/data-types/array
sidebar_position: 52
sidebar_position: 32
sidebar_label: Array(T)
---

View File

@ -1,6 +1,6 @@
---
slug: /en/sql-reference/data-types/boolean
sidebar_position: 43
sidebar_position: 22
sidebar_label: Boolean
---

View File

@ -1,6 +1,6 @@
---
slug: /en/sql-reference/data-types/date
sidebar_position: 47
sidebar_position: 12
sidebar_label: Date
---

View File

@ -1,6 +1,6 @@
---
slug: /en/sql-reference/data-types/date32
sidebar_position: 48
sidebar_position: 14
sidebar_label: Date32
---

View File

@ -1,6 +1,6 @@
---
slug: /en/sql-reference/data-types/datetime
sidebar_position: 48
sidebar_position: 16
sidebar_label: DateTime
---
@ -36,9 +36,9 @@ You can explicitly set a time zone for `DateTime`-type columns when creating a t
The [clickhouse-client](../../interfaces/cli.md) applies the server time zone by default if a time zone isn't explicitly set when initializing the data type. To use the client time zone, run `clickhouse-client` with the `--use_client_time_zone` parameter.
ClickHouse outputs values depending on the value of the [date_time_output_format](../../operations/settings/settings.md#settings-date_time_output_format) setting. `YYYY-MM-DD hh:mm:ss` text format by default. Additionally, you can change the output with the [formatDateTime](../../sql-reference/functions/date-time-functions.md#formatdatetime) function.
ClickHouse outputs values depending on the value of the [date_time_output_format](../../operations/settings/settings-formats.md#date_time_output_format) setting. `YYYY-MM-DD hh:mm:ss` text format by default. Additionally, you can change the output with the [formatDateTime](../../sql-reference/functions/date-time-functions.md#formatdatetime) function.
When inserting data into ClickHouse, you can use different formats of date and time strings, depending on the value of the [date_time_input_format](../../operations/settings/settings.md#settings-date_time_input_format) setting.
When inserting data into ClickHouse, you can use different formats of date and time strings, depending on the value of the [date_time_input_format](../../operations/settings/settings-formats.md#date_time_input_format) setting.
## Examples
@ -147,8 +147,8 @@ Time shifts for multiple days. Some pacific islands changed their timezone offse
- [Type conversion functions](../../sql-reference/functions/type-conversion-functions.md)
- [Functions for working with dates and times](../../sql-reference/functions/date-time-functions.md)
- [Functions for working with arrays](../../sql-reference/functions/array-functions.md)
- [The `date_time_input_format` setting](../../operations/settings/settings-formats.md#settings-date_time_input_format)
- [The `date_time_output_format` setting](../../operations/settings/settings-formats.md#settings-date_time_output_format)
- [The `date_time_input_format` setting](../../operations/settings/settings-formats.md#date_time_input_format)
- [The `date_time_output_format` setting](../../operations/settings/settings-formats.md#date_time_output_format)
- [The `timezone` server configuration parameter](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone)
- [The `session_timezone` setting](../../operations/settings/settings.md#session_timezone)
- [Operators for working with dates and times](../../sql-reference/operators/index.md#operators-datetime)

View File

@ -1,6 +1,6 @@
---
slug: /en/sql-reference/data-types/datetime64
sidebar_position: 49
sidebar_position: 18
sidebar_label: DateTime64
---

View File

@ -1,6 +1,6 @@
---
slug: /en/sql-reference/data-types/decimal
sidebar_position: 42
sidebar_position: 6
sidebar_label: Decimal
---

View File

@ -1,6 +1,6 @@
---
slug: /en/sql-reference/data-types/enum
sidebar_position: 50
sidebar_position: 20
sidebar_label: Enum
---

View File

@ -1,10 +1,10 @@
---
slug: /en/sql-reference/data-types/fixedstring
sidebar_position: 45
sidebar_position: 10
sidebar_label: FixedString(N)
---
# FixedString
# FixedString(N)
A fixed-length string of `N` bytes (neither characters nor code points).

View File

@ -1,6 +1,6 @@
---
slug: /en/sql-reference/data-types/float
sidebar_position: 41
sidebar_position: 4
sidebar_label: Float32, Float64
---

View File

@ -1,8 +1,8 @@
---
slug: /en/sql-reference/data-types/geo
sidebar_position: 62
sidebar_position: 54
sidebar_label: Geo
title: "Geo Data Types"
title: "Geometric"
---
ClickHouse supports data types for representing geographical objects — locations, lands, etc.

View File

@ -1,10 +1,10 @@
---
slug: /en/sql-reference/data-types/
sidebar_label: List of data types
sidebar_position: 37
sidebar_position: 1
---
# ClickHouse Data Types
# Data Types in ClickHouse
ClickHouse can store various kinds of data in table cells. This section describes the supported data types and special considerations for using and/or implementing them if any.

View File

@ -1,6 +1,6 @@
---
slug: /en/sql-reference/data-types/int-uint
sidebar_position: 40
sidebar_position: 2
sidebar_label: UInt8, UInt16, UInt32, UInt64, UInt128, UInt256, Int8, Int16, Int32, Int64, Int128, Int256
---

View File

@ -1,6 +1,6 @@
---
slug: /en/sql-reference/data-types/ipv4
sidebar_position: 59
sidebar_position: 28
sidebar_label: IPv4
---

View File

@ -1,6 +1,6 @@
---
slug: /en/sql-reference/data-types/ipv6
sidebar_position: 60
sidebar_position: 30
sidebar_label: IPv6
---

View File

@ -1,6 +1,6 @@
---
slug: /en/sql-reference/data-types/json
sidebar_position: 54
sidebar_position: 26
sidebar_label: JSON
---

View File

@ -1,10 +1,10 @@
---
slug: /en/sql-reference/data-types/lowcardinality
sidebar_position: 51
sidebar_label: LowCardinality
sidebar_position: 42
sidebar_label: LowCardinality(T)
---
# LowCardinality
# LowCardinality(T)
Changes the internal representation of other data types to be dictionary-encoded.

View File

@ -1,12 +1,12 @@
---
slug: /en/sql-reference/data-types/map
sidebar_position: 65
sidebar_label: Map(key, value)
sidebar_position: 36
sidebar_label: Map(K, V)
---
# Map(key, value)
# Map(K, V)
`Map(key, value)` data type stores `key:value` pairs.
`Map(K, V)` data type stores `key:value` pairs.
**Parameters**

View File

@ -1,27 +0,0 @@
---
slug: /en/sql-reference/data-types/multiword-types
sidebar_position: 61
sidebar_label: Multiword Type Names
title: "Multiword Types"
---
When creating tables, you can use data types with a name consisting of several words. This is implemented for better SQL compatibility.
## Multiword Types Support
| Multiword types | Simple types |
|----------------------------------|--------------------------------------------------------------|
| DOUBLE PRECISION | [Float64](../../sql-reference/data-types/float.md) |
| CHAR LARGE OBJECT | [String](../../sql-reference/data-types/string.md) |
| CHAR VARYING | [String](../../sql-reference/data-types/string.md) |
| CHARACTER LARGE OBJECT | [String](../../sql-reference/data-types/string.md) |
| CHARACTER VARYING | [String](../../sql-reference/data-types/string.md) |
| NCHAR LARGE OBJECT | [String](../../sql-reference/data-types/string.md) |
| NCHAR VARYING | [String](../../sql-reference/data-types/string.md) |
| NATIONAL CHARACTER LARGE OBJECT | [String](../../sql-reference/data-types/string.md) |
| NATIONAL CHARACTER VARYING | [String](../../sql-reference/data-types/string.md) |
| NATIONAL CHAR VARYING | [String](../../sql-reference/data-types/string.md) |
| NATIONAL CHARACTER | [String](../../sql-reference/data-types/string.md) |
| NATIONAL CHAR | [String](../../sql-reference/data-types/string.md) |
| BINARY LARGE OBJECT | [String](../../sql-reference/data-types/string.md) |
| BINARY VARYING | [String](../../sql-reference/data-types/string.md) |

View File

@ -1,7 +1,7 @@
---
slug: /en/sql-reference/data-types/nullable
sidebar_position: 55
sidebar_label: Nullable
sidebar_position: 44
sidebar_label: Nullable(T)
---
# Nullable(T)

View File

@ -1,5 +1,7 @@
---
slug: /en/sql-reference/data-types/simpleaggregatefunction
sidebar_position: 48
sidebar_label: SimpleAggregateFunction
---
# SimpleAggregateFunction

View File

@ -1,6 +1,6 @@
---
slug: /en/sql-reference/data-types/string
sidebar_position: 44
sidebar_position: 8
sidebar_label: String
---
@ -13,7 +13,7 @@ When creating tables, numeric parameters for string fields can be set (e.g. `VAR
Aliases:
- `String``LONGTEXT`, `MEDIUMTEXT`, `TINYTEXT`, `TEXT`, `LONGBLOB`, `MEDIUMBLOB`, `TINYBLOB`, `BLOB`, `VARCHAR`, `CHAR`.
- `String``LONGTEXT`, `MEDIUMTEXT`, `TINYTEXT`, `TEXT`, `LONGBLOB`, `MEDIUMBLOB`, `TINYBLOB`, `BLOB`, `VARCHAR`, `CHAR`, `CHAR LARGE OBJECT`, `CHAR VARYING`, `CHARACTER LARGE OBJECT`, `CHARACTER VARYING`, `NCHAR LARGE OBJECT`, `NCHAR VARYING`, `NATIONAL CHARACTER LARGE OBJECT`, `NATIONAL CHARACTER VARYING`, `NATIONAL CHAR VARYING`, `NATIONAL CHARACTER`, `NATIONAL CHAR`, `BINARY LARGE OBJECT`, `BINARY VARYING`,
## Encodings

View File

@ -1,10 +1,10 @@
---
slug: /en/sql-reference/data-types/tuple
sidebar_position: 54
sidebar_position: 34
sidebar_label: Tuple(T1, T2, ...)
---
# Tuple(T1, T2, …)
# Tuple(T1, T2, ...)
A tuple of elements, each having an individual [type](../../sql-reference/data-types/index.md#data_types). Tuple must contain at least one element.

View File

@ -1,6 +1,6 @@
---
slug: /en/sql-reference/data-types/uuid
sidebar_position: 46
sidebar_position: 24
sidebar_label: UUID
---

View File

@ -1,10 +1,10 @@
---
slug: /en/sql-reference/data-types/variant
sidebar_position: 55
sidebar_label: Variant
sidebar_position: 40
sidebar_label: Variant(T1, T2, ...)
---
# Variant(T1, T2, T3, ...)
# Variant(T1, T2, ...)
This type represents a union of other data types. Type `Variant(T1, T2, ..., TN)` means that each row of this type
has a value of either type `T1` or `T2` or ... or `TN` or none of them (`NULL` value).
@ -190,22 +190,67 @@ SELECT toTypeName(variantType(v)) FROM test LIMIT 1;
└─────────────────────────────────────────────────────────────────────┘
```
## Conversion between Variant column and other columns
## Conversion between a Variant column and other columns
There are 3 possible conversions that can be performed with Variant column.
There are 4 possible conversions that can be performed with a column of type `Variant`.
### Converting an ordinary column to a Variant column
### Converting a String column to a Variant column
It is possible to convert ordinary column with type `T` to a `Variant` column containing this type:
Conversion from `String` to `Variant` is performed by parsing a value of `Variant` type from the string value:
```sql
SELECT toTypeName(variant) as type_name, 'Hello, World!'::Variant(UInt64, String, Array(UInt64)) as variant;
SELECT '42'::Variant(String, UInt64) as variant, variantType(variant) as variant_type
```
```text
┌─type_name──────────────────────────────┬─variant───────┐
│ Variant(Array(UInt64), String, UInt64) │ Hello, World! │
└────────────────────────────────────────┴───────────────┘
┌─variant─┬─variant_type─┐
│ 42 │ UInt64 │
└─────────┴──────────────┘
```
```sql
SELECT '[1, 2, 3]'::Variant(String, Array(UInt64)) as variant, variantType(variant) as variant_type
```
```text
┌─variant─┬─variant_type──┐
│ [1,2,3] │ Array(UInt64) │
└─────────┴───────────────┘
```
```sql
SELECT CAST(map('key1', '42', 'key2', 'true', 'key3', '2020-01-01'), 'Map(String, Variant(UInt64, Bool, Date))') as map_of_variants, mapApply((k, v) -> (k, variantType(v)), map_of_variants) as map_of_variant_types
```
```text
┌─map_of_variants─────────────────────────────┬─map_of_variant_types──────────────────────────┐
│ {'key1':42,'key2':true,'key3':'2020-01-01'} │ {'key1':'UInt64','key2':'Bool','key3':'Date'} │
└─────────────────────────────────────────────┴───────────────────────────────────────────────┘
```
### Converting an ordinary column to a Variant column
It is possible to convert an ordinary column with type `T` to a `Variant` column containing this type:
```sql
SELECT toTypeName(variant) as type_name, [1,2,3]::Array(UInt64)::Variant(UInt64, String, Array(UInt64)) as variant, variantType(variant) as variant_name
```
```text
┌─type_name──────────────────────────────┬─variant─┬─variant_name──┐
│ Variant(Array(UInt64), String, UInt64) │ [1,2,3] │ Array(UInt64) │
└────────────────────────────────────────┴─────────┴───────────────┘
```
Note: converting from the `String` type is always performed through parsing; if you need to convert a `String` column to the `String` variant of a `Variant` without parsing, you can do the following:
```sql
SELECT '[1, 2, 3]'::Variant(String)::Variant(String, Array(UInt64), UInt64) as variant, variantType(variant) as variant_type
```
```text
┌─variant───┬─variant_type─┐
│ [1, 2, 3] │ String │
└───────────┴──────────────┘
```
### Converting a Variant column to an ordinary column
@ -395,3 +440,37 @@ SELECT v, variantType(v) FROM test ORDER by v;
│ 100 │ UInt32 │
└─────┴────────────────┘
```
## JSONExtract functions with Variant
All `JSONExtract*` functions support `Variant` type:
```sql
SELECT JSONExtract('{"a" : [1, 2, 3]}', 'a', 'Variant(UInt32, String, Array(UInt32))') AS variant, variantType(variant) AS variant_type;
```
```text
┌─variant─┬─variant_type──┐
│ [1,2,3] │ Array(UInt32) │
└─────────┴───────────────┘
```
```sql
SELECT JSONExtract('{"obj" : {"a" : 42, "b" : "Hello", "c" : [1,2,3]}}', 'obj', 'Map(String, Variant(UInt32, String, Array(UInt32)))') AS map_of_variants, mapApply((k, v) -> (k, variantType(v)), map_of_variants) AS map_of_variant_types
```
```text
┌─map_of_variants──────────────────┬─map_of_variant_types────────────────────────────┐
│ {'a':42,'b':'Hello','c':[1,2,3]} │ {'a':'UInt32','b':'String','c':'Array(UInt32)'} │
└──────────────────────────────────┴─────────────────────────────────────────────────┘
```
```sql
SELECT JSONExtractKeysAndValues('{"a" : 42, "b" : "Hello", "c" : [1,2,3]}', 'Variant(UInt32, String, Array(UInt32))') AS variants, arrayMap(x -> (x.1, variantType(x.2)), variants) AS variant_types
```
```text
┌─variants───────────────────────────────┬─variant_types─────────────────────────────────────────┐
│ [('a',42),('b','Hello'),('c',[1,2,3])] │ [('a','UInt32'),('b','String'),('c','Array(UInt32)')] │
└────────────────────────────────────────┴───────────────────────────────────────────────────────┘
```
## age
Returns the `unit` component of the difference between `startdate` and `enddate`. The difference is calculated using a precision of 1 nanosecond.
E.g. the difference between `2021-12-29` and `2022-01-01` is 3 days for `day` unit, 0 months for `month` unit, 0 years for `year` unit.
For an alternative to `age`, see function `date_diff`.
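A minimal illustration of the `day` unit, consistent with the example dates above:

```sql
SELECT age('day', toDate('2021-12-29'), toDate('2022-01-01')); -- returns 3
```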
**Syntax**

```sql
age('unit', startdate, enddate, [timezone])
```
- `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md).
Possible values:
- `nanosecond`, `nanoseconds`, `ns`
- `microsecond`, `microseconds`, `us`, `u`
- `millisecond`, `milliseconds`, `ms`
- `second`, `seconds`, `ss`, `s`
- `minute`, `minutes`, `mi`, `n`
- `hour`, `hours`, `hh`, `h`
- `day`, `days`, `dd`, `d`
- `week`, `weeks`, `wk`, `ww`
- `month`, `months`, `mm`, `m`
- `quarter`, `quarters`, `qq`, `q`
- `year`, `years`, `yyyy`, `yy`
- `startdate` — The first time value to subtract (the subtrahend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
## date_diff

Aliases: `dateDiff`, `DATE_DIFF`, `timestampDiff`, `timestamp_diff`, `TIMESTAMP_DIFF`.
- `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md).
Possible values:
- `nanosecond`, `nanoseconds`, `ns`
- `microsecond`, `microseconds`, `us`, `u`
- `millisecond`, `milliseconds`, `ms`
- `second`, `seconds`, `ss`, `s`
- `minute`, `minutes`, `mi`, `n`
- `hour`, `hours`, `hh`, `h`
- `day`, `days`, `dd`, `d`
- `week`, `weeks`, `wk`, `ww`
- `month`, `months`, `mm`, `m`
- `quarter`, `quarters`, `qq`, `q`
- `year`, `years`, `yyyy`, `yy`
- `startdate` — The first time value to subtract (the subtrahend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
Returns 1 if the Float32 or Float64 argument is NaN, otherwise returns 0.
## hasColumnInTable
Given the database name, the table name, and the column name as constant strings, returns 1 if the given column exists, otherwise 0.
**Syntax**
```sql
hasColumnInTable([hostname[, username[, password]],] database, table, column)
```
**Parameters**
- `database` : name of the database. [String literal](../syntax#syntax-string-literal)
- `table` : name of the table. [String literal](../syntax#syntax-string-literal)
- `column` : name of the column. [String literal](../syntax#syntax-string-literal)
- `hostname` : name of the remote server to perform the check on. Optional. [String literal](../syntax#syntax-string-literal)
- `username` : username for the remote server. Optional. [String literal](../syntax#syntax-string-literal)
- `password` : password for the remote server. Optional. [String literal](../syntax#syntax-string-literal)
**Returned value**
- `1` if the given column exists.
- `0`, otherwise.
**Implementation details**
If the parameter `hostname` is given, the check is performed on a remote server.
If the table does not exist, an exception is thrown.
For elements in a nested data structure, the function checks for the existence of a column. For the nested data structure itself, the function returns 0.
**Example**
Query:
```sql
SELECT hasColumnInTable('system','metrics','metric')
```
```response
1
```
```sql
SELECT hasColumnInTable('system','metrics','non-existing_column')
```
```response
0
```
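A sketch of the remote form, assuming the 4-argument variant from the syntax above (the hostname is illustrative and the check requires access to that server):

```sql
SELECT hasColumnInTable('localhost', 'system', 'metrics', 'metric');
```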
## hasThreadFuzzer
Returns whether Thread Fuzzer is effective. It can be used in tests to prevent runs from being too long.
**Syntax**
```sql
hasThreadFuzzer()
```
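A minimal sketch; the result depends on whether Thread Fuzzer is active in the current run (typically 0 outside of test environments):

```sql
SELECT hasThreadFuzzer();
```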
## bar
Builds a bar chart.
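A hedged sketch of a typical call, assuming the usual `bar(x, min, max, width)` argument order, which scales `x` between `min` and `max` into a bar of up to `width` characters:

```sql
SELECT number, bar(number, 0, 9, 9) FROM system.numbers LIMIT 3;
```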
Returns the length of a string in Unicode code points (not: in bytes or characters). It assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
Alias:
- `CHAR_LENGTH`
- `CHARACTER_LENGTH`
## leftPad
```sql
SELECT
    position('Hello, world!', 'o', 1),
    position('Hello, world!', 'o', 7)
```
Result:
``` text
┌─position('Hello, world!', 'o', 1)─┬─position('Hello, world!', 'o', 7)─┐
│ 5 │ 9 │
└───────────────────────────────────┴───────────────────────────────────┘
```
## ngramDistance
Calculates the 4-gram distance between a `haystack` string and a `needle` string. For this, it counts the symmetric difference between two multisets of 4-grams and normalizes it by the sum of their cardinalities. Returns a [Float32](../../sql-reference/data-types/float.md/#float32-float64) between 0 and 1. The smaller the result is, the more similar the strings are to each other.
Functions [`ngramDistanceCaseInsensitive`](#ngramdistancecaseinsensitive), [`ngramDistanceUTF8`](#ngramdistanceutf8), [`ngramDistanceCaseInsensitiveUTF8`](#ngramdistancecaseinsensitiveutf8) provide case-insensitive and/or UTF-8 variants of this function.
**Syntax**
```sql
ngramDistance(haystack, needle)
```
**Parameters**
- `haystack`: First comparison string. [String literal](../syntax#string)
- `needle`: Second comparison string. [String literal](../syntax#string)
**Returned value**
- Value between 0 and 1 representing the similarity between the two strings. [Float32](../../sql-reference/data-types/float.md/#float32-float64)
**Implementation details**
This function will throw an exception if constant `needle` or `haystack` arguments are more than 32Kb in size. If any non-constant `haystack` or `needle` arguments are more than 32Kb in size, then the distance is always 1.
**Examples**
The more similar two strings are to each other, the closer the result will be to 0 (identical).
Query:
```sql
SELECT ngramDistance('ClickHouse','ClickHouse!');
```
Result:
```response
0.06666667
```
The less similar two strings are to each other, the larger the result will be.
Query:
```sql
SELECT ngramDistance('ClickHouse','House');
```
Result:
```response
0.5555556
```
## ngramDistanceCaseInsensitive
Provides a case-insensitive variant of [ngramDistance](#ngramdistance).
**Syntax**
```sql
ngramDistanceCaseInsensitive(haystack, needle)
```
**Parameters**
- `haystack`: First comparison string. [String literal](../syntax#string)
- `needle`: Second comparison string. [String literal](../syntax#string)
**Returned value**
- Value between 0 and 1 representing the similarity between the two strings. [Float32](../../sql-reference/data-types/float.md/#float32-float64)
**Examples**
With [ngramDistance](#ngramdistance) differences in case will affect the similarity value:
Query:
```sql
SELECT ngramDistance('ClickHouse','clickhouse');
```
Result:
```response
0.71428573
```
With [ngramDistanceCaseInsensitive](#ngramdistancecaseinsensitive) case is ignored, so two strings differing only in case will now return a low similarity value:
Query:
```sql
SELECT ngramDistanceCaseInsensitive('ClickHouse','clickhouse');
```
Result:
```response
0
```
## ngramDistanceUTF8
Provides a UTF-8 variant of [ngramDistance](#ngramdistance). Assumes that `needle` and `haystack` strings are UTF-8 encoded strings.
**Syntax**
```sql
ngramDistanceUTF8(haystack, needle)
```
**Parameters**
- `haystack`: First UTF-8 encoded comparison string. [String literal](../syntax#string)
- `needle`: Second UTF-8 encoded comparison string. [String literal](../syntax#string)
**Returned value**
- Value between 0 and 1 representing the similarity between the two strings. [Float32](../../sql-reference/data-types/float.md/#float32-float64)
**Example**
Query:
```sql
SELECT ngramDistanceUTF8('abcde','cde');
```
Result:
```response
0.5
```
## ngramDistanceCaseInsensitiveUTF8
Provides a case-insensitive variant of [ngramDistanceUTF8](#ngramdistanceutf8).
**Syntax**
```sql
ngramDistanceCaseInsensitiveUTF8(haystack, needle)
```
**Parameters**
- `haystack`: First UTF-8 encoded comparison string. [String literal](../syntax#string)
- `needle`: Second UTF-8 encoded comparison string. [String literal](../syntax#string)
**Returned value**
- Value between 0 and 1 representing the similarity between the two strings. [Float32](../../sql-reference/data-types/float.md/#float32-float64)
**Example**
Query:
```sql
SELECT ngramDistanceCaseInsensitiveUTF8('abcde','CDE');
```
Result:
```response
0.5
```
## ngramSearch
Like `ngramDistance` but calculates the non-symmetric difference between a `needle` string and a `haystack` string, i.e. the number of n-grams from the needle minus the common number of n-grams normalized by the number of `needle` n-grams. Returns a [Float32](../../sql-reference/data-types/float.md/#float32-float64) between 0 and 1. The bigger the result is, the more likely `needle` is in the `haystack`. This function is useful for fuzzy string search. Also see function [`soundex`](../../sql-reference/functions/string-functions#soundex).
Functions [`ngramSearchCaseInsensitive`](#ngramsearchcaseinsensitive), [`ngramSearchUTF8`](#ngramsearchutf8), [`ngramSearchCaseInsensitiveUTF8`](#ngramsearchcaseinsensitiveutf8) provide case-insensitive and/or UTF-8 variants of this function.
**Syntax**
```sql
ngramSearch(haystack, needle)
```
**Parameters**
- `haystack`: First comparison string. [String literal](../syntax#string)
- `needle`: Second comparison string. [String literal](../syntax#string)
**Returned value**
- Value between 0 and 1 representing the likelihood of the `needle` being in the `haystack`. [Float32](../../sql-reference/data-types/float.md/#float32-float64)
**Implementation details**
:::note
The UTF-8 variants use the 3-gram distance. These are not perfectly fair n-gram distances. We use 2-byte hashes to hash n-grams and then calculate the (non-)symmetric difference between these hash tables; collisions may occur. With the UTF-8 case-insensitive format we do not use a fair `tolower` function; we zero the 5th bit (starting from zero) of each codepoint byte, and the first bit of the zeroth byte if there is more than one byte; this works for Latin and mostly for all Cyrillic letters.
:::
**Example**
Query:
```sql
SELECT ngramSearch('Hello World','World Hello');
```
Result:
```response
0.5
```
## ngramSearchCaseInsensitive
Provides a case-insensitive variant of [ngramSearch](#ngramsearch).
**Syntax**
```sql
ngramSearchCaseInsensitive(haystack, needle)
```
**Parameters**
- `haystack`: First comparison string. [String literal](../syntax#string)
- `needle`: Second comparison string. [String literal](../syntax#string)
**Returned value**
- Value between 0 and 1 representing the likelihood of the `needle` being in the `haystack`. [Float32](../../sql-reference/data-types/float.md/#float32-float64)
The bigger the result is, the more likely `needle` is in the `haystack`.
**Example**
Query:
```sql
SELECT ngramSearchCaseInsensitive('Hello World','hello');
```
Result:
```response
1
```
## ngramSearchUTF8
Provides a UTF-8 variant of [ngramSearch](#ngramsearch) in which `needle` and `haystack` are assumed to be UTF-8 encoded strings.
**Syntax**
```sql
ngramSearchUTF8(haystack, needle)
```
**Parameters**
- `haystack`: First UTF-8 encoded comparison string. [String literal](../syntax#string)
- `needle`: Second UTF-8 encoded comparison string. [String literal](../syntax#string)
**Returned value**
- Value between 0 and 1 representing the likelihood of the `needle` being in the `haystack`. [Float32](../../sql-reference/data-types/float.md/#float32-float64)
The bigger the result is, the more likely `needle` is in the `haystack`.
**Example**
Query:
```sql
SELECT ngramSearchUTF8('абвгдеёжз', 'гдеёзд');
```
Result:
```response
0.5
```
## ngramSearchCaseInsensitiveUTF8
Provides a case-insensitive variant of [ngramSearchUTF8](#ngramsearchutf8) in which `needle` and `haystack` are assumed to be UTF-8 encoded strings.
**Syntax**
```sql
ngramSearchCaseInsensitiveUTF8(haystack, needle)
```
**Parameters**
- `haystack`: First UTF-8 encoded comparison string. [String literal](../syntax#string)
- `needle`: Second UTF-8 encoded comparison string. [String literal](../syntax#string)
**Returned value**
- Value between 0 and 1 representing the likelihood of the `needle` being in the `haystack`. [Float32](../../sql-reference/data-types/float.md/#float32-float64)
The bigger the result is, the more likely `needle` is in the `haystack`.
**Example**
Query:
```sql
SELECT ngramSearchCaseInsensitiveUTF8('абвГДЕёжз', 'АбвгдЕЁжз');
```
Result:
```response
0.57142854
```
## countSubstrings
Returns how often substring `needle` occurs in string `haystack`.
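A minimal sketch; occurrences are counted without overlap, which is why the result below is 2 rather than 3:

```sql
SELECT countSubstrings('aaaa', 'aa'); -- 2
```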
## regexpExtract
Extracts the first string in `haystack` that matches the regexp pattern and corresponds to the regex group index.
**Syntax**
```sql
regexpExtract(haystack, pattern[, index])
```
## hasSubsequence
Returns 1 if `needle` is a subsequence of `haystack`, or 0 otherwise.
A subsequence of a string is a sequence that can be derived from the given string by deleting zero or more elements without changing the order of the remaining elements.
**Examples**
Query:
``` sql
SELECT hasSubsequence('garbage', 'arg');
```
Result:
``` text
┌─hasSubsequence('garbage', 'arg')─┐
│                                1 │
└──────────────────────────────────┘
```

## hasSubsequenceCaseInsensitive
Like [hasSubsequence](#hasSubsequence) but searches case-insensitively.
**Syntax**
``` sql
hasSubsequenceCaseInsensitive(haystack, needle)
```
**Arguments**
- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Subsequence to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal).
**Returned values**
- 1, if needle is a subsequence of haystack.
- 0, otherwise.
Type: `UInt8`.
**Examples**
Query:
``` sql
SELECT hasSubsequenceCaseInsensitive('garbage', 'ARG');
```
Result:
``` text
┌─hasSubsequenceCaseInsensitive('garbage', 'ARG')─┐
│ 1 │
└─────────────────────────────────────────────────┘
```
## hasSubsequenceUTF8
Like [hasSubsequence](#hasSubsequence) but assumes `haystack` and `needle` are UTF-8 encoded strings.
**Syntax**
``` sql
hasSubsequenceUTF8(haystack, needle)
```
**Arguments**
- `haystack` — String in which the search is performed. UTF-8 encoded [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Subsequence to be searched. UTF-8 encoded [String](../../sql-reference/syntax.md#syntax-string-literal).
**Returned values**
- 1, if needle is a subsequence of haystack.
- 0, otherwise.
Type: `UInt8`.
**Examples**

Query:
``` sql
SELECT hasSubsequenceUTF8('ClickHouse - столбцовая система управления базами данных', 'система');
```
Result:
``` text
┌─hasSubsequenceUTF8('ClickHouse - столбцовая система управления базами данных', 'система')─┐
│ 1 │
└───────────────────────────────────────────────────────────────────────────────────────────┘
```
## hasSubsequenceCaseInsensitiveUTF8
Like [hasSubsequenceUTF8](#hasSubsequenceUTF8) but searches case-insensitively.
**Syntax**
``` sql
hasSubsequenceCaseInsensitiveUTF8(haystack, needle)
```
**Arguments**
- `haystack` — String in which the search is performed. UTF-8 encoded [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Subsequence to be searched. UTF-8 encoded [String](../../sql-reference/syntax.md#syntax-string-literal).
**Returned values**
- 1, if needle is a subsequence of haystack.
- 0, otherwise.
Type: `UInt8`.
**Examples**
Query:
``` sql
SELECT hasSubsequenceCaseInsensitiveUTF8('ClickHouse - столбцовая система управления базами данных', 'СИСТЕМА');
```
Result:
``` text
┌─hasSubsequenceCaseInsensitiveUTF8('ClickHouse - столбцовая система управления базами данных', 'СИСТЕМА')─┐
│ 1 │
└──────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```
## hasToken
Returns 1 if a given token is present in a haystack, or 0 otherwise.
**Syntax**
```sql
hasToken(haystack, token)
```
**Parameters**
- `haystack`: String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `token`: Maximal-length substring between two non-alphanumeric ASCII characters (or the boundaries of the haystack).
**Returned value**
- 1, if the token is present in the haystack.
- 0, if the token is not present.
**Implementation details**
The token must be a constant string. Supported by the `tokenbf_v1` index specialization.
**Example**
Query:
```sql
SELECT hasToken('Hello World','Hello');
```
```response
1
```
## hasTokenOrNull
Returns 1 if a given token is present, 0 if not present, and null if the token is ill-formed.
**Syntax**
```sql
hasTokenOrNull(haystack, token)
```
**Parameters**
- `haystack`: String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `token`: Maximal-length substring between two non-alphanumeric ASCII characters (or the boundaries of the haystack).
**Returned value**
- 1, if the token is present in the haystack.
- 0, if the token is not present in the haystack.
- null, if the token is ill-formed.
**Implementation details**
The token must be a constant string. Supported by the `tokenbf_v1` index specialization.
**Example**
Where `hasToken` would throw an error for an ill-formed token, `hasTokenOrNull` returns `null` instead.
Query:
```sql
SELECT hasTokenOrNull('Hello World','Hello,World');
```
```response
null
```
## hasTokenCaseInsensitive
Returns 1 if a given token is present in a haystack, 0 otherwise. Ignores case.
**Syntax**
```sql
hasTokenCaseInsensitive(haystack, token)
```
**Parameters**
- `haystack`: String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `token`: Maximal-length substring between two non-alphanumeric ASCII characters (or the boundaries of the haystack).
**Returned value**
- 1, if the token is present in the haystack.
- 0, otherwise.
**Implementation details**
The token must be a constant string. Supported by the `tokenbf_v1` index specialization.
**Example**
Query:
```sql
SELECT hasTokenCaseInsensitive('Hello World','hello');
```
```response
1
```
## hasTokenCaseInsensitiveOrNull
Returns 1 if a given token is present in a haystack, 0 otherwise. Ignores case and returns null if the token is ill-formed.
**Syntax**
```sql
hasTokenCaseInsensitiveOrNull(haystack, token)
```
**Parameters**
- `haystack`: String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `token`: Maximal-length substring between two non-alphanumeric ASCII characters (or the boundaries of the haystack).
**Returned value**
- 1, if the token is present in the haystack.
- 0, if the token is not present.
- null, if the token is ill-formed.
**Implementation details**
The token must be a constant string. Supported by the `tokenbf_v1` index specialization.
**Example**
Where `hasTokenCaseInsensitive` would throw an error for an ill-formed token, `hasTokenCaseInsensitiveOrNull` returns `null` instead.
Query:
```sql
SELECT hasTokenCaseInsensitiveOrNull('Hello World','hello,world');
```
```response
null
```
For non-replicated tables, all `ALTER` queries are performed synchronously. For replicated tables, the query just adds instructions for the appropriate actions to `ZooKeeper`, and the actions themselves are performed as soon as possible. However, the query can wait for these actions to be completed on all the replicas.
For `ALTER` queries that create mutations (e.g., including but not limited to `UPDATE`, `DELETE`, `MATERIALIZE INDEX`, `MATERIALIZE PROJECTION`, `MATERIALIZE COLUMN`, `APPLY DELETED MASK`, `CLEAR STATISTIC`, `MATERIALIZE STATISTIC`), the synchronicity is defined by the [mutations_sync](/docs/en/operations/settings/settings.md/#mutations_sync) setting.
For other `ALTER` queries which only modify the metadata, you can use the [alter_sync](/docs/en/operations/settings/settings.md/#alter-sync) setting to set up waiting.
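A sketch of a mutation that waits synchronously via the `mutations_sync` setting described above (table and column names are illustrative):

```sql
ALTER TABLE t DELETE WHERE id = 1 SETTINGS mutations_sync = 2; -- wait for all replicas
```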
You can specify how long (in seconds) to wait for inactive replicas to execute all `ALTER` queries with the [replication_wait_for_inactive_replica_timeout](/docs/en/operations/settings/settings.md/#replication-wait-for-inactive-replica-timeout) setting.
:::note
For all `ALTER` queries, if `alter_sync = 2` and some replicas are not active for more than the time specified by the `replication_wait_for_inactive_replica_timeout` setting, then an exception `UNFINISHED` is thrown.
:::
## Related content
- Blog: [Handling Updates and Deletes in ClickHouse](https://clickhouse.com/blog/handling-updates-and-deletes-in-clickhouse)
You can modify the `SELECT` query that was specified when a [materialized view](../create/view.md#materialized) was created with the `ALTER TABLE … MODIFY QUERY` statement without interrupting the ingestion process.
This command is intended to change a materialized view created with the `TO [db.]name` clause. It does not change the structure of the underlying storage table, and it does not change the column definitions of the materialized view; because of this, the application of this command is very limited for materialized views created without the `TO [db.]name` clause.
**Example with TO table**
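A minimal sketch with illustrative names, assuming `mv` was created with `TO dst` over a source table `src`:

```sql
ALTER TABLE mv MODIFY QUERY SELECT id, value * 2 AS value FROM src;
```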
## DROP TABLE
Deletes one or more tables.
:::tip
To undo the deletion of a table, please see [UNDROP TABLE](/docs/en/sql-reference/statements/undrop.md)
:::
Syntax:
``` sql
DROP [TEMPORARY] TABLE [IF EXISTS] [IF EMPTY] [db1.]name_1[, [db2.]name_2, ...] [ON CLUSTER cluster] [SYNC]
```
Limitations:
- If the clause `IF EMPTY` is specified, the server checks the emptiness of the table only on the replica which received the query.
- Deleting multiple tables at once is not an atomic operation, i.e. if the deletion of a table fails, subsequent tables will not be deleted.
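A short sketch (database and table names are illustrative):

```sql
DROP TABLE IF EXISTS db1.t1, db2.t2;
```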
## DROP DICTIONARY
Deletes the dictionary.
## RELOAD FUNCTIONS

**Syntax**

```sql
RELOAD FUNCTIONS [ON CLUSTER cluster_name]
RELOAD FUNCTION [ON CLUSTER cluster_name] function_name
```
## RELOAD ASYNCHRONOUS METRICS
Re-calculates all [asynchronous metrics](../../operations/system-tables/asynchronous_metrics.md). Since asynchronous metrics are periodically updated based on setting [asynchronous_metrics_update_period_s](../../operations/server-configuration-parameters/settings.md), updating them manually using this statement is typically not necessary.
```sql
RELOAD ASYNCHRONOUS METRICS [ON CLUSTER cluster_name]
```
## DROP DNS CACHE
Clears ClickHouse's internal DNS cache. Sometimes (for old ClickHouse versions) it is necessary to use this command when changing the infrastructure (changing the IP address of another ClickHouse server or the server used by dictionaries).
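For reference, the full statement form (invoked like the other commands on this page, as a `SYSTEM` statement):

```sql
SYSTEM DROP DNS CACHE
```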
:::note
If `alter_sync` is set to `2` and some replicas are not active for more than the time specified by the `replication_wait_for_inactive_replica_timeout` setting, then an exception `UNFINISHED` is thrown.
:::
## TRUNCATE ALL TABLES
``` sql
TRUNCATE ALL TABLES [IF EXISTS] db [ON CLUSTER cluster]
```
Removes all data from all tables in a database.
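A short sketch (the database name is illustrative):

```sql
TRUNCATE ALL TABLES IF EXISTS db;
```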
## TRUNCATE DATABASE
``` sql
TRUNCATE DATABASE [IF EXISTS] db [ON CLUSTER cluster]
```
Removes all tables from a database but keeps the database itself. When the clause `IF EXISTS` is omitted, the query returns an error if the database does not exist.
ClickHouse supports the standard grammar for defining windows and window functions. The table below indicates whether a feature is currently supported.
| Feature | Supported? |
|------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| ad hoc window specification (`count(*) over (partition by id order by time desc)`) | ✅ |
| expressions involving window functions, e.g. `(count(*) over ()) / 2)` | ✅ |
| `WINDOW` clause (`select ... from table window w as (partition by id)`) | ✅ |
| `ROWS` frame | ✅ |
| `RANGE` frame | ✅ (the default) |
| `INTERVAL` syntax for `DateTime` `RANGE OFFSET` frame                              | ❌ (specify the number of seconds instead; `RANGE` works with any numeric type) |
| `GROUPS` frame | ❌ |
| Calculating aggregate functions over a frame (`sum(value) over (order by time)`) | ✅ (All aggregate functions are supported) |
| `rank()`, `dense_rank()`, `row_number()` | ✅ |
| `lag/lead(value, offset)`                                                          | ❌ <br/> You can use one of the following workarounds (see the sketch after this table):<br/> 1) `any(value) over (.... rows between <offset> preceding and <offset> preceding)`, or `following` for `lead` <br/> 2) `lagInFrame/leadInFrame`, which are analogous, but respect the window frame. To get behavior identical to `lag/lead`, use `rows between unbounded preceding and unbounded following` |
| ntile(buckets) | ✅ <br/> Specify a window like `(partition by x order by y rows between unbounded preceding and unbounded following)`. |
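A sketch of workaround 2 from the table above (table and column names are illustrative):

```sql
SELECT
    ts,
    value,
    lagInFrame(value, 1) OVER (
        ORDER BY ts ASC
        ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    ) AS prev_value
FROM events;
```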
## ClickHouse-specific Window Functions
There is also the following ClickHouse-specific window function:
### nonNegativeDerivative(metric_column, timestamp_column[, INTERVAL X UNITS])
These functions can be used only as window functions.

## Examples
Let's have a look at some examples of how window functions can be used.
### Numbering rows
```sql
CREATE TABLE salaries
(
`team` String,
`player` String,
`salary` UInt32,
`position` String
)
Engine = Memory;
INSERT INTO salaries FORMAT Values
('Port Elizabeth Barbarians', 'Gary Chen', 195000, 'F'),
('New Coreystad Archdukes', 'Charles Juarez', 190000, 'F'),
('Port Elizabeth Barbarians', 'Michael Stanley', 150000, 'D'),
('New Coreystad Archdukes', 'Scott Harrison', 150000, 'D'),
('Port Elizabeth Barbarians', 'Robert George', 195000, 'M');
```
```sql
SELECT player, salary,
row_number() OVER (ORDER BY salary) AS row
FROM salaries;
```
```text
┌─player──────────┬─salary─┬─row─┐
│ Michael Stanley │ 150000 │ 1 │
│ Scott Harrison │ 150000 │ 2 │
│ Charles Juarez │ 190000 │ 3 │
│ Gary Chen │ 195000 │ 4 │
│ Robert George │ 195000 │ 5 │
└─────────────────┴────────┴─────┘
```
```sql
SELECT player, salary,
row_number() OVER (ORDER BY salary) AS row,
rank() OVER (ORDER BY salary) AS rank,
dense_rank() OVER (ORDER BY salary) AS denseRank
FROM salaries;
```
```text
┌─player──────────┬─salary─┬─row─┬─rank─┬─denseRank─┐
│ Michael Stanley │ 150000 │ 1 │ 1 │ 1 │
│ Scott Harrison │ 150000 │ 2 │ 1 │ 1 │
│ Charles Juarez │ 190000 │ 3 │ 3 │ 2 │
│ Gary Chen │ 195000 │ 4 │ 4 │ 3 │
│ Robert George │ 195000 │ 5 │ 4 │ 3 │
└─────────────────┴────────┴─────┴──────┴───────────┘
```
### Aggregation functions
Compare each player's salary to the average for their team.
```sql
SELECT player, salary, team,
avg(salary) OVER (PARTITION BY team) AS teamAvg,
salary - teamAvg AS diff
FROM salaries;
```
```text
┌─player──────────┬─salary─┬─team──────────────────────┬─teamAvg─┬───diff─┐
│ Charles Juarez │ 190000 │ New Coreystad Archdukes │ 170000 │ 20000 │
│ Scott Harrison │ 150000 │ New Coreystad Archdukes │ 170000 │ -20000 │
│ Gary Chen │ 195000 │ Port Elizabeth Barbarians │ 180000 │ 15000 │
│ Michael Stanley │ 150000 │ Port Elizabeth Barbarians │ 180000 │ -30000 │
│ Robert George │ 195000 │ Port Elizabeth Barbarians │ 180000 │ 15000 │
└─────────────────┴────────┴───────────────────────────┴─────────┴────────┘
```
Compare each player's salary to the maximum for their team.
```sql
SELECT player, salary, team,
max(salary) OVER (PARTITION BY team) AS teamAvg,
salary - teamAvg AS diff
FROM salaries;
```
```text
┌─player──────────┬─salary─┬─team──────────────────────┬─teamAvg─┬───diff─┐
│ Charles Juarez │ 190000 │ New Coreystad Archdukes │ 190000 │ 0 │
│ Scott Harrison │ 150000 │ New Coreystad Archdukes │ 190000 │ -40000 │
│ Gary Chen │ 195000 │ Port Elizabeth Barbarians │ 195000 │ 0 │
│ Michael Stanley │ 150000 │ Port Elizabeth Barbarians │ 195000 │ -45000 │
│ Robert George │ 195000 │ Port Elizabeth Barbarians │ 195000 │ 0 │
└─────────────────┴────────┴───────────────────────────┴─────────┴────────┘
```
### Partitioning by column
```sql
CREATE TABLE wf_partition
(
    `part_key` UInt64,
    `value` UInt64,
    `order` UInt64
)
ENGINE = Memory;

INSERT INTO wf_partition FORMAT Values
   (1,1,1), (1,2,2), (1,3,3), (2,0,0), (3,0,0);
```

```sql
SELECT
    part_key,
    value,
    order,
    groupArray(value) OVER (PARTITION BY part_key) AS frame_values
FROM wf_partition
ORDER BY
    part_key ASC,
    value ASC;
```

```text
┌─part_key─┬─value─┬─order─┬─frame_values─┐
│        1 │     1 │     1 │ [1,2,3]      │
│        1 │     2 │     2 │ [1,2,3]      │
│        1 │     3 │     3 │ [1,2,3]      │
│        2 │     0 │     0 │ [0]          │
│        3 │     0 │     0 │ [0]          │
└──────────┴───────┴───────┴──────────────┘
```
### Frame bounding
```sql
CREATE TABLE wf_frame
(
    `part_key` UInt64,
    `value` UInt64,
    `order` UInt64
)
ENGINE = Memory;

INSERT INTO wf_frame FORMAT Values
(1,1,1), (1,2,2), (1,3,3), (1,4,4), (1,5,5);
```
```sql
-- Frame is bounded by bounds of a partition (BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)
SELECT
part_key,
value,
order,
groupArray(value) OVER (
PARTITION BY part_key
ORDER BY order ASC
Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
) AS frame_values
FROM wf_frame
ORDER BY
part_key ASC,
    value ASC;
```

```text
┌─part_key─┬─value─┬─order─┬─frame_values─┐
│        1 │     1 │     1 │ [1,2,3,4,5]  │
│        1 │     2 │     2 │ [1,2,3,4,5]  │
│        1 │     3 │     3 │ [1,2,3,4,5]  │
│ 1 │ 4 │ 4 │ [1,2,3,4,5] │
│ 1 │ 5 │ 5 │ [1,2,3,4,5] │
└──────────┴───────┴───────┴──────────────┘
```
```sql
-- short form - no bound expression, no order by
SELECT
part_key,
    value,
    order,
    groupArray(value) OVER (PARTITION BY part_key) AS frame_values
FROM wf_frame
ORDER BY
    part_key ASC,
    value ASC;
```

```text
┌─part_key─┬─value─┬─order─┬─frame_values─┐
│        1 │     1 │     1 │ [1,2,3,4,5]  │
│        1 │     2 │     2 │ [1,2,3,4,5]  │
│        1 │     3 │     3 │ [1,2,3,4,5]  │
│ 1 │ 4 │ 4 │ [1,2,3,4,5] │
│ 1 │ 5 │ 5 │ [1,2,3,4,5] │
└──────────┴───────┴───────┴──────────────┘
```
```sql
-- frame is bounded by the beginning of a partition and the current row
SELECT
part_key,
value,
order,
groupArray(value) OVER (
PARTITION BY part_key
ORDER BY order ASC
Rows BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
) AS frame_values
FROM wf_frame
ORDER BY
part_key ASC,
    value ASC;
```

```text
┌─part_key─┬─value─┬─order─┬─frame_values─┐
│        1 │     1 │     1 │ [1]          │
│        1 │     2 │     2 │ [1,2]        │
│        1 │     3 │     3 │ [1,2,3]      │
│ 1 │ 4 │ 4 │ [1,2,3,4] │
│ 1 │ 5 │ 5 │ [1,2,3,4,5] │
└──────────┴───────┴───────┴──────────────┘
```
```sql
-- short form (frame is bounded by the beginning of a partition and the current row)
SELECT
part_key,
value,
    order,
    groupArray(value) OVER (PARTITION BY part_key ORDER BY order ASC) AS frame_values
FROM wf_frame
ORDER BY
    part_key ASC,
    value ASC;
```

```text
┌─part_key─┬─value─┬─order─┬─frame_values─┐
│        1 │     1 │     1 │ [1]          │
│        1 │     2 │     2 │ [1,2]        │
│        1 │     3 │     3 │ [1,2,3]      │
│ 1 │ 4 │ 4 │ [1,2,3,4] │
│ 1 │ 5 │ 5 │ [1,2,3,4,5] │
└──────────┴───────┴───────┴──────────────┘
```
```sql
-- frame is bounded by the beginning of a partition and the current row, but order is backward
SELECT
part_key,
value,
    order,
    groupArray(value) OVER (PARTITION BY part_key ORDER BY order DESC) AS frame_values
FROM wf_frame
ORDER BY
    part_key ASC,
    value ASC;
```

```text
┌─part_key─┬─value─┬─order─┬─frame_values─┐
│        1 │     1 │     1 │ [5,4,3,2,1]  │
│        1 │     2 │     2 │ [5,4,3,2]    │
│        1 │     3 │     3 │ [5,4,3]      │
│ 1 │ 4 │ 4 │ [5,4] │
│ 1 │ 5 │ 5 │ [5] │
└──────────┴───────┴───────┴──────────────┘
```
```sql
-- sliding frame - 1 PRECEDING ROW AND CURRENT ROW
SELECT
part_key,
value,
order,
groupArray(value) OVER (
PARTITION BY part_key
ORDER BY order ASC
Rows BETWEEN 1 PRECEDING AND CURRENT ROW
) AS frame_values
FROM wf_frame
ORDER BY
part_key ASC,
    value ASC;
```

```text
┌─part_key─┬─value─┬─order─┬─frame_values─┐
│        1 │     1 │     1 │ [1]          │
│        1 │     2 │     2 │ [1,2]        │
│        1 │     3 │     3 │ [2,3]        │
│ 1 │ 4 │ 4 │ [3,4] │
│ 1 │ 5 │ 5 │ [4,5] │
└──────────┴───────┴───────┴──────────────┘
```
```sql
-- sliding frame - Rows BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING
SELECT
part_key,
value,
order,
groupArray(value) OVER (
PARTITION BY part_key
ORDER BY order ASC
Rows BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING
) AS frame_values
FROM wf_frame
ORDER BY
part_key ASC,
    value ASC;
```

```text
┌─part_key─┬─value─┬─order─┬─frame_values─┐
│        1 │     1 │     1 │ [1,2,3,4,5]  │
│        1 │     2 │     2 │ [1,2,3,4,5]  │
│        1 │     3 │     3 │ [2,3,4,5]    │
│ 1 │ 4 │ 4 │ [3,4,5] │
│ 1 │ 5 │ 5 │ [4,5] │
└──────────┴───────┴───────┴──────────────┘
```
```sql
-- row_number does not respect the frame, so rn_1 = rn_2 = rn_3 != rn_4
SELECT
part_key,
    value,
    order,
    groupArray(value) OVER w1 AS frame_values,
    row_number() OVER w1 AS rn_1,
    ...
FROM wf_frame
WINDOW
w1 AS (PARTITION BY part_key ORDER BY order DESC),
w2 AS (
PARTITION BY part_key
ORDER BY order DESC
Rows BETWEEN 1 PRECEDING AND CURRENT ROW
)
ORDER BY
part_key ASC,
value ASC;
    value ASC;
```

```text
┌─part_key─┬─value─┬─order─┬─frame_values─┬─rn_1─┬─rn_2─┬─rn_3─┬─rn_4─┐
│ 1 │ 4 │ 4 │ [5,4] │ 2 │ 2 │ 2 │ 2 │
│ 1 │ 5 │ 5 │ [5] │ 1 │ 1 │ 1 │ 1 │
└──────────┴───────┴───────┴──────────────┴──────┴──────┴──────┴──────┘
```
```sql
-- first_value and last_value respect the frame
SELECT
groupArray(value) OVER w1 AS frame_values_1,
    first_value(value) OVER w1 AS first_value_1,
    last_value(value) OVER w1 AS last_value_1,
    groupArray(value) OVER w2 AS frame_values_2,
    first_value(value) OVER w2 AS first_value_2,
    last_value(value) OVER w2 AS last_value_2
FROM wf_frame
WINDOW
    w1 AS (PARTITION BY part_key ORDER BY order ASC),
    w2 AS (
        PARTITION BY part_key
        ORDER BY order ASC
        Rows BETWEEN 1 PRECEDING AND CURRENT ROW
    )
ORDER BY
    part_key ASC,
    value ASC;
```

```text
┌─frame_values_1─┬─first_value_1─┬─last_value_1─┬─frame_values_2─┬─first_value_2─┬─last_value_2─┐
│ [1]            │             1 │            1 │ [1]            │             1 │            1 │
│ [1,2]          │             1 │            2 │ [1,2]          │             1 │            2 │
│ [1,2,3]        │             1 │            3 │ [2,3]          │             2 │            3 │
│ [1,2,3,4] │ 1 │ 4 │ [3,4] │ 3 │ 4 │
│ [1,2,3,4,5] │ 1 │ 5 │ [4,5] │ 4 │ 5 │
└────────────────┴───────────────┴──────────────┴────────────────┴───────────────┴──────────────┘
```
```sql
-- second value within the frame
SELECT
groupArray(value) OVER w1 AS frame_values_1,
    nth_value(value, 2) OVER w1 AS second_value
FROM wf_frame
WINDOW w1 AS (
    PARTITION BY part_key
    ORDER BY order ASC
    Rows BETWEEN 3 PRECEDING AND CURRENT ROW
)
ORDER BY
    part_key ASC,
    value ASC;
```

```text
┌─frame_values_1─┬─second_value─┐
│ [1]            │            0 │
│ [1,2]          │            2 │
│ [1,2,3]        │            2 │
│ [1,2,3,4] │ 2 │
│ [2,3,4,5] │ 3 │
└────────────────┴──────────────┘
```
```sql
-- second value within the frame + Null for missing values
SELECT
groupArray(value) OVER w1 AS frame_values_1,
    nth_value(toNullable(value), 2) OVER w1 AS second_value
FROM wf_frame
WINDOW w1 AS (
    PARTITION BY part_key
    ORDER BY order ASC
    Rows BETWEEN 3 PRECEDING AND CURRENT ROW
)
ORDER BY
    part_key ASC,
    value ASC;
```
## Real world examples
The following examples solve common real-world problems.
### Maximum/total salary per department
```sql
CREATE TABLE employees
(
    `department` String,
    `employee_name` String,
    `salary` UInt32
)
ENGINE = Memory;

INSERT INTO employees FORMAT Values
    ...
('IT', 'Tim', 200),
('IT', 'Anna', 300),
('IT', 'Elen', 500);
```
```sql
SELECT
department,
employee_name AS emp,
    salary,
    max_salary_per_dep,
    total_salary_per_dep,
    ...
FROM
(
    SELECT
        department,
        employee_name,
        salary,
max(salary) OVER wndw AS max_salary_per_dep,
sum(salary) OVER wndw AS total_salary_per_dep
FROM employees
WINDOW wndw AS (
PARTITION BY department
rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
)
ORDER BY
department ASC,
employee_name ASC
);
```

```text
└────────────┴──────┴────────┴────────────────────┴──────────────────────┴──────────────────┘
```
### Cumulative sum
```sql
CREATE TABLE warehouse
(
    `item` String,
    `ts` DateTime,
    `value` Float64
)
ENGINE = Memory;

INSERT INTO warehouse VALUES
    ...
('sku1', '2020-01-01', 1),
('sku1', '2020-02-01', 1),
('sku1', '2020-03-01', 1);
```
```sql
SELECT
item,
ts,
    value,
    sum(value) OVER (PARTITION BY item ORDER BY ts ASC) AS stock_balance
FROM warehouse
ORDER BY
    item ASC,
    ts ASC;
```

### Moving / sliding average (per 3 rows)

```sql
CREATE TABLE sensors
(
    `metric` String,
    `ts` DateTime,
    `value` Float64
)
ENGINE = Memory;

insert into sensors values('cpu_temp', '2020-01-01 00:00:00', 87),
('cpu_temp', '2020-01-01 00:00:05', 87),
('cpu_temp', '2020-01-01 00:00:06', 87),
('cpu_temp', '2020-01-01 00:00:07', 87);
```
```sql
SELECT
metric,
ts,
value,
avg(value) OVER (
PARTITION BY metric
ORDER BY ts ASC
Rows BETWEEN 2 PRECEDING AND CURRENT ROW
) AS moving_avg_temp
FROM sensors
ORDER BY
metric ASC,
    ts ASC;
```

```sql
insert into sensors values('ambient_temp', '2020-01-01 00:00:00', 16),
('ambient_temp', '2020-03-01 12:00:00', 16),
('ambient_temp', '2020-03-01 12:00:00', 16),
('ambient_temp', '2020-03-01 12:00:00', 16);
```
```sql
SELECT
metric,
ts,
    ...
```
``` xml
<http_handlers>
<rule>
<url><![CDATA[regex:/query_param_with_url/(?P<name_1>[^/]+)]]></url>
<methods>GET</methods>
<headers>
<XXX>TEST_HEADER_VALUE</XXX>
<PARAMS_XXX><![CDATA[regex:(?P<name_2>[^/]+)]]></PARAMS_XXX>
</headers>
<handler>
<type>predefined_query_handler</type>
<query>
SELECT name, value FROM system.settings
WHERE name IN ({name_1:String}, {name_2:String})
</query>
</handler>
</rule>
<defaults/>
</http_handlers>
```
``` bash
$ curl -H 'XXX:TEST_HEADER_VALUE' -H 'PARAMS_XXX:max_final_threads' 'http://localhost:8123/query_param_with_url/max_threads?max_threads=1&max_final_threads=2'
max_final_threads 2
max_threads 1
```
:::note Warning
Only one query is supported in one `predefined_query_handler`.
:::
### dynamic_query_handler {#dynamic_query_handler}
- 0 — Row numbers are not output.
- 1 — Row numbers are output.

Default value: `1`.
**Example**
```sql
SELECT TOP 3 name, value FROM system.settings;
```
### output_format_pretty_color {#output_format_pretty_color}
Enables/disables ANSI escape sequences in the Pretty formats.

Possible values:
## session_timezone {#session_timezone}
Sets the default timezone (session_timezone) for the current session, instead of the [server timezone](../server-configuration-parameters/settings.md#server_configuration_parameters-timezone). That is, all DateTime/DateTime64 values for which no timezone is explicitly specified will be interpreted as belonging to the specified zone.
With the setting value `''` (empty string), the session timezone matches the server timezone.
The functions `timeZone()` and `serverTimezone()` return the timezone of the current session and of the server, respectively.
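A minimal sketch (the returned values depend on the server configuration):

```sql
SET session_timezone = 'Europe/Berlin';
SELECT timeZone(), serverTimezone();
```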
- `r1` - the number of unique visitors during 2020-01-01 (the `cond1` condition).
- `r2` - the number of unique visitors during the period between 2020-01-01 and 2020-01-02 (the `cond1` and `cond2` conditions).
- `r3` - the number of unique visitors on 2020-01-01 and 2020-01-03 (the `cond1` and `cond3` conditions).
## uniqUpTo(N)(x) {#uniquptonx}
- [Functions for working with dates and times](../../sql-reference/functions/date-time-functions.md)
- [Functions for working with arrays](../../sql-reference/functions/array-functions.md)
- [The `date_time_input_format` setting](../../operations/settings/index.md#settings-date_time_input_format)
- [The `date_time_output_format` setting](../../operations/settings/index.md#settings-date_time_output_format)
- [The `timezone` server configuration parameter](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone)
- [The `session_timezone` setting](../../operations/settings/settings.md#session_timezone)
- [Operators for working with dates and times](../../sql-reference/operators/index.md#operators-datetime)
## age

Calculates the `unit` component of the difference between `startdate` and `enddate`. The difference is calculated with a precision of 1 nanosecond.
For example, the difference between `2021-12-29` and `2022-01-01` is 3 days for the `day` unit, 0 months for the `month` unit, and 0 years for the `year` unit.

**Syntax**
```sql
age('unit', startdate, enddate, [timezone])
```
- `unit` — the unit of time in which the return value is expressed. [String](../../sql-reference/data-types/string.md).
    Possible values:

    - `nanosecond` (possible abbreviations: `ns`)
    - `microsecond` (possible abbreviations: `us`, `u`)
    - `millisecond` (possible abbreviations: `ms`)
    - `second` (possible abbreviations: `ss`, `s`)
## date_diff

**Syntax**

```sql
date_diff('unit', startdate, enddate, [timezone])
```
- `unit` — the unit of time in which the return value is expressed. [String](../../sql-reference/data-types/string.md).
    Possible values:

    - `nanosecond` (possible abbreviations: `ns`)
    - `microsecond` (possible abbreviations: `us`, `u`)
    - `millisecond` (possible abbreviations: `ms`)
    - `second` (possible abbreviations: `ss`, `s`)
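A quick hedged illustration; `date_diff` counts crossed unit boundaries, so the dates from the `age` example above give the same day count here:

```sql
SELECT date_diff('day', toDate('2021-12-29'), toDate('2022-01-01')); -- 3
```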
``` xml
<http_handlers>
<rule>
<url><![CDATA[regex:/query_param_with_url/(?P<name_1>[^/]+)]]></url>
<methods>GET</methods>
<headers>
<XXX>TEST_HEADER_VALUE</XXX>
<PARAMS_XXX><![CDATA[regex:(?P<name_2>[^/]+)]]></PARAMS_XXX>
</headers>
<handler>
<type>predefined_query_handler</type>
<query>
SELECT name, value FROM system.settings
WHERE name IN ({name_1:String}, {name_2:String})
</query>
</handler>
</rule>
<defaults/>
</http_handlers>
```
``` bash
$ curl -H 'XXX:TEST_HEADER_VALUE' -H 'PARAMS_XXX:max_final_threads' 'http://localhost:8123/query_param_with_url/max_threads?max_threads=1&max_final_threads=2'
max_final_threads 2
max_threads 1
```
:::warning
Only one query is supported in one `predefined_query_handler`.
:::
### Dynamic query handler {#dynamic_query_handler}
- `r1` - the number of unique visitors who visited the site during 2020-01-01 (the `cond1` condition).
- `r2` - the number of unique visitors who visited the site during the period between 2020-01-01 and 2020-01-02 (the `cond1` and `cond2` conditions).
- `r3` - the number of unique visitors who visited the site on 2020-01-01 and 2020-01-03 (the `cond1` and `cond3` conditions).
## uniqUpTo(N)(x) {#uniquptonx}
## date_diff

```sql
date_diff('unit', startdate, enddate, [timezone])
```
- `unit``value`对应的时间单位。类型为[String](../../sql-reference/data-types/string.md)。
可能的值:
- `nanosecond`
- `microsecond`
- `millisecond`
- `second`
---
slug: /zh/sql-reference/functions/string-search-functions
---
# String search functions

All functions in this section search case-sensitively by default. Case-insensitive search is usually provided by separate function variants.
Note that case-insensitive search follows the lowercase/uppercase rules of the English language. For example, an uppercase `i` in English is `I`, whereas in Turkish it is `İ`; results for languages other than English may not be as expected.

Functions in this section also assume that the search string and the searched string are single-byte encoded text (e.g. ASCII). If this assumption is violated, no exception is thrown and the results are undefined. Search in UTF-8 encoded strings is usually provided by separate function variants; likewise, if a UTF-8 function variant is used and the input strings are not UTF-8 encoded text, no exception is thrown and the results are undefined. Note that no automatic Unicode normalization is performed; you can use the [normalizeUTF8*()](https://clickhouse.com/docs/zh/sql-reference/functions/string-functions/) functions for that.

Functions for [string manipulation](string-functions.md) and [string replacement](string-replace-functions.md) are described separately.
## position
Returns the position (in bytes, starting at 1) of the substring `needle` in the string `haystack`.

**Syntax**
``` sql
position(haystack, needle[, start_pos])
```
Alias:
- `position(needle IN haystack)`

**Arguments**

- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `start_pos` — Position (1-based) in `haystack` at which the search starts. [UInt](../../sql-reference/data-types/int-uint.md). Optional.

**Returned value**

- Starting position in bytes (counting from 1), if the substring was found.
- 0, if the substring was not found.

If the substring `needle` is empty, these rules apply:
- if no `start_pos` was specified: return `1`
- if `start_pos = 0`: return `1`
- if `start_pos >= 1` and `start_pos <= length(haystack) + 1`: return `start_pos`
- otherwise: return `0`

The same rules also apply to the functions [locate](#locate), [positionCaseInsensitive](#positionCaseInsensitive), [positionUTF8](#positionUTF8) and [positionCaseInsensitiveUTF8](#positionCaseInsensitiveUTF8).

Type: `Integer`.

**Examples**
``` sql
SELECT position('Hello, world!', '!');
```
Result:
``` text
┌─position('Hello, world!', '!')─┐
│ 13 │
└────────────────────────────────┘
```
Example with the `start_pos` argument:

``` sql
SELECT
    position('Hello, world!', 'o', 1),
    position('Hello, world!', 'o', 7)
```

Result:
``` text
┌─position('Hello, world!', 'o', 1)─┬─position('Hello, world!', 'o', 7)─┐
│ 5 │ 9 │
└───────────────────────────────────┴───────────────────────────────────┘
```
Example for the `needle IN haystack` syntax:
```sql
SELECT 6 = position('/' IN s) FROM (SELECT 'Hello/World' AS s);
```
Result:
```text
┌─equals(6, position(s, '/'))─┐
│ 1 │
└─────────────────────────────┘
```
Example with an empty `needle` substring:
``` sql
SELECT
position('abc', ''),
position('abc', '', 0),
position('abc', '', 1),
position('abc', '', 2),
position('abc', '', 3),
position('abc', '', 4),
position('abc', '', 5)
```
Result:
``` text
┌─position('abc', '')─┬─position('abc', '', 0)─┬─position('abc', '', 1)─┬─position('abc', '', 2)─┬─position('abc', '', 3)─┬─position('abc', '', 4)─┬─position('abc', '', 5)─┐
│ 1 │ 1 │ 1 │ 2 │ 3 │ 4 │ 0 │
└─────────────────────┴────────────────────────┴────────────────────────┴────────────────────────┴────────────────────────┴────────────────────────┴────────────────────────┘
```
## locate
Like [position](#position) but with `haystack` and `needle` swapped.

The behavior of this function depends on the ClickHouse version:
- in versions < v24.3, `locate` was an alias of function `position` and accepted arguments `(haystack, needle[, start_pos])`.
- in versions >= v24.3, `locate` is an individual function (for better compatibility with MySQL) and accepts arguments `(needle, haystack[, start_pos])`. The previous behavior can be restored using the setting [function_locate_has_mysql_compatible_argument_order = false](../../operations/settings/settings.md#function-locate-has-mysql-compatible-argument-order).

**Syntax**
``` sql
locate(needle, haystack[, start_pos])
```
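For illustration, a minimal sketch, assuming a version >= v24.3 (i.e. the MySQL-compatible argument order described above):

``` sql
SELECT locate('lo', 'Hello, world!');
-- expected to return 4: 'lo' first occurs at byte position 4 of the haystack
```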
## positionCaseInsensitive
Like [position](#position) but searches case-insensitively.
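For example, the following query is expected to find the needle despite the case mismatch:

``` sql
SELECT positionCaseInsensitive('Hello, world!', 'hello');
-- expected to return 1, since 'hello' matches 'Hello' at position 1
```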
## positionUTF8
Like [position](#position) but assumes that `haystack` and `needle` are UTF-8 encoded strings.
**Example**

Function `positionUTF8` correctly counts the character `ö` (represented by two bytes in UTF-8) as a single Unicode code point:
``` sql
SELECT positionUTF8('Motörhead', 'r');
```
Result:
``` text
┌─positionUTF8('Motörhead', 'r')─┐
│                              5 │
└────────────────────────────────┘
```
## positionCaseInsensitiveUTF8
Like [positionUTF8](#positionutf8) but searches case-insensitively.
## multiSearchAllPositions
Like [position](#position) but returns an array of positions (in bytes, starting at 1) for multiple `needle` substrings in a `haystack` string.
:::note
All functions starting with `multiSearch*()` support at most 2<sup>8</sup> needles.
:::
**Syntax**
``` sql
multiSearchAllPositions(haystack, [needle1, needle2, ..., needleN])
```
**Arguments**

- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Array of substrings to be searched. [Array](../../sql-reference/data-types/array.md).

**Returned value**

- Array of positions, one element per element of the `needle` array. If a substring was found in `haystack`, the element is its position counted in bytes and starting at 1; if the substring was not found, the element is 0.

**Example**
``` sql
SELECT multiSearchAllPositions('Hello, World!', ['hello', '!', 'world']);
```
Result:
``` text
┌─multiSearchAllPositions('Hello, World!', ['hello', '!', 'world'])─┐
│ [0,13,0] │
└───────────────────────────────────────────────────────────────────┘
```
## multiSearchAllPositionsUTF8
Like [multiSearchAllPositions](#multiSearchAllPositions) but assumes that `haystack` and the `needle` substrings are UTF-8 encoded strings.
## multiSearchFirstPosition
Like `position` but returns the leftmost offset in a `haystack` string which matches any of multiple `needle` strings.

Functions `multiSearchFirstPositionCaseInsensitive`, `multiSearchFirstPositionUTF8` and `multiSearchFirstPositionCaseInsensitiveUTF8` provide case-insensitive and/or UTF-8 variants of this function.

**Syntax**
```sql
multiSearchFirstPosition(haystack, [needle1, needle2, …, needleN])
```
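**Example**

A small illustrative query (the leftmost match among all needles wins):

``` sql
SELECT multiSearchFirstPosition('Hello World', ['llo', 'Wor', 'ld']);
-- expected to return 3: 'llo' matches at position 3, before 'Wor' (7) and 'ld' (10)
```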
## multiSearchFirstIndex
Returns the index `i` (starting from 1) of the leftmost found needle<sub>i</sub> in the string `haystack`, or 0 if no needle was found.

Functions `multiSearchFirstIndexCaseInsensitive`, `multiSearchFirstIndexUTF8` and `multiSearchFirstIndexCaseInsensitiveUTF8` provide case-insensitive and/or UTF-8 variants of this function.

**Syntax**
```sql
multiSearchFirstIndex(haystack, [needle1, needle2, …, needleN])
```
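**Example**

An illustrative query; only the second needle occurs in the haystack:

``` sql
SELECT multiSearchFirstIndex('Hello World', ['xyz', 'World']);
-- expected to return 2, the 1-based index of the needle that is found
```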
## multiSearchAny {#multisearchany}
Returns 1 if at least one of multiple `needle` substrings occurs in the `haystack` string, and 0 otherwise.

Functions `multiSearchAnyCaseInsensitive`, `multiSearchAnyUTF8` and `multiSearchAnyCaseInsensitiveUTF8` provide case-insensitive and/or UTF-8 variants of this function.

**Syntax**
```sql
multiSearchAny(haystack, [needle1, needle2, …, needleN])
```
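**Example**

A minimal sketch:

``` sql
SELECT
    multiSearchAny('ClickHouse', ['House', 'Click']),  -- expected: 1 (both needles occur)
    multiSearchAny('ClickHouse', ['abc', 'xyz'])       -- expected: 0 (no needle occurs)
```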
## match {#match}
Returns whether the string `haystack` matches the regular expression `pattern` in [re2 regular expression syntax](https://github.com/google/re2/wiki/Syntax).

Matching is based on UTF-8, e.g. `.` matches the Unicode code point `¥` which is represented by two bytes in UTF-8. The regular expression must not contain null bytes. If `haystack` or `pattern` is not valid UTF-8, the behavior is undefined.

Unlike re2's default behavior, `.` matches line breaks. To disable this, prepend the pattern with `(?-s)`.

If you only want to search for substrings, you can use the functions [like](#like) or [position](#position) instead - they work much faster than this function.

**Syntax**
```sql
match(haystack, pattern)
```
Alias: `haystack REGEXP pattern` (operator)
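**Example**

For illustration:

``` sql
SELECT
    match('Hello, world!', '^Hello'),  -- expected: 1
    match('Hello, world!', '^world')   -- expected: 0 (no match at the start)
```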
## multiMatchAny
Like `match` but returns 1 if at least one of the patterns pattern<sub>i</sub> matches the string `haystack`, and 0 otherwise.
:::note
Functions of the `multi[Fuzzy]Match*()` family use the [Vectorscan](https://github.com/VectorCamp/vectorscan) library, so they are only enabled if ClickHouse is compiled with vectorscan support.

To turn off all functions that use vectorscan (hyperscan), use the setting `SET allow_hyperscan = 0;`.

Due to restrictions of vectorscan, the length of the `haystack` string must be less than 2<sup>32</sup> bytes.

Hyperscan is generally vulnerable to regular expression denial of service (ReDoS) attacks. For more information see
[https://www.usenix.org/conference/usenixsecurity22/presentation/turonova](https://www.usenix.org/conference/usenixsecurity22/presentation/turonova)
[https://doi.org/10.1007/s10664-021-10033-1](https://doi.org/10.1007/s10664-021-10033-1)
[https://doi.org/10.1145/3236024.3236027](https://doi.org/10.1145/3236024.3236027)
Users are advised to check the provided patterns carefully.
:::
## multiMatchAnyIndex大海捞针\[模式<sub>1</sub>,模式<sub>2</sub>, …, pattern<sub>n</sub>\]) {#multimatchanyindexhaystack-pattern1-pattern2-patternn}
如果仅希望搜索子字符串,可以使用函数 [multiSearchAny](#multisearchany) 来替代,这些函数的性能比此函数更高。
与`multiMatchAny`相同但返回与haystack匹配的任何内容的索引位置。
**语法**
## multiFuzzyMatchAny(干草堆,距离,\[模式<sub>1</sub>,模式<sub>2</sub>, …, pattern<sub>n</sub>\]) {#multifuzzymatchanyhaystack-distance-pattern1-pattern2-patternn}
```sql
multiMatchAny(haystack, [pattern1, pattern2, …, patternN])
```
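**Example**

A sketch, assuming a build with vectorscan support (see the note above):

``` sql
SELECT multiMatchAny('Hello, world!', ['^world', 'Hello']);
-- expected to return 1, because the second pattern matches
```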
与`multiMatchAny`相同但如果在haystack能够查找到任何模式匹配能够在指定的[编辑距离](https://en.wikipedia.org/wiki/Edit_distance)内进行匹配则返回1。此功能也处于实验模式可能非常慢。有关更多信息请参阅[hyperscan文档](https://intel.github.io/hyperscan/dev-reference/compilation.html#approximate-matching)。
## multiMatchAnyIndex
## multiFuzzyMatchAnyIndex(大海捞针,距离,\[模式<sub>1</sub>,模式<sub>2</sub>, …, pattern<sub>n</sub>\]) {#multifuzzymatchanyindexhaystack-distance-pattern1-pattern2-patternn}
类似于 `multiMatchAny` ,返回任何子串匹配 `haystack` 的索引。
与`multiFuzzyMatchAny`相同,但返回匹配项的匹配能容的索引位置。
**语法**
```sql
multiMatchAnyIndex(haystack, [pattern1, pattern2, …, patternN])
```
## multiMatchAllIndices
Like `multiMatchAny` but returns an array with all indices of patterns that match the `haystack`.

**Syntax**
```sql
multiMatchAllIndices(haystack, [pattern1, pattern2, …, patternN])
```
## multiFuzzyMatchAny
Like `multiMatchAny` but returns 1 if any pattern matches the `haystack` within a constant [edit distance](https://en.wikipedia.org/wiki/Edit_distance). This function relies on the experimental feature of the [hyperscan](https://intel.github.io/hyperscan/dev-reference/compilation.html#approximate-matching) library and can be slow for some corner cases. The performance depends on the edit distance `distance` value and the patterns used, but it is always more expensive compared to a non-fuzzy search.
:::note
The `multiFuzzyMatch*()` function family does not support UTF-8 regular expressions (it treats them as a sequence of bytes) due to restrictions of hyperscan.
:::
**Syntax**
```sql
multiFuzzyMatchAny(haystack, distance, [pattern1, pattern2, …, patternN])
```
## multiFuzzyMatchAnyIndex
类似于 `multiFuzzyMatchAny` 返回在编辑距离内与`haystack`匹配的任何索引
**语法**
```sql
multiFuzzyMatchAnyIndex(haystack, distance, [pattern1, pattern2, …, patternN])
```
## multiFuzzyMatchAllIndices
类似于 `multiFuzzyMatchAny` 返回在编辑距离内与`haystack`匹配的所有索引的数组。
**语法**
```sql
multiFuzzyMatchAllIndices(haystack, distance, [pattern1, pattern2, …, patternN])
```
## extract
Extracts a fragment of a string using a regular expression. If `haystack` does not match the regular expression `pattern`, an empty string is returned.

For a regular expression without subpatterns, the function uses the fragment that matches the entire regular expression. Otherwise, it uses the fragment that matches the first subpattern.

**Syntax**
```sql
extract(haystack, pattern)
```
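**Example**

For illustration, with and without a subpattern:

``` sql
SELECT
    extract('number: 100', '\\d+'),           -- expected: '100' (whole match, no subpattern)
    extract('number: 100', 'number: (\\d+)')  -- expected: '100' (first subpattern)
```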
## extractAll
Extracts all fragments of a string using a regular expression. If `haystack` does not match the regular expression `pattern`, an empty string is returned.

Returns an array of strings consisting of all matches of the regular expression.

The behavior with respect to subpatterns is the same as in the function `extract`.

**Syntax**
```sql
extractAll(haystack, pattern)
```
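**Example**

A minimal sketch:

``` sql
SELECT extractAll('100-200-300', '\\d+');
-- expected: ['100','200','300']
```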
## extractAllGroupsHorizontal
Matches all groups of the `haystack` string using the `pattern` regular expression.

Returns an array of arrays, where the first array includes all fragments matching the first group, the second array those matching the second group, etc.

This function is slower than [extractAllGroupsVertical](#extractallgroups-vertical).

**Syntax**
``` sql
extractAllGroupsHorizontal(haystack, pattern)
```
**Arguments**

- `haystack` — Input string. [String](../../sql-reference/data-types/string.md).
- `pattern` — Regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). Must contain groups, each group enclosed in parentheses. If `pattern` contains no groups, an exception is thrown. [String](../../sql-reference/data-types/string.md).

**Returned value**

- Type: [Array](../../sql-reference/data-types/array.md).

If `haystack` does not match the `pattern` regular expression, an array of empty arrays is returned.

**Example**
``` sql
SELECT extractAllGroupsHorizontal('abc=111, def=222, ghi=333', '("[^"]+"|\\w+)=("[^"]+"|\\w+)');
```
Result:
``` text
┌─extractAllGroupsHorizontal('abc=111, def=222, ghi=333', '("[^"]+"|\\w+)=("[^"]+"|\\w+)')─┐
│ [['abc','def','ghi'],['111','222','333']] │
└──────────────────────────────────────────────────────────────────────────────────────────┘
```
## extractAllGroupsVertical
Matches all groups of the `haystack` string using the `pattern` regular expression. Returns an array of arrays, where each array includes matching fragments from every group. Fragments are grouped in order of their appearance in `haystack`.

**Syntax**
``` sql
extractAllGroupsVertical(haystack, pattern)
```
**Arguments**

- `haystack` — Input string. [String](../../sql-reference/data-types/string.md).
- `pattern` — Regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). Must contain groups, each group enclosed in parentheses. If `pattern` contains no groups, an exception is thrown. [String](../../sql-reference/data-types/string.md).

**Returned value**

- Type: [Array](../../sql-reference/data-types/array.md).

If `haystack` does not match the `pattern` regular expression, an empty array is returned.

**Example**
``` sql
SELECT extractAllGroupsVertical('abc=111, def=222, ghi=333', '("[^"]+"|\\w+)=("[^"]+"|\\w+)');
```
Result:
``` text
┌─extractAllGroupsVertical('abc=111, def=222, ghi=333', '("[^"]+"|\\w+)=("[^"]+"|\\w+)')─┐
│ [['abc','111'],['def','222'],['ghi','333']] │
└────────────────────────────────────────────────────────────────────────────────────────┘
```
## like {#like}
Returns whether the string `haystack` matches the LIKE expression `pattern`.

A LIKE expression can contain normal characters and the following metasymbols:

- `%` indicates an arbitrary number of arbitrary characters (including zero characters).
- `_` indicates a single arbitrary character.
- `\` is for escaping the literals `%`, `_` and `\`.

Matching is based on UTF-8, e.g. `_` matches the Unicode code point `¥` which is represented by two bytes in UTF-8.

If `haystack` or the LIKE expression is not valid UTF-8, the behavior is undefined.

No automatic Unicode normalization is performed; you can use the [normalizeUTF8*()](https://clickhouse.com/docs/zh/sql-reference/functions/string-functions/) functions for that.

To match against the literal characters `%`, `_` and `\` (which are LIKE metacharacters), prepend them with a backslash: `\%`, `\_` and `\\`.
The backslash loses its special meaning (i.e. is interpreted literally) if it prepends a character other than `%`, `_` or `\`.
Note that ClickHouse requires backslashes in strings [to also be escaped](../syntax.md#string), so you would actually need to write `\\%`, `\\_` and `\\\\`.

For LIKE expressions of the form `%needle%`, the function is as fast as the `position` function.
All other LIKE expressions are internally converted to a regular expression and executed with a performance similar to the function `match`.

**Syntax**
```sql
like(haystack, pattern)
```
Alias: `haystack LIKE pattern` (operator)
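**Example**

For illustration (note the doubled backslashes when escaping a literal `_`):

``` sql
SELECT
    like('Hello, world!', '%world%'),  -- expected: 1
    like('a_c', 'a\\_c')               -- expected: 1, matches the literal underscore
```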
## notLike {#notlike}
Like `like` but negates the result.

Alias: `haystack NOT LIKE pattern` (operator)
## ilike
Like `like` but searches case-insensitively.

Alias: `haystack ILIKE pattern` (operator)
## notILike
Like `ilike` but negates the result.

Alias: `haystack NOT ILIKE pattern` (operator)
## ngramDistance
Calculates the 4-gram distance between a `haystack` string and a `needle` string. For this, it counts the symmetric difference between two multisets of 4-grams and normalizes it by the sum of their cardinalities. Returns a Float32 between 0 and 1. The smaller the result is, the more similar the strings are. Throws an exception if constant `needle` or `haystack` arguments are more than 32Kb in size. If any of the non-constant `haystack` or `needle` arguments is more than 32Kb in size, the distance is always 1.

Functions `ngramDistanceCaseInsensitive`, `ngramDistanceUTF8` and `ngramDistanceCaseInsensitiveUTF8` provide case-insensitive and/or UTF-8 variants of this function.

**Syntax**
```sql
ngramDistance(haystack, needle)
```
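**Example**

A sketch of the expected behavior (exact values depend on the 4-gram sets):

``` sql
SELECT
    ngramDistance('ClickHouse', 'ClickHouse'),  -- expected: 0 (identical strings)
    ngramDistance('ClickHouse', 'House')        -- expected: a value between 0 and 1
```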
## ngramSearch
Like `ngramDistance` but calculates the non-symmetric difference between a `needle` string and a `haystack` string, i.e. the number of n-grams from `needle` minus the common number of n-grams, normalized by the number of `needle` n-grams. Returns a Float32 between 0 and 1. The bigger the result is, the more likely `needle` is in the `haystack`. This function is useful for fuzzy string search. Also see function `soundex`.

Functions `ngramSearchCaseInsensitive`, `ngramSearchUTF8` and `ngramSearchCaseInsensitiveUTF8` provide case-insensitive and/or UTF-8 variants of this function.

:::note
The UTF-8 variants use the 3-gram distance. These are not perfectly fair n-gram distances. We use 2-byte hashes to hash the n-grams and then calculate the (non-)symmetric difference between these hash tables; collisions may occur. In case-insensitive UTF-8 mode, we do not use a fair `tolower` function; instead, we zero the 5th bit (counting from zero) of each code point byte and the first bit of the zeroth byte if there is more than one byte. This works for Latin and mostly for all Cyrillic letters.
:::

**Syntax**
```sql
ngramSearch(haystack, needle)
```
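**Example**

A sketch of the expected behavior:

``` sql
SELECT
    ngramSearch('Hello World', 'World'),  -- expected: close to 1, the needle n-grams are contained
    ngramSearch('Hello World', 'xyz')     -- expected: 0, no common n-grams
```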
## extractAll大海捞针图案) {#extractallhaystack-pattern}
## countSubstrings
使用正则表达式提取字符串的所有片段。如果haystackpattern正则表达式不匹配则返回一个空字符串。否则返回所有与正则表达式匹配的字符串数组。通常行为与extract函数相同它采用第一个子模式如果没有子模式则采用整个表达式
返回字符串 `haystack` 中子字符串 `needle` 出现的次数
## 像(干草堆,模式),干草堆像模式运算符 {#likehaystack-pattern-haystack-like-pattern-operator}
函数 `countSubstringsCaseInsensitive``countSubstringsCaseInsensitiveUTF8` 提供此函数的不区分大小写以及 UTF-8 变体。
检查字符串是否与简单正则表达式匹配。
正则表达式可以包含的元符号有``和`_`。
**语法**
`%` 表示任何字节数(包括零字符)。
``` sql
countSubstrings(haystack, needle[, start_pos])
```
**Arguments**

- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Substring to be counted. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `start_pos` — Position (1-based) in `haystack` at which the search starts. [UInt](../../sql-reference/data-types/int-uint.md). Optional.

**Returned value**
- The number of occurrences.

Type: [UInt64](../../sql-reference/data-types/int-uint.md).

**Examples**
``` sql
SELECT countSubstrings('aaaa', 'aa');
```
对于不区分大小写的搜索或/和UTF-8格式使用函数`ngramDistanceCaseInsensitivengramDistanceUTF8ngramDistanceCaseInsensitiveUTF8`。
结果:
## ツ暗ェツ氾环催ツ団ツ法ツ人) {#ngramsearchhaystack-needle}
``` text
┌─countSubstrings('aaaa', 'aa')─┐
│ 2 │
└───────────────────────────────┘
```
Example with the `start_pos` argument:
```sql
SELECT countSubstrings('abc___abc', 'abc', 4);
```
Result:
``` text
┌─countSubstrings('abc___abc', 'abc', 4)─┐
│ 1 │
└────────────────────────────────────────┘
```
## countMatches
Returns the number of regular expression matches for the `pattern` in the `haystack`.

**Syntax**
``` sql
countMatches(haystack, pattern)
```
**Arguments**

- `haystack` — Input string. [String](../../sql-reference/data-types/string.md).
- `pattern` — Regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). [String](../../sql-reference/data-types/string.md).

**Returned value**

- The number of matches.

Type: [UInt64](../../sql-reference/data-types/int-uint.md).

**Examples**
``` sql
SELECT countMatches('foobar.com', 'o+');
```
Result:
``` text
┌─countMatches('foobar.com', 'o+')─┐
│ 2 │
└──────────────────────────────────┘
```
``` sql
SELECT countMatches('aaaa', 'aa');
```
Result:
``` text
┌─countMatches('aaaa', 'aa')────┐
│ 2 │
└───────────────────────────────┘
```
## countMatchesCaseInsensitive
Like `countMatches(haystack, pattern)` but searches case-insensitively.
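For example:

``` sql
SELECT countMatchesCaseInsensitive('AAAA', 'aa');
-- expected: 2, same as countMatches('aaaa', 'aa')
```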
## regexpExtract
Extracts the first string in `haystack` that matches the regular expression `pattern` and corresponds to the regular expression group index.

**Syntax**
``` sql
regexpExtract(haystack, pattern[, index])
```
Alias: `REGEXP_EXTRACT(haystack, pattern[, index])`.
**Arguments**

- `haystack` — String in which the regular expression pattern is matched. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `pattern` — Regular expression, must be constant. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `index` — An integer number greater than or equal to 0, 1 by default. It represents which regular expression group to extract. [UInt or Int](../../sql-reference/data-types/int-uint.md). Optional.

**Returned value**

`pattern` may contain multiple regular expression groups; `index` indicates which group to extract. An index of 0 means matching the entire regular expression.

Type: `String`.

**Examples**
``` sql
SELECT
regexpExtract('100-200', '(\\d+)-(\\d+)', 1),
regexpExtract('100-200', '(\\d+)-(\\d+)', 2),
regexpExtract('100-200', '(\\d+)-(\\d+)', 0),
regexpExtract('100-200', '(\\d+)-(\\d+)');
```
Result:
``` text
┌─regexpExtract('100-200', '(\\d+)-(\\d+)', 1)─┬─regexpExtract('100-200', '(\\d+)-(\\d+)', 2)─┬─regexpExtract('100-200', '(\\d+)-(\\d+)', 0)─┬─regexpExtract('100-200', '(\\d+)-(\\d+)')─┐
│ 100 │ 200 │ 100-200 │ 100 │
└──────────────────────────────────────────────┴──────────────────────────────────────────────┴──────────────────────────────────────────────┴───────────────────────────────────────────┘
```
## hasSubsequence
Returns 1 if `needle` is a subsequence of `haystack`, or 0 otherwise.

A subsequence of a string is a sequence that can be derived from the given string by deleting zero or more elements without changing the order of the remaining elements.

**Syntax**
``` sql
hasSubsequence(haystack, needle)
```
**Arguments**

- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Subsequence to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal).

**Returned value**

- 1, if `needle` is a subsequence of `haystack`.
- 0, otherwise.

Type: `UInt8`.

**Examples**
``` sql
SELECT hasSubsequence('garbage', 'arg') ;
```
Result:
``` text
┌─hasSubsequence('garbage', 'arg')─┐
│ 1 │
└──────────────────────────────────┘
```
## hasSubsequenceCaseInsensitive
Like [hasSubsequence](#hasSubsequence) but searches case-insensitively.
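For example:

``` sql
SELECT hasSubsequenceCaseInsensitive('garbage', 'ARG');
-- expected: 1, since 'a', 'r', 'g' appear in order in 'garbage'
```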
## hasSubsequenceUTF8
Like [hasSubsequence](#hasSubsequence) but assumes that `haystack` and `needle` are UTF-8 encoded strings.
## hasSubsequenceCaseInsensitiveUTF8
Like [hasSubsequenceUTF8](#hasSubsequenceUTF8) but searches case-insensitively.

View File

@ -30,10 +30,6 @@ conflicts:
contents:
- src: root/usr/lib/debug/usr/bin/clickhouse.debug
dst: /usr/lib/debug/usr/bin/clickhouse.debug
- src: root/usr/lib/debug/usr/bin/clickhouse-odbc-bridge.debug
dst: /usr/lib/debug/usr/bin/clickhouse-odbc-bridge.debug
- src: root/usr/lib/debug/usr/bin/clickhouse-library-bridge.debug
dst: /usr/lib/debug/usr/bin/clickhouse-library-bridge.debug
# docs
- src: ../AUTHORS
dst: /usr/share/doc/clickhouse-common-static-dbg/AUTHORS

View File

@ -36,10 +36,6 @@ contents:
dst: /usr/bin/clickhouse
- src: root/usr/bin/clickhouse-extract-from-config
dst: /usr/bin/clickhouse-extract-from-config
- src: root/usr/bin/clickhouse-library-bridge
dst: /usr/bin/clickhouse-library-bridge
- src: root/usr/bin/clickhouse-odbc-bridge
dst: /usr/bin/clickhouse-odbc-bridge
- src: root/usr/share/bash-completion/completions
dst: /usr/share/bash-completion/completions
- src: root/usr/share/clickhouse

View File

@ -0,0 +1,35 @@
# package sources should be placed in ${PWD}/root
# nfpm should run from the same directory with a config
name: "clickhouse-library-bridge"
description: |
ClickHouse Library Bridge - is a separate process for loading libraries for the 'library' dictionary sources and the CatBoost library.
ClickHouse is a column-oriented database management system
that allows generating analytical data reports in real time.
# Common packages config
arch: "${DEB_ARCH}" # amd64, arm64
platform: "linux"
version: "${CLICKHOUSE_VERSION_STRING}"
vendor: "ClickHouse Inc."
homepage: "https://clickhouse.com"
license: "Apache"
section: "database"
priority: "optional"
maintainer: "ClickHouse Dev Team <packages+linux@clickhouse.com>"
deb:
fields:
Source: clickhouse
# Package specific content
contents:
- src: root/usr/bin/clickhouse-library-bridge
dst: /usr/bin/clickhouse-library-bridge
# docs
- src: ../AUTHORS
dst: /usr/share/doc/clickhouse-library-bridge/AUTHORS
- src: ../CHANGELOG.md
dst: /usr/share/doc/clickhouse-library-bridge/CHANGELOG.md
- src: ../LICENSE
dst: /usr/share/doc/clickhouse-library-bridge/LICENSE
- src: ../README.md
dst: /usr/share/doc/clickhouse-library-bridge/README.md

View File

@ -0,0 +1,35 @@
# package sources should be placed in ${PWD}/root
# nfpm should run from the same directory with a config
name: "clickhouse-odbc-bridge"
description: |
ClickHouse ODBC Bridge - is a separate process for loading ODBC drivers and interacting with external databases using the ODBC protocol.
ClickHouse is a column-oriented database management system
that allows generating analytical data reports in real time.
# Common packages config
arch: "${DEB_ARCH}" # amd64, arm64
platform: "linux"
version: "${CLICKHOUSE_VERSION_STRING}"
vendor: "ClickHouse Inc."
homepage: "https://clickhouse.com"
license: "Apache"
section: "database"
priority: "optional"
maintainer: "ClickHouse Dev Team <packages+linux@clickhouse.com>"
deb:
fields:
Source: clickhouse
# Package specific content
contents:
- src: root/usr/bin/clickhouse-odbc-bridge
dst: /usr/bin/clickhouse-odbc-bridge
# docs
- src: ../AUTHORS
dst: /usr/share/doc/clickhouse-odbc-bridge/AUTHORS
- src: ../CHANGELOG.md
dst: /usr/share/doc/clickhouse-odbc-bridge/CHANGELOG.md
- src: ../LICENSE
dst: /usr/share/doc/clickhouse-odbc-bridge/LICENSE
- src: ../README.md
dst: /usr/share/doc/clickhouse-odbc-bridge/README.md

View File

@ -17,12 +17,13 @@
#include <Access/AccessControl.h>
#include <Common/config_version.h>
#include <Common/Exception.h>
#include <Common/formatReadable.h>
#include <Common/TerminalSize.h>
#include <Common/Config/ConfigProcessor.h>
#include <Common/Config/getClientConfigPath.h>
#include <Common/CurrentThread.h>
#include <Common/Exception.h>
#include <Common/TerminalSize.h>
#include <Common/config_version.h>
#include <Common/formatReadable.h>
#include <Columns/ColumnString.h>
#include <Poco/Util/Application.h>

View File

@ -237,7 +237,7 @@ int mainEntryClickHouseFormat(int argc, char ** argv)
ASTPtr res = parseQueryAndMovePosition(
parser, pos, end, "query", multiple, cmd_settings.max_query_size, cmd_settings.max_parser_depth, cmd_settings.max_parser_backtracks);
std::unique_ptr<ReadBuffer> insert_query_payload = nullptr;
std::unique_ptr<ReadBuffer> insert_query_payload;
/// If the query is INSERT ... VALUES, then we will try to parse the data.
if (auto * insert_query = res->as<ASTInsertQuery>(); insert_query && insert_query->data)
{

View File

@ -662,7 +662,6 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
" <server>\n"
" <certificateFile>" << (config_dir / "server.crt").string() << "</certificateFile>\n"
" <privateKeyFile>" << (config_dir / "server.key").string() << "</privateKeyFile>\n"
" <dhParamsFile>" << (config_dir / "dhparam.pem").string() << "</dhParamsFile>\n"
" </server>\n"
" </openSSL>\n"
"</clickhouse>\n";

View File

@ -24,9 +24,4 @@ target_link_libraries(clickhouse-library-bridge PRIVATE
set_target_properties(clickhouse-library-bridge PROPERTIES RUNTIME_OUTPUT_DIRECTORY ..)
if (SPLIT_DEBUG_SYMBOLS)
clickhouse_split_debug_symbols(TARGET clickhouse-library-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${SPLITTED_DEBUG_SYMBOLS_DIR} BINARY_PATH ../clickhouse-library-bridge)
else()
clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse-library-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${SPLITTED_DEBUG_SYMBOLS_DIR})
install(TARGETS clickhouse-library-bridge RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
endif()
install(TARGETS clickhouse-library-bridge RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)

View File

@ -35,7 +35,7 @@ public:
ExternalDictionaryLibraryAPI::CStrings strings; // will pass pointer to lib
private:
std::unique_ptr<ExternalDictionaryLibraryAPI::CString[]> ptr_holder = nullptr;
std::unique_ptr<ExternalDictionaryLibraryAPI::CString[]> ptr_holder;
Container strings_holder;
};

View File

@ -30,12 +30,7 @@ target_link_libraries(clickhouse-odbc-bridge PRIVATE
set_target_properties(clickhouse-odbc-bridge PROPERTIES RUNTIME_OUTPUT_DIRECTORY ..)
target_compile_options (clickhouse-odbc-bridge PRIVATE -Wno-reserved-id-macro -Wno-keyword-macro)
if (SPLIT_DEBUG_SYMBOLS)
clickhouse_split_debug_symbols(TARGET clickhouse-odbc-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${SPLITTED_DEBUG_SYMBOLS_DIR} BINARY_PATH ../clickhouse-odbc-bridge)
else()
clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse-odbc-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${SPLITTED_DEBUG_SYMBOLS_DIR})
install(TARGETS clickhouse-odbc-bridge RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
endif()
install(TARGETS clickhouse-odbc-bridge RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
if(ENABLE_TESTS)
add_subdirectory(tests)

View File

@ -12,6 +12,7 @@
#include <Poco/Net/HTTPServerRequest.h>
#include <Poco/Net/HTTPServerResponse.h>
#include <Poco/NumberParser.h>
#include <Interpreters/Context.h>
#include <Common/logger_useful.h>
#include <Common/BridgeProtocolVersion.h>
#include <Common/quoteString.h>

View File

@ -5,7 +5,6 @@
#if USE_ODBC
#include <Interpreters/Context_fwd.h>
#include <Interpreters/Context.h>
#include <Server/HTTP/HTTPRequestHandler.h>
#include <Poco/Logger.h>

View File

@ -734,13 +734,17 @@ try
LOG_INFO(log, "Available CPU instruction sets: {}", cpu_info);
#endif
bool will_have_trace_collector = hasPHDRCache() && config().has("trace_log");
// Initialize global thread pool. Do it before we fetch configs from zookeeper
// nodes (`from_zk`), because ZooKeeper interface uses the pool. We will
// ignore `max_thread_pool_size` in configs we fetch from ZK, but oh well.
GlobalThreadPool::initialize(
server_settings.max_thread_pool_size,
server_settings.max_thread_pool_free_size,
server_settings.thread_pool_queue_size);
server_settings.thread_pool_queue_size,
will_have_trace_collector ? server_settings.global_profiler_real_time_period_ns : 0,
will_have_trace_collector ? server_settings.global_profiler_cpu_time_period_ns : 0);
/// Wait for all threads to avoid possible use-after-free (for example logging objects can be already destroyed).
SCOPE_EXIT({
Stopwatch watch;

View File

@ -96,7 +96,7 @@
<to>https://{bucket}.s3.amazonaws.com</to>
</s3>
<gs>
<to>https://{bucket}.storage.googleapis.com</to>
<to>https://storage.googleapis.com/{bucket}</to>
</gs>
<oss>
<to>https://{bucket}.oss.aliyuncs.com</to>

View File

@ -16,6 +16,8 @@
#include <IO/ReadBufferFromString.h>
#include <Poco/UUIDGenerator.h>
#include <base/insertAtEnd.h>
#include <boost/range/adaptor/map.hpp>
#include <boost/range/algorithm/copy.hpp>
namespace fs = std::filesystem;

View File

@ -1,6 +1,8 @@
#include <Access/AccessRights.h>
#include <Common/logger_useful.h>
#include <base/sort.h>
#include <Common/Exception.h>
#include <Common/logger_useful.h>
#include <boost/container/small_vector.hpp>
#include <boost/range/adaptor/map.hpp>
#include <unordered_map>

View File

@ -205,7 +205,7 @@ enum class AccessType
M(SYSTEM_FLUSH, "", GROUP, SYSTEM) \
M(SYSTEM_THREAD_FUZZER, "SYSTEM START THREAD FUZZER, SYSTEM STOP THREAD FUZZER, START THREAD FUZZER, STOP THREAD FUZZER", GLOBAL, SYSTEM) \
M(SYSTEM_UNFREEZE, "SYSTEM UNFREEZE", GLOBAL, SYSTEM) \
M(SYSTEM_FAILPOINT, "SYSTEM ENABLE FAILPOINT, SYSTEM DISABLE FAILPOINT", GLOBAL, SYSTEM) \
M(SYSTEM_FAILPOINT, "SYSTEM ENABLE FAILPOINT, SYSTEM DISABLE FAILPOINT, SYSTEM WAIT FAILPOINT", GLOBAL, SYSTEM) \
M(SYSTEM_LISTEN, "SYSTEM START LISTEN, SYSTEM STOP LISTEN", GLOBAL, SYSTEM) \
M(SYSTEM_JEMALLOC, "SYSTEM JEMALLOC PURGE, SYSTEM JEMALLOC ENABLE PROFILE, SYSTEM JEMALLOC DISABLE PROFILE, SYSTEM JEMALLOC FLUSH PROFILE", GLOBAL, SYSTEM) \
M(SYSTEM, "", GROUP, ALL) /* allows to execute SYSTEM {SHUTDOWN|RELOAD CONFIG|...} */ \

View File

@ -115,34 +115,34 @@ public:
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
{
this->data(place).add(*columns[0], row_num, arena);
data(place).add(*columns[0], row_num, arena);
}
void addManyDefaults(AggregateDataPtr __restrict place, const IColumn ** columns, size_t, Arena * arena) const override
{
this->data(place).addManyDefaults(*columns[0], 0, arena);
data(place).addManyDefaults(*columns[0], 0, arena);
}
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
{
this->data(place).add(this->data(rhs), arena);
data(place).add(data(rhs), arena);
}
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
{
this->data(place).write(buf, *serialization);
data(place).write(buf, *serialization);
}
void deserialize(AggregateDataPtr place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
{
this->data(place).read(buf, *serialization, arena);
data(place).read(buf, *serialization, arena);
}
bool allocatesMemoryInArena() const override { return singleValueTypeAllocatesMemoryInArena(value_type_index); }
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
{
this->data(place).insertResultInto(to);
data(place).insertResultInto(to);
}
};

View File

@ -1,11 +1,11 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/Combinators/AggregateFunctionCombinatorFactory.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <DataTypes/DataTypesNumber.h>
#include <Functions/FunctionFactory.h>
#include <IO/WriteHelpers.h>
#include <Interpreters/Context.h>
#include <Common/CurrentThread.h>
static constexpr size_t MAX_AGGREGATE_FUNCTION_NAME_LENGTH = 1000;
