Merge branch 'master' into run_func_tests_in_parallel

Author: alesapin
Date: 2020-12-07 13:30:41 +03:00
Commit: 371cb23946
451 changed files with 45125 additions and 1928 deletions

.gitignore vendored

@ -125,4 +125,15 @@ website/package-lock.json
# Toolchains
/cmake/toolchain/*
# ANTLR extension cache
.antlr
# ANTLR generated files
/src/Parsers/New/*.interp
/src/Parsers/New/*.tokens
/src/Parsers/New/ClickHouseParserBaseVisitor.*
# pytest-profiling
/prof
*.iml

.gitmodules vendored

@ -142,9 +142,6 @@
[submodule "contrib/replxx"]
path = contrib/replxx
url = https://github.com/ClickHouse-Extras/replxx.git
[submodule "contrib/ryu"]
path = contrib/ryu
url = https://github.com/ClickHouse-Extras/ryu.git
[submodule "contrib/avro"]
path = contrib/avro
url = https://github.com/ClickHouse-Extras/avro.git
@ -172,6 +169,9 @@
[submodule "contrib/fmtlib"]
path = contrib/fmtlib
url = https://github.com/fmtlib/fmt.git
[submodule "contrib/antlr4-runtime"]
path = contrib/antlr4-runtime
url = https://github.com/ClickHouse-Extras/antlr4-runtime.git
[submodule "contrib/sentry-native"]
path = contrib/sentry-native
url = https://github.com/ClickHouse-Extras/sentry-native.git
@ -206,3 +206,6 @@
path = contrib/abseil-cpp
url = https://github.com/ClickHouse-Extras/abseil-cpp.git
branch = lts_2020_02_25
[submodule "contrib/dragonbox"]
path = contrib/dragonbox
url = https://github.com/ClickHouse-Extras/dragonbox.git


@ -257,6 +257,8 @@ if (WITH_COVERAGE AND COMPILER_GCC)
set(WITHOUT_COVERAGE "-fno-profile-arcs -fno-test-coverage")
endif()
set(COMPILER_FLAGS "${COMPILER_FLAGS}")
set (CMAKE_BUILD_COLOR_MAKEFILE ON)
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${COMPILER_FLAGS} ${PLATFORM_EXTRA_CXX_FLAG} ${COMMON_WARNING_FLAGS} ${CXX_WARNING_FLAGS}")
set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -O3 ${CMAKE_CXX_FLAGS_ADD}")


@ -16,5 +16,4 @@ ClickHouse® is an open-source column-oriented database management system that a
* You can also [fill this form](https://clickhouse.tech/#meet) to meet Yandex ClickHouse team in person.
## Upcoming Events
* [SF Bay Area ClickHouse Meetup (online)](https://www.meetup.com/San-Francisco-Bay-Area-ClickHouse-Meetup/events/274498897/) on 2 December 2020.
* [SF Bay Area ClickHouse Virtual Office Hours (online)](https://www.meetup.com/San-Francisco-Bay-Area-ClickHouse-Meetup/events/274273549/) on 20 January 2021.


@ -76,12 +76,6 @@
# define NO_SANITIZE_THREAD
#endif
#if defined __GNUC__ && !defined __clang__
# define OPTIMIZE(x) __attribute__((__optimize__(x)))
#else
# define OPTIMIZE(x)
#endif
/// A macro for suppressing warnings about unused variables or function results.
/// Useful for structured bindings which have no standard way to declare this.
#define UNUSED(...) (void)(__VA_ARGS__)


@ -5,7 +5,6 @@ LIBRARY()
ADDINCL(
GLOBAL clickhouse/base
GLOBAL contrib/libs/cctz/include
)
CFLAGS (GLOBAL -DARCADIA_BUILD)
@ -24,7 +23,7 @@ ELSEIF (OS_LINUX)
ENDIF ()
PEERDIR(
contrib/libs/cctz/src
contrib/libs/cctz
contrib/libs/cxxsupp/libcxx-filesystem
contrib/libs/poco/Net
contrib/libs/poco/Util


@ -4,7 +4,6 @@ LIBRARY()
ADDINCL(
GLOBAL clickhouse/base
GLOBAL contrib/libs/cctz/include
)
CFLAGS (GLOBAL -DARCADIA_BUILD)
@ -23,7 +22,7 @@ ELSEIF (OS_LINUX)
ENDIF ()
PEERDIR(
contrib/libs/cctz/src
contrib/libs/cctz
contrib/libs/cxxsupp/libcxx-filesystem
contrib/libs/poco/Net
contrib/libs/poco/Util


@ -104,6 +104,11 @@ void Connection::connect(const char* db,
if (mysql_options(driver.get(), MYSQL_OPT_LOCAL_INFILE, &enable_local_infile_arg))
throw ConnectionFailed(errorMessage(driver.get()), mysql_errno(driver.get()));
/// Enables auto-reconnect.
bool reconnect = true;
if (mysql_options(driver.get(), MYSQL_OPT_RECONNECT, reinterpret_cast<const char *>(&reconnect)))
throw ConnectionFailed(errorMessage(driver.get()), mysql_errno(driver.get()));
/// Specifies particular ssl key and certificate if it needs
if (mysql_ssl_set(driver.get(), ifNotEmpty(ssl_key), ifNotEmpty(ssl_cert), ifNotEmpty(ssl_ca), nullptr, nullptr))
throw ConnectionFailed(errorMessage(driver.get()), mysql_errno(driver.get()));
@ -115,11 +120,6 @@ void Connection::connect(const char* db,
if (mysql_set_character_set(driver.get(), "UTF8"))
throw ConnectionFailed(errorMessage(driver.get()), mysql_errno(driver.get()));
/// Enables auto-reconnect.
bool reconnect = true;
if (mysql_options(driver.get(), MYSQL_OPT_RECONNECT, reinterpret_cast<const char *>(&reconnect)))
throw ConnectionFailed(errorMessage(driver.get()), mysql_errno(driver.get()));
is_connected = true;
}


@ -26,6 +26,7 @@ void Pool::Entry::incrementRefCount()
mysql_thread_init();
}
void Pool::Entry::decrementRefCount()
{
if (!data)
@ -150,28 +151,39 @@ Pool::Entry Pool::tryGet()
initialize();
/// Searching for connection which was established but wasn't used.
for (auto & connection : connections)
/// Try to pick an idle connection from already allocated
for (auto connection_it = connections.cbegin(); connection_it != connections.cend();)
{
if (connection->ref_count == 0)
Connection * connection_ptr = *connection_it;
/// Fixme: There is a race condition here b/c we do not synchronize with Pool::Entry's copy-assignment operator
if (connection_ptr->ref_count == 0)
{
Entry res(connection, this);
return res.tryForceConnected() ? res : Entry();
Entry res(connection_ptr, this);
if (res.tryForceConnected()) /// Tries to reestablish connection as well
return res;
auto & logger = Poco::Util::Application::instance().logger();
logger.information("Idle connection to mysql server cannot be recovered, dropping it.");
/// This one is disconnected, cannot be reestablished and so needs to be disposed of.
connection_it = connections.erase(connection_it);
::delete connection_ptr; /// TODO: Manual memory management is awkward (matches allocConnection() method)
}
else
++connection_it;
}
/// Throws if pool is overflowed.
if (connections.size() >= max_connections)
throw Poco::Exception("mysqlxx::Pool is full");
/// Allocates new connection.
Connection * conn = allocConnection(true);
if (conn)
return Entry(conn, this);
Connection * connection_ptr = allocConnection(true);
if (connection_ptr)
return {connection_ptr, this};
return Entry();
return {};
}
void Pool::removeConnection(Connection* connection)
{
std::lock_guard<std::mutex> lock(mutex);
@ -199,11 +211,9 @@ void Pool::Entry::forceConnected() const
throw Poco::RuntimeException("Tried to access NULL database connection.");
Poco::Util::Application & app = Poco::Util::Application::instance();
if (data->conn.ping())
return;
bool first = true;
do
while (!tryForceConnected())
{
if (first)
first = false;
@ -225,7 +235,26 @@ void Pool::Entry::forceConnected() const
pool->rw_timeout,
pool->enable_local_infile);
}
while (!data->conn.ping());
}
bool Pool::Entry::tryForceConnected() const
{
auto * const mysql_driver = data->conn.getDriver();
const auto prev_connection_id = mysql_thread_id(mysql_driver);
if (data->conn.ping()) /// Attempts to reestablish lost connection
{
const auto current_connection_id = mysql_thread_id(mysql_driver);
if (prev_connection_id != current_connection_id)
{
auto & logger = Poco::Util::Application::instance().logger();
logger.information("Connection to mysql server has been reestablished. Connection id changed: %d -> %d",
prev_connection_id, current_connection_id);
}
return true;
}
return false;
}


@ -127,10 +127,7 @@ public:
void forceConnected() const;
/// Connects to database. If connection is failed then returns false.
bool tryForceConnected() const
{
return data->conn.ping();
}
bool tryForceConnected() const;
void incrementRefCount();
void decrementRefCount();


@ -1,2 +1,5 @@
add_executable (mysqlxx_test mysqlxx_test.cpp)
target_link_libraries (mysqlxx_test PRIVATE mysqlxx)
add_executable (mysqlxx_pool_test mysqlxx_pool_test.cpp)
target_link_libraries (mysqlxx_pool_test PRIVATE mysqlxx)


@ -0,0 +1,98 @@
#include <mysqlxx/mysqlxx.h>
#include <chrono>
#include <iostream>
#include <sstream>
#include <thread>
namespace
{
mysqlxx::Pool::Entry getWithFailover(mysqlxx::Pool & connections_pool)
{
using namespace std::chrono;
constexpr size_t max_tries = 3;
mysqlxx::Pool::Entry worker_connection;
for (size_t try_no = 1; try_no <= max_tries; ++try_no)
{
try
{
worker_connection = connections_pool.tryGet();
if (!worker_connection.isNull())
{
return worker_connection;
}
}
catch (const Poco::Exception & e)
{
if (e.displayText().find("mysqlxx::Pool is full") != std::string::npos)
{
std::cerr << e.displayText() << std::endl;
}
std::cerr << "Connection to " << connections_pool.getDescription() << " failed: " << e.displayText() << std::endl;
}
std::clog << "Connection to all replicas failed " << try_no << " times" << std::endl;
std::this_thread::sleep_for(1s);
}
std::stringstream message;
message << "Connections to all replicas failed: " << connections_pool.getDescription();
throw Poco::Exception(message.str());
}
}
int main(int, char **)
{
using namespace std::chrono;
const char * remote_mysql = "localhost";
const std::string test_query = "SHOW DATABASES";
mysqlxx::Pool mysql_conn_pool("", remote_mysql, "default", "10203040", 3306);
size_t iteration = 0;
while (++iteration)
{
std::clog << "Iteration: " << iteration << std::endl;
try
{
std::clog << "Acquiring DB connection ...";
mysqlxx::Pool::Entry worker = getWithFailover(mysql_conn_pool);
std::clog << "ok" << std::endl;
std::clog << "Preparing query (5s sleep) ...";
std::this_thread::sleep_for(5s);
mysqlxx::Query query = worker->query();
query << test_query;
std::clog << "ok" << std::endl;
std::clog << "Querying result (5s sleep) ...";
std::this_thread::sleep_for(5s);
mysqlxx::UseQueryResult result = query.use();
std::clog << "ok" << std::endl;
std::clog << "Fetching result data (5s sleep) ...";
std::this_thread::sleep_for(5s);
size_t rows_count = 0;
while (result.fetch())
++rows_count;
std::clog << "ok" << std::endl;
std::clog << "Read " << rows_count << " rows." << std::endl;
}
catch (const Poco::Exception & e)
{
std::cerr << "Iteration FAILED:\n" << e.displayText() << std::endl;
}
std::clog << "====================" << std::endl;
std::this_thread::sleep_for(3s);
}
}


@ -24,7 +24,7 @@ option (WEVERYTHING "Enable -Weverything option with some exceptions." ON)
# Control maximum size of stack frames. It can be important if the code is run in fibers with small stack size.
# Only in release build because debug has too large stack frames.
if ((NOT CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG") AND (NOT SANITIZE) AND (NOT CMAKE_CXX_COMPILER_ID MATCHES "AppleClang"))
add_warning(frame-larger-than=32768)
add_warning(frame-larger-than=65536)
endif ()
if (COMPILER_CLANG)


@ -21,6 +21,7 @@ endif()
set_property(DIRECTORY PROPERTY EXCLUDE_FROM_ALL 1)
add_subdirectory (antlr4-runtime-cmake)
add_subdirectory (boost-cmake)
add_subdirectory (cctz-cmake)
add_subdirectory (consistent-hashing-sumbur)
@ -34,7 +35,6 @@ add_subdirectory (libmetrohash)
add_subdirectory (lz4-cmake)
add_subdirectory (murmurhash)
add_subdirectory (replxx-cmake)
add_subdirectory (ryu-cmake)
add_subdirectory (unixodbc-cmake)
add_subdirectory (xz)
@ -321,3 +321,5 @@ endif()
if (USE_INTERNAL_ROCKSDB_LIBRARY)
add_subdirectory(rocksdb-cmake)
endif()
add_subdirectory(dragonbox)

contrib/antlr4-runtime vendored Submodule

@ -0,0 +1 @@
Subproject commit a2fa7b76e2ee16d2ad955e9214a90bbf79da66fc


@ -0,0 +1,156 @@
set (LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/antlr4-runtime)
set (SRCS
${LIBRARY_DIR}/ANTLRErrorListener.cpp
${LIBRARY_DIR}/ANTLRErrorStrategy.cpp
${LIBRARY_DIR}/ANTLRFileStream.cpp
${LIBRARY_DIR}/ANTLRInputStream.cpp
${LIBRARY_DIR}/atn/AbstractPredicateTransition.cpp
${LIBRARY_DIR}/atn/ActionTransition.cpp
${LIBRARY_DIR}/atn/AmbiguityInfo.cpp
${LIBRARY_DIR}/atn/ArrayPredictionContext.cpp
${LIBRARY_DIR}/atn/ATN.cpp
${LIBRARY_DIR}/atn/ATNConfig.cpp
${LIBRARY_DIR}/atn/ATNConfigSet.cpp
${LIBRARY_DIR}/atn/ATNDeserializationOptions.cpp
${LIBRARY_DIR}/atn/ATNDeserializer.cpp
${LIBRARY_DIR}/atn/ATNSerializer.cpp
${LIBRARY_DIR}/atn/ATNSimulator.cpp
${LIBRARY_DIR}/atn/ATNState.cpp
${LIBRARY_DIR}/atn/AtomTransition.cpp
${LIBRARY_DIR}/atn/BasicBlockStartState.cpp
${LIBRARY_DIR}/atn/BasicState.cpp
${LIBRARY_DIR}/atn/BlockEndState.cpp
${LIBRARY_DIR}/atn/BlockStartState.cpp
${LIBRARY_DIR}/atn/ContextSensitivityInfo.cpp
${LIBRARY_DIR}/atn/DecisionEventInfo.cpp
${LIBRARY_DIR}/atn/DecisionInfo.cpp
${LIBRARY_DIR}/atn/DecisionState.cpp
${LIBRARY_DIR}/atn/EmptyPredictionContext.cpp
${LIBRARY_DIR}/atn/EpsilonTransition.cpp
${LIBRARY_DIR}/atn/ErrorInfo.cpp
${LIBRARY_DIR}/atn/LexerAction.cpp
${LIBRARY_DIR}/atn/LexerActionExecutor.cpp
${LIBRARY_DIR}/atn/LexerATNConfig.cpp
${LIBRARY_DIR}/atn/LexerATNSimulator.cpp
${LIBRARY_DIR}/atn/LexerChannelAction.cpp
${LIBRARY_DIR}/atn/LexerCustomAction.cpp
${LIBRARY_DIR}/atn/LexerIndexedCustomAction.cpp
${LIBRARY_DIR}/atn/LexerModeAction.cpp
${LIBRARY_DIR}/atn/LexerMoreAction.cpp
${LIBRARY_DIR}/atn/LexerPopModeAction.cpp
${LIBRARY_DIR}/atn/LexerPushModeAction.cpp
${LIBRARY_DIR}/atn/LexerSkipAction.cpp
${LIBRARY_DIR}/atn/LexerTypeAction.cpp
${LIBRARY_DIR}/atn/LL1Analyzer.cpp
${LIBRARY_DIR}/atn/LookaheadEventInfo.cpp
${LIBRARY_DIR}/atn/LoopEndState.cpp
${LIBRARY_DIR}/atn/NotSetTransition.cpp
${LIBRARY_DIR}/atn/OrderedATNConfigSet.cpp
${LIBRARY_DIR}/atn/ParseInfo.cpp
${LIBRARY_DIR}/atn/ParserATNSimulator.cpp
${LIBRARY_DIR}/atn/PlusBlockStartState.cpp
${LIBRARY_DIR}/atn/PlusLoopbackState.cpp
${LIBRARY_DIR}/atn/PrecedencePredicateTransition.cpp
${LIBRARY_DIR}/atn/PredicateEvalInfo.cpp
${LIBRARY_DIR}/atn/PredicateTransition.cpp
${LIBRARY_DIR}/atn/PredictionContext.cpp
${LIBRARY_DIR}/atn/PredictionMode.cpp
${LIBRARY_DIR}/atn/ProfilingATNSimulator.cpp
${LIBRARY_DIR}/atn/RangeTransition.cpp
${LIBRARY_DIR}/atn/RuleStartState.cpp
${LIBRARY_DIR}/atn/RuleStopState.cpp
${LIBRARY_DIR}/atn/RuleTransition.cpp
${LIBRARY_DIR}/atn/SemanticContext.cpp
${LIBRARY_DIR}/atn/SetTransition.cpp
${LIBRARY_DIR}/atn/SingletonPredictionContext.cpp
${LIBRARY_DIR}/atn/StarBlockStartState.cpp
${LIBRARY_DIR}/atn/StarLoopbackState.cpp
${LIBRARY_DIR}/atn/StarLoopEntryState.cpp
${LIBRARY_DIR}/atn/TokensStartState.cpp
${LIBRARY_DIR}/atn/Transition.cpp
${LIBRARY_DIR}/atn/WildcardTransition.cpp
${LIBRARY_DIR}/BailErrorStrategy.cpp
${LIBRARY_DIR}/BaseErrorListener.cpp
${LIBRARY_DIR}/BufferedTokenStream.cpp
${LIBRARY_DIR}/CharStream.cpp
${LIBRARY_DIR}/CommonToken.cpp
${LIBRARY_DIR}/CommonTokenFactory.cpp
${LIBRARY_DIR}/CommonTokenStream.cpp
${LIBRARY_DIR}/ConsoleErrorListener.cpp
${LIBRARY_DIR}/DefaultErrorStrategy.cpp
${LIBRARY_DIR}/dfa/DFA.cpp
${LIBRARY_DIR}/dfa/DFASerializer.cpp
${LIBRARY_DIR}/dfa/DFAState.cpp
${LIBRARY_DIR}/dfa/LexerDFASerializer.cpp
${LIBRARY_DIR}/DiagnosticErrorListener.cpp
${LIBRARY_DIR}/Exceptions.cpp
${LIBRARY_DIR}/FailedPredicateException.cpp
${LIBRARY_DIR}/InputMismatchException.cpp
${LIBRARY_DIR}/InterpreterRuleContext.cpp
${LIBRARY_DIR}/IntStream.cpp
${LIBRARY_DIR}/Lexer.cpp
${LIBRARY_DIR}/LexerInterpreter.cpp
${LIBRARY_DIR}/LexerNoViableAltException.cpp
${LIBRARY_DIR}/ListTokenSource.cpp
${LIBRARY_DIR}/misc/InterpreterDataReader.cpp
${LIBRARY_DIR}/misc/Interval.cpp
${LIBRARY_DIR}/misc/IntervalSet.cpp
${LIBRARY_DIR}/misc/MurmurHash.cpp
${LIBRARY_DIR}/misc/Predicate.cpp
${LIBRARY_DIR}/NoViableAltException.cpp
${LIBRARY_DIR}/Parser.cpp
${LIBRARY_DIR}/ParserInterpreter.cpp
${LIBRARY_DIR}/ParserRuleContext.cpp
${LIBRARY_DIR}/ProxyErrorListener.cpp
${LIBRARY_DIR}/RecognitionException.cpp
${LIBRARY_DIR}/Recognizer.cpp
${LIBRARY_DIR}/RuleContext.cpp
${LIBRARY_DIR}/RuleContextWithAltNum.cpp
${LIBRARY_DIR}/RuntimeMetaData.cpp
${LIBRARY_DIR}/support/Any.cpp
${LIBRARY_DIR}/support/Arrays.cpp
${LIBRARY_DIR}/support/CPPUtils.cpp
${LIBRARY_DIR}/support/guid.cpp
${LIBRARY_DIR}/support/StringUtils.cpp
${LIBRARY_DIR}/Token.cpp
${LIBRARY_DIR}/TokenSource.cpp
${LIBRARY_DIR}/TokenStream.cpp
${LIBRARY_DIR}/TokenStreamRewriter.cpp
${LIBRARY_DIR}/tree/ErrorNode.cpp
${LIBRARY_DIR}/tree/ErrorNodeImpl.cpp
${LIBRARY_DIR}/tree/IterativeParseTreeWalker.cpp
${LIBRARY_DIR}/tree/ParseTree.cpp
${LIBRARY_DIR}/tree/ParseTreeListener.cpp
${LIBRARY_DIR}/tree/ParseTreeVisitor.cpp
${LIBRARY_DIR}/tree/ParseTreeWalker.cpp
${LIBRARY_DIR}/tree/pattern/Chunk.cpp
${LIBRARY_DIR}/tree/pattern/ParseTreeMatch.cpp
${LIBRARY_DIR}/tree/pattern/ParseTreePattern.cpp
${LIBRARY_DIR}/tree/pattern/ParseTreePatternMatcher.cpp
${LIBRARY_DIR}/tree/pattern/RuleTagToken.cpp
${LIBRARY_DIR}/tree/pattern/TagChunk.cpp
${LIBRARY_DIR}/tree/pattern/TextChunk.cpp
${LIBRARY_DIR}/tree/pattern/TokenTagToken.cpp
${LIBRARY_DIR}/tree/TerminalNode.cpp
${LIBRARY_DIR}/tree/TerminalNodeImpl.cpp
${LIBRARY_DIR}/tree/Trees.cpp
${LIBRARY_DIR}/tree/xpath/XPath.cpp
${LIBRARY_DIR}/tree/xpath/XPathElement.cpp
${LIBRARY_DIR}/tree/xpath/XPathLexer.cpp
${LIBRARY_DIR}/tree/xpath/XPathLexerErrorListener.cpp
${LIBRARY_DIR}/tree/xpath/XPathRuleAnywhereElement.cpp
${LIBRARY_DIR}/tree/xpath/XPathRuleElement.cpp
${LIBRARY_DIR}/tree/xpath/XPathTokenAnywhereElement.cpp
${LIBRARY_DIR}/tree/xpath/XPathTokenElement.cpp
${LIBRARY_DIR}/tree/xpath/XPathWildcardAnywhereElement.cpp
${LIBRARY_DIR}/tree/xpath/XPathWildcardElement.cpp
${LIBRARY_DIR}/UnbufferedCharStream.cpp
${LIBRARY_DIR}/UnbufferedTokenStream.cpp
${LIBRARY_DIR}/Vocabulary.cpp
${LIBRARY_DIR}/WritableToken.cpp
)
add_library (antlr4-runtime ${SRCS})
target_include_directories (antlr4-runtime SYSTEM PUBLIC ${LIBRARY_DIR})

contrib/dragonbox vendored Submodule

@ -0,0 +1 @@
Subproject commit b2751c65c0592c0239aec3becd53d0ea2fde9329

contrib/ryu vendored

@ -1 +0,0 @@
Subproject commit 5b4a853534b47438b4d97935370f6b2397137c2b


@ -1,10 +0,0 @@
SET(LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/ryu)
add_library(ryu
${LIBRARY_DIR}/ryu/d2fixed.c
${LIBRARY_DIR}/ryu/d2s.c
${LIBRARY_DIR}/ryu/f2s.c
${LIBRARY_DIR}/ryu/generic_128.c
)
target_include_directories(ryu SYSTEM BEFORE PUBLIC "${LIBRARY_DIR}")


@ -94,8 +94,8 @@ if [ -n "$(ls /docker-entrypoint-initdb.d/)" ] || [ -n "$CLICKHOUSE_DB" ]; then
pid="$!"
# check if clickhouse is ready to accept connections
# will try to send ping clickhouse via http_port (max 12 retries, with 1 sec delay)
if ! wget --spider --quiet --prefer-family=IPv6 --tries=12 --waitretry=1 --retry-connrefused "http://localhost:$HTTP_PORT/ping" ; then
# will try to send ping clickhouse via http_port (max 12 retries by default, with 1 sec delay)
if ! wget --spider --quiet --prefer-family=IPv6 --tries="${CLICKHOUSE_INIT_TIMEOUT:-12}" --waitretry=1 --retry-connrefused "http://localhost:$HTTP_PORT/ping" ; then
echo >&2 'ClickHouse init process failed.'
exit 1
fi


@ -131,12 +131,12 @@ function clone_submodules
cd "$FASTTEST_SOURCE"
SUBMODULES_TO_UPDATE=(
contrib/antlr4-runtime
contrib/boost
contrib/zlib-ng
contrib/libxml2
contrib/poco
contrib/libunwind
contrib/ryu
contrib/fmtlib
contrib/base64
contrib/cctz
@ -154,6 +154,7 @@ function clone_submodules
contrib/croaring
contrib/miniselect
contrib/xz
contrib/dragonbox
)
git submodule sync


@ -28,6 +28,7 @@ RUN apt-get update \
libssl-dev \
libcurl4-openssl-dev \
gdb \
software-properties-common \
&& rm -rf \
/var/lib/apt/lists/* \
/var/cache/debconf \
@ -37,6 +38,22 @@ RUN apt-get update \
ENV TZ=Europe/Moscow
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
ENV DOCKER_CHANNEL stable
ENV DOCKER_VERSION 5:19.03.13~3-0~ubuntu-bionic
RUN curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add -
RUN add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -c -s) ${DOCKER_CHANNEL}"
RUN apt-get update \
&& env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
docker-ce \
&& rm -rf \
/var/lib/apt/lists/* \
/var/cache/debconf \
/tmp/* \
&& apt-get clean
RUN dockerd --version; docker --version
RUN python3 -m pip install \
PyMySQL \
aerospike \
@ -60,28 +77,6 @@ RUN python3 -m pip install \
tzlocal \
urllib3
ENV DOCKER_CHANNEL stable
ENV DOCKER_VERSION 17.09.1-ce
RUN set -eux; \
\
# this "case" statement is generated via "update.sh"
\
if ! wget -nv -O docker.tgz "https://download.docker.com/linux/static/${DOCKER_CHANNEL}/x86_64/docker-${DOCKER_VERSION}.tgz"; then \
echo >&2 "error: failed to download 'docker-${DOCKER_VERSION}' from '${DOCKER_CHANNEL}' for '${x86_64}'"; \
exit 1; \
fi; \
\
tar --extract \
--file docker.tgz \
--strip-components 1 \
--directory /usr/local/bin/ \
; \
rm docker.tgz; \
\
dockerd --version; \
docker --version
COPY modprobe.sh /usr/local/bin/modprobe
COPY dockerd-entrypoint.sh /usr/local/bin/
COPY compose/ /compose/


@ -8,6 +8,7 @@ RUN apt-get update -y \
apt-get install --yes --no-install-recommends \
brotli \
expect \
zstd \
lsof \
ncdu \
netcat-openbsd \


@ -8,6 +8,7 @@ RUN apt-get --allow-unauthenticated update -y \
apt-get --allow-unauthenticated install --yes --no-install-recommends \
alien \
brotli \
zstd \
cmake \
devscripts \
expect \


@ -24,6 +24,7 @@ RUN apt-get update -y \
tree \
moreutils \
brotli \
zstd \
gdb \
lsof \
unixodbc \


@ -184,6 +184,10 @@ Sparse indexes allow you to work with a very large number of table rows, because
ClickHouse does not require a unique primary key. You can insert multiple rows with the same primary key.
You can use `Nullable`-typed expressions in the `PRIMARY KEY` and `ORDER BY` clauses. To allow this feature, turn on the [allow_nullable_key](../../../operations/settings/settings.md#allow-nullable-key) setting.
The [NULLS_LAST](../../../sql-reference/statements/select/order-by.md#sorting-of-special-values) principle applies for `NULL` values in the `ORDER BY` clause.
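A minimal sketch of how this might look (the table and column names are illustrative, not from the original text):

```sql
SET allow_nullable_key = 1;

CREATE TABLE nullable_key_demo
(
    `k` Nullable(Int64),
    `v` String
)
ENGINE = MergeTree
ORDER BY k;

-- Rows with k = NULL are placed last, per the NULLS_LAST principle.
SELECT * FROM nullable_key_demo ORDER BY k;
```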
### Selecting the Primary Key {#selecting-the-primary-key}
The number of columns in the primary key is not explicitly limited. Depending on the data structure, you can include more or fewer columns in the primary key. This may:
@ -579,6 +583,7 @@ Tags:
- `disk` — a disk within a volume.
- `max_data_part_size_bytes` — the maximum size of a part that can be stored on any of the volumes disks.
- `move_factor` — when the amount of available space gets lower than this factor, data automatically starts to move to the next volume, if there is one (by default, 0.1).
- `prefer_not_to_merge` — Disables merging of data parts on this volume. When this setting is enabled, merging data on this volume is not allowed. This allows controlling how ClickHouse works with slow disks.
Configuration examples:
@ -607,6 +612,18 @@ Configuration examples:
</volumes>
<move_factor>0.2</move_factor>
</moving_from_ssd_to_hdd>
<small_jbod_with_external_no_merges>
<volumes>
<main>
<disk>jbod1</disk>
</main>
<external>
<disk>external</disk>
<prefer_not_to_merge>true</prefer_not_to_merge>
</external>
</volumes>
</small_jbod_with_external_no_merges>
</policies>
...
</storage_configuration>


@ -2364,4 +2364,15 @@ Allows configurable `NULL` representation for [TSV](../../interfaces/formats.md#
Default value: `\N`.
## allow_nullable_key {#allow-nullable-key}
Allows using [Nullable](../../sql-reference/data-types/nullable.md#data_type-nullable)-typed values in sorting and primary keys for [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md#table_engines-mergetree) tables.
Possible values:
- 1 — `Nullable`-type expressions are allowed in keys.
- 0 — `Nullable`-type expressions are not allowed in keys.
Default value: `0`.
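A quick way to inspect the current value, in the style of the system-table examples added elsewhere in this commit (illustrative query):

```sql
SELECT name, value, changed
FROM system.settings
WHERE name = 'allow_nullable_key';
```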
[Original article](https://clickhouse.tech/docs/en/operations/settings/settings/) <!-- hide -->


@ -23,4 +23,44 @@ Please note that `errors_count` is updated once per query to the cluster, but `e
- [distributed_replica_error_cap setting](../../operations/settings/settings.md#settings-distributed_replica_error_cap)
- [distributed_replica_error_half_life setting](../../operations/settings/settings.md#settings-distributed_replica_error_half_life)
**Example**
```sql
:) SELECT * FROM system.clusters LIMIT 2 FORMAT Vertical;
```
```text
Row 1:
──────
cluster: test_cluster
shard_num: 1
shard_weight: 1
replica_num: 1
host_name: clickhouse01
host_address: 172.23.0.11
port: 9000
is_local: 1
user: default
default_database:
errors_count: 0
estimated_recovery_time: 0
Row 2:
──────
cluster: test_cluster
shard_num: 1
shard_weight: 1
replica_num: 2
host_name: clickhouse02
host_address: 172.23.0.12
port: 9000
is_local: 0
user: default
default_database:
errors_count: 0
estimated_recovery_time: 0
2 rows in set. Elapsed: 0.002 sec.
```
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/clusters) <!--hide-->


@ -23,4 +23,50 @@ The `system.columns` table contains the following columns (the column type is sh
- `is_in_sampling_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether the column is in the sampling key expression.
- `compression_codec` ([String](../../sql-reference/data-types/string.md)) — Compression codec name.
**Example**
```sql
:) select * from system.columns LIMIT 2 FORMAT Vertical;
```
```text
Row 1:
──────
database: system
table: aggregate_function_combinators
name: name
type: String
default_kind:
default_expression:
data_compressed_bytes: 0
data_uncompressed_bytes: 0
marks_bytes: 0
comment:
is_in_partition_key: 0
is_in_sorting_key: 0
is_in_primary_key: 0
is_in_sampling_key: 0
compression_codec:
Row 2:
──────
database: system
table: aggregate_function_combinators
name: is_internal
type: UInt8
default_kind:
default_expression:
data_compressed_bytes: 0
data_uncompressed_bytes: 0
marks_bytes: 0
comment:
is_in_partition_key: 0
is_in_sorting_key: 0
is_in_primary_key: 0
is_in_sampling_key: 0
compression_codec:
2 rows in set. Elapsed: 0.002 sec.
```
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/columns) <!--hide-->


@ -11,3 +11,21 @@ Columns:
- `keep_free_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Amount of disk space that should stay free on disk in bytes. Defined in the `keep_free_space_bytes` parameter of disk configuration.
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/disks) <!--hide-->
**Example**
```sql
:) SELECT * FROM system.disks;
```
```text
┌─name────┬─path─────────────────┬───free_space─┬──total_space─┬─keep_free_space─┐
│ default │ /var/lib/clickhouse/ │ 276392587264 │ 490652508160 │ 0 │
└─────────┴──────────────────────┴──────────────┴──────────────┴─────────────────┘
1 rows in set. Elapsed: 0.001 sec.
```


@ -8,3 +8,26 @@ Columns:
- `is_aggregate`(`UInt8`) — Whether the function is aggregate.
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/functions) <!--hide-->
**Example**
```sql
SELECT * FROM system.functions LIMIT 10;
```
```text
┌─name─────────────────────┬─is_aggregate─┬─case_insensitive─┬─alias_to─┐
│ sumburConsistentHash │ 0 │ 0 │ │
│ yandexConsistentHash │ 0 │ 0 │ │
│ demangle │ 0 │ 0 │ │
│ addressToLine │ 0 │ 0 │ │
│ JSONExtractRaw │ 0 │ 0 │ │
│ JSONExtractKeysAndValues │ 0 │ 0 │ │
│ JSONExtract │ 0 │ 0 │ │
│ JSONExtractString │ 0 │ 0 │ │
│ JSONExtractFloat │ 0 │ 0 │ │
│ JSONExtractInt │ 0 │ 0 │ │
└──────────────────────────┴──────────────┴──────────────────┴──────────┘
10 rows in set. Elapsed: 0.002 sec.
```


@ -10,4 +10,45 @@ Columns:
- `type` (String) — Setting type (implementation specific string value).
- `changed` (UInt8) — Whether the setting was explicitly defined in the config or explicitly changed.
**Example**
```sql
:) SELECT * FROM system.merge_tree_settings LIMIT 4 FORMAT Vertical;
```
```text
Row 1:
──────
name: index_granularity
value: 8192
changed: 0
description: How many rows correspond to one primary key value.
type: SettingUInt64
Row 2:
──────
name: min_bytes_for_wide_part
value: 0
changed: 0
description: Minimal uncompressed size in bytes to create part in wide format instead of compact
type: SettingUInt64
Row 3:
──────
name: min_rows_for_wide_part
value: 0
changed: 0
description: Minimal number of rows to create part in wide format instead of compact
type: SettingUInt64
Row 4:
──────
name: merge_max_block_size
value: 8192
changed: 0
description: How many rows in blocks should be formed for merge operations.
type: SettingUInt64
4 rows in set. Elapsed: 0.001 sec.
```
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/merge_tree_settings) <!--hide-->


@ -6,4 +6,27 @@ You can use this table for tests, or if you need to do a brute force search.
Reads from this table are not parallelized.
**Example**
```sql
:) SELECT * FROM system.numbers LIMIT 10;
```
```text
┌─number─┐
│ 0 │
│ 1 │
│ 2 │
│ 3 │
│ 4 │
│ 5 │
│ 6 │
│ 7 │
│ 8 │
│ 9 │
└────────┘
10 rows in set. Elapsed: 0.001 sec.
```
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/numbers) <!--hide-->


@ -4,4 +4,27 @@ The same as [system.numbers](../../operations/system-tables/numbers.md) but read
Used for tests.
**Example**
```sql
:) SELECT * FROM system.numbers_mt LIMIT 10;
```
```text
┌─number─┐
│ 0 │
│ 1 │
│ 2 │
│ 3 │
│ 4 │
│ 5 │
│ 6 │
│ 7 │
│ 8 │
│ 9 │
└────────┘
10 rows in set. Elapsed: 0.001 sec.
```
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/numbers_mt) <!--hide-->


@ -6,4 +6,18 @@ This table is used if a `SELECT` query doesn't specify the `FROM` clause.
This is similar to the `DUAL` table found in other DBMSs.
**Example**
```sql
:) SELECT * FROM system.one LIMIT 10;
```
```text
┌─dummy─┐
│ 0 │
└───────┘
1 rows in set. Elapsed: 0.001 sec.
```
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/one) <!--hide-->


@ -14,4 +14,51 @@ Columns:
- `query` (String) The query text. For `INSERT`, it doesn't include the data to insert.
- `query_id` (String) Query ID, if defined.
```sql
:) SELECT * FROM system.processes LIMIT 10 FORMAT Vertical;
```
```text
Row 1:
──────
is_initial_query: 1
user: default
query_id: 35a360fa-3743-441d-8e1f-228c938268da
address: ::ffff:172.23.0.1
port: 47588
initial_user: default
initial_query_id: 35a360fa-3743-441d-8e1f-228c938268da
initial_address: ::ffff:172.23.0.1
initial_port: 47588
interface: 1
os_user: bharatnc
client_hostname: tower
client_name: ClickHouse
client_revision: 54437
client_version_major: 20
client_version_minor: 7
client_version_patch: 2
http_method: 0
http_user_agent:
quota_key:
elapsed: 0.000582537
is_cancelled: 0
read_rows: 0
read_bytes: 0
total_rows_approx: 0
written_rows: 0
written_bytes: 0
memory_usage: 0
peak_memory_usage: 0
query: SELECT * from system.processes LIMIT 10 FORMAT Vertical;
thread_ids: [67]
ProfileEvents.Names: ['Query','SelectQuery','ReadCompressedBytes','CompressedReadBufferBlocks','CompressedReadBufferBytes','IOBufferAllocs','IOBufferAllocBytes','ContextLock','RWLockAcquiredReadLocks']
ProfileEvents.Values: [1,1,36,1,10,1,89,16,1]
Settings.Names: ['use_uncompressed_cache','load_balancing','log_queries','max_memory_usage']
Settings.Values: ['0','in_order','1','10000000000']
1 rows in set. Elapsed: 0.002 sec.
```
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/processes) <!--hide-->


@ -10,6 +10,7 @@ Columns:
- `disks` ([Array(String)](../../sql-reference/data-types/array.md)) — Disk names, defined in the storage policy.
- `max_data_part_size` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Maximum size of a data part that can be stored on volume disks (0 — no limit).
- `move_factor` ([Float64](../../sql-reference/data-types/float.md)) — Ratio of free disk space. When the ratio exceeds the value of the configuration parameter, ClickHouse starts to move data to the next volume in order.
- `prefer_not_to_merge` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Value of the `prefer_not_to_merge` setting. When this setting is enabled, merging data on this volume is not allowed. This allows controlling how ClickHouse works with slow disks.
If the storage policy contains more than one volume, then information for each volume is stored in an individual row of the table.
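An illustrative query against this table (the output depends on the configured storage policies):

```sql
SELECT policy_name, volume_name, disks, prefer_not_to_merge
FROM system.storage_policies;
```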


@ -52,4 +52,56 @@ This table contains the following columns (the column type is shown in brackets)
The `system.tables` table is used in `SHOW TABLES` query implementation.
```sql
:) SELECT * FROM system.tables LIMIT 2 FORMAT Vertical;
```
```text
Row 1:
──────
database: system
name: aggregate_function_combinators
uuid: 00000000-0000-0000-0000-000000000000
engine: SystemAggregateFunctionCombinators
is_temporary: 0
data_paths: []
metadata_path: /var/lib/clickhouse/metadata/system/aggregate_function_combinators.sql
metadata_modification_time: 1970-01-01 03:00:00
dependencies_database: []
dependencies_table: []
create_table_query:
engine_full:
partition_key:
sorting_key:
primary_key:
sampling_key:
storage_policy:
total_rows: ᴺᵁᴸᴸ
total_bytes: ᴺᵁᴸᴸ
Row 2:
──────
database: system
name: asynchronous_metrics
uuid: 00000000-0000-0000-0000-000000000000
engine: SystemAsynchronousMetrics
is_temporary: 0
data_paths: []
metadata_path: /var/lib/clickhouse/metadata/system/asynchronous_metrics.sql
metadata_modification_time: 1970-01-01 03:00:00
dependencies_database: []
dependencies_table: []
create_table_query:
engine_full:
partition_key:
sorting_key:
primary_key:
sampling_key:
storage_policy:
total_rows: ᴺᵁᴸᴸ
total_bytes: ᴺᵁᴸᴸ
2 rows in set. Elapsed: 0.004 sec.
```
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/tables) <!--hide-->


@ -591,3 +591,7 @@ Result:
```
[Original article](https://clickhouse.tech/docs/en/query_language/functions/string_search_functions/) <!--hide-->
## countMatches(haystack, pattern) {#countmatcheshaystack-pattern}
Returns the number of regular expression matches for a `pattern` in a `haystack`.
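A short illustration (hypothetical input values):

```sql
SELECT countMatches('foobarfoo', 'foo');
```

```text
┌─countMatches('foobarfoo', 'foo')─┐
│                                2 │
└──────────────────────────────────┘
```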


@ -9,7 +9,7 @@ toc_title: DELETE
ALTER TABLE [db.]table [ON CLUSTER cluster] DELETE WHERE filter_expr
```
Allows to delete data matching the specified filtering expression. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations).
Deletes data matching the specified filtering expression. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations).
!!! note "Note"
The `ALTER TABLE` prefix makes this syntax different from most other systems supporting SQL. It is intended to signify that unlike similar queries in OLTP databases this is a heavy operation not designed for frequent use.
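As an illustration (the `visits` table and `duration` column are placeholders, not from the original text):

```sql
ALTER TABLE visits DELETE WHERE duration < 5;
```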


@ -19,5 +19,4 @@ The first two commands are lightweight in the sense that they only change metadata
Also, they are replicated, syncing indices metadata via ZooKeeper.
!!! note "Note"
Index manipulation is supported only for tables with [`*MergeTree`](../../../../engines/table-engines/mergetree-family/mergetree.md) engine (including
[replicated](../../../../engines/table-engines/mergetree-family/replication.md) variants).
Index manipulation is supported only for tables with [`*MergeTree`](../../../../engines/table-engines/mergetree-family/mergetree.md) engine (including [replicated](../../../../engines/table-engines/mergetree-family/replication.md) variants).


@ -241,6 +241,46 @@ ALTER TABLE hits MOVE PART '20190301_14343_16206_438' TO VOLUME 'slow'
ALTER TABLE hits MOVE PARTITION '2019-09-01' TO DISK 'fast_ssd'
```
## UPDATE IN PARTITION {#update-in-partition}
Manipulates data in the specified partition matching the specified filtering expression. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations).
Syntax:
``` sql
ALTER TABLE [db.]table UPDATE column1 = expr1 [, ...] [IN PARTITION partition_id] WHERE filter_expr
```
### Example
``` sql
ALTER TABLE mt UPDATE x = x + 1 IN PARTITION 2 WHERE p = 2;
```
### See Also
- [UPDATE](../../../sql-reference/statements/alter/update.md#alter-table-update-statements)
## DELETE IN PARTITION {#delete-in-partition}
Deletes data in the specified partition matching the specified filtering expression. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations).
Syntax:
``` sql
ALTER TABLE [db.]table DELETE [IN PARTITION partition_id] WHERE filter_expr
```
### Example
``` sql
ALTER TABLE mt DELETE IN PARTITION 2 WHERE p = 2;
```
### See Also
- [DELETE](../../../sql-reference/statements/alter/delete.md#alter-mutations)
## How to Set Partition Expression {#alter-how-to-specify-part-expr}
You can specify the partition expression in `ALTER ... PARTITION` queries in different ways:
@ -258,4 +298,6 @@ All the rules above are also true for the [OPTIMIZE](../../../sql-reference/stat
OPTIMIZE TABLE table_not_partitioned PARTITION tuple() FINAL;
```
`IN PARTITION` specifies the partition to which the [UPDATE](../../../sql-reference/statements/alter/update.md#alter-table-update-statements) or [DELETE](../../../sql-reference/statements/alter/delete.md#alter-mutations) expressions are applied as a result of the `ALTER TABLE` query. New parts are created only from the specified partition. In this way, `IN PARTITION` helps to reduce the load when the table is divided into many partitions, and you only need to update the data point-by-point.
Examples of `ALTER ... PARTITION` queries are demonstrated in the tests [`00502_custom_partitioning_local`](https://github.com/ClickHouse/ClickHouse/blob/master/tests/queries/0_stateless/00502_custom_partitioning_local.sql) and [`00502_custom_partitioning_replicated_zookeeper`](https://github.com/ClickHouse/ClickHouse/blob/master/tests/queries/0_stateless/00502_custom_partitioning_replicated_zookeeper.sql).


@ -9,7 +9,7 @@ toc_title: UPDATE
ALTER TABLE [db.]table UPDATE column1 = expr1 [, ...] WHERE filter_expr
```
Allows to manipulate data matching the specified filtering expression. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations).
Manipulates data matching the specified filtering expression. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations).
!!! note "Note"
The `ALTER TABLE` prefix makes this syntax different from most other systems supporting SQL. It is intended to signify that unlike similar queries in OLTP databases this is a heavy operation not designed for frequent use.
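For example (again with placeholder table and column names):

```sql
ALTER TABLE visits UPDATE duration = 5 WHERE duration < 5;
```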


@ -0,0 +1,150 @@
---
toc_priority: 39
toc_title: EXPLAIN
---
# EXPLAIN Statement {#explain}
Shows the execution plan of a statement.
Syntax:
```sql
EXPLAIN [AST | SYNTAX | PLAN | PIPELINE] [setting = value, ...] SELECT ... [FORMAT ...]
```
Example:
```sql
EXPLAIN SELECT sum(number) FROM numbers(10) UNION ALL SELECT sum(number) FROM numbers(10) ORDER BY sum(number) ASC FORMAT TSV;
```
```sql
Union
Expression (Projection)
Expression (Before ORDER BY and SELECT)
Aggregating
Expression (Before GROUP BY)
SettingQuotaAndLimits (Set limits and quota after reading from storage)
ReadFromStorage (SystemNumbers)
Expression (Projection)
MergingSorted (Merge sorted streams for ORDER BY)
MergeSorting (Merge sorted blocks for ORDER BY)
PartialSorting (Sort each block for ORDER BY)
Expression (Before ORDER BY and SELECT)
Aggregating
Expression (Before GROUP BY)
SettingQuotaAndLimits (Set limits and quota after reading from storage)
ReadFromStorage (SystemNumbers)
```
## EXPLAIN Types {#explain-types}
- `AST` — Abstract syntax tree.
- `SYNTAX` — Query text after AST-level optimizations.
- `PLAN` — Query execution plan.
- `PIPELINE` — Query execution pipeline.
### EXPLAIN AST {#explain-ast}
Dump query AST.
Example:
```sql
EXPLAIN AST SELECT 1;
```
```sql
SelectWithUnionQuery (children 1)
ExpressionList (children 1)
SelectQuery (children 1)
ExpressionList (children 1)
Literal UInt64_1
```
### EXPLAIN SYNTAX {#explain-syntax}
Return query after syntax optimizations.
Example:
```sql
EXPLAIN SYNTAX SELECT * FROM system.numbers AS a, system.numbers AS b, system.numbers AS c;
```
```sql
SELECT
`--a.number` AS `a.number`,
`--b.number` AS `b.number`,
number AS `c.number`
FROM
(
SELECT
number AS `--a.number`,
b.number AS `--b.number`
FROM system.numbers AS a
CROSS JOIN system.numbers AS b
) AS `--.s`
CROSS JOIN system.numbers AS c
```
### EXPLAIN PLAN {#explain-plan}
Dump query plan steps.
Settings:
- `header` — Print output header for step. Default: 0.
- `description` — Print step description. Default: 1.
- `actions` — Print detailed information about step actions. Default: 0.
Example:
```sql
EXPLAIN SELECT sum(number) FROM numbers(10) GROUP BY number % 4;
```
```sql
Union
Expression (Projection)
Expression (Before ORDER BY and SELECT)
Aggregating
Expression (Before GROUP BY)
SettingQuotaAndLimits (Set limits and quota after reading from storage)
ReadFromStorage (SystemNumbers)
```
!!! note "Note"
Step and query cost estimation is not supported.
### EXPLAIN PIPELINE {#explain-pipeline}
Settings:
- `header` — Print header for each output port. Default: 0.
- `graph` — Use DOT graph description language. Default: 0.
- `compact` — Print graph in compact mode if graph is enabled. Default: 1.
Example:
```sql
EXPLAIN PIPELINE SELECT sum(number) FROM numbers_mt(100000) GROUP BY number % 4;
```
```sql
(Union)
(Expression)
ExpressionTransform
(Expression)
ExpressionTransform
(Aggregating)
Resize 2 → 1
AggregatingTransform × 2
(Expression)
ExpressionTransform × 2
(SettingQuotaAndLimits)
(ReadFromStorage)
NumbersMt × 2 0 → 1
```
[Оriginal article](https://clickhouse.tech/docs/en/sql-reference/statements/explain/) <!--hide-->


@ -29,3 +29,4 @@ Statements represent various kinds of action you can perform using SQL queries.
- [SET ROLE](../../sql-reference/statements/set-role.md)
- [TRUNCATE](../../sql-reference/statements/truncate.md)
- [USE](../../sql-reference/statements/use.md)
- [EXPLAIN](../../sql-reference/statements/explain.md)


@ -152,7 +152,7 @@ ClickHouse can manage background processes in [MergeTree](../../engines/table-en
Provides possibility to stop background merges for tables in the MergeTree family:
``` sql
SYSTEM STOP MERGES [[db.]merge_tree_family_table_name]
SYSTEM STOP MERGES [ON VOLUME <volume_name> | [db.]merge_tree_family_table_name]
```
!!! note "Note"
@ -163,7 +163,7 @@ SYSTEM STOP MERGES [[db.]merge_tree_family_table_name]
Provides possibility to start background merges for tables in the MergeTree family:
``` sql
SYSTEM START MERGES [[db.]merge_tree_family_table_name]
SYSTEM START MERGES [ON VOLUME <volume_name> | [db.]merge_tree_family_table_name]
```
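For illustration, with a placeholder volume name (not from the original text):

``` sql
SYSTEM STOP MERGES ON VOLUME slow_volume;
SYSTEM START MERGES ON VOLUME slow_volume;
```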
### STOP TTL MERGES {#query_language-stop-ttl-merges}


@ -57,7 +57,7 @@ Identifiers are:
Identifiers can be quoted or non-quoted. The latter is preferred.
Non-quoted identifiers must match the regex `^[0-9a-zA-Z_]*[a-zA-Z_]$` and can not be equal to [keywords](#syntax-keywords). Examples: `x, _1, X_y__Z123_.`
Non-quoted identifiers must match the regex `^[a-zA-Z_][0-9a-zA-Z_]*$` and can not be equal to [keywords](#syntax-keywords). Examples: `x`, `_1`, `X_y__Z123_`.
If you want to use identifiers that are the same as keywords, or to use other symbols in identifiers, quote them using double quotes or backticks, for example, `"id"`, `` `id` ``.
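For instance (a minimal sketch; the table and column are hypothetical):

```sql
-- `order` is a keyword, so quote it to use it as a column name.
CREATE TABLE t (`order` UInt32) ENGINE = Memory;
SELECT `order` FROM t;
```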


@ -177,6 +177,10 @@ Marks numbers: 0 1 2 3 4 5 6 7 8
ClickHouse does not require a unique primary key. You can insert many rows with the same primary key.
A key in `PRIMARY KEY` and `ORDER BY` may be of `Nullable` type. The [allow_nullable_key](../../../operations/settings/settings.md#allow-nullable-key) setting controls support for this feature.
When sorting with the `ORDER BY` clause, the [NULLS_LAST](../../../sql-reference/statements/select/order-by.md#sorting-of-special-values) principle always applies to `NULL` values.
### Selecting the Primary Key {#vybor-pervichnogo-kliucha}
The number of columns in the primary key is not explicitly limited. Depending on the data structure, you can include more or fewer columns in the primary key. This may:
@ -565,6 +569,7 @@ ALTER TABLE example_table
- `disk` — a disk within a volume.
- `max_data_part_size_bytes` — the maximum size of a data part that can be stored on any of this volume's disks.
- `move_factor` — the share of available free space on the volume; when free space drops below it, data starts to move to the next volume, if there is one (by default, 0.1).
- `prefer_not_to_merge` — Disables merging of data parts stored on this volume. When this setting is enabled, merging data stored on this volume is not allowed. This allows controlling how ClickHouse works with slow disks.
Configuration examples:
@ -593,6 +598,19 @@ ALTER TABLE example_table
</volumes>
<move_factor>0.2</move_factor>
</moving_from_ssd_to_hdd>
<small_jbod_with_external_no_merges>
<volumes>
<main>
<disk>jbod1</disk>
</main>
<external>
<disk>external</disk>
<prefer_not_to_merge>true</prefer_not_to_merge>
</external>
</volumes>
</small_jbod_with_external_no_merges>
</policies>
...
</storage_configuration>


@ -2235,4 +2235,15 @@ SELECT CAST(toNullable(toInt32(0)) AS Int32) as x, toTypeName(x);
Default value: `\N`.
## allow_nullable_key {#allow-nullable-key}
Enables or disables support for the [Nullable](../../sql-reference/data-types/nullable.md#data_type-nullable) type in keys of [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md#table_engines-mergetree) tables.
Possible values:
- 1 — `Nullable`-type keys are supported.
- 0 — `Nullable`-type keys are not supported.
Default value: `0`.
[Original article](https://clickhouse.tech/docs/ru/operations/settings/settings/) <!--hide-->


@ -10,6 +10,7 @@
- `disks` ([Array(String)](../../sql-reference/data-types/array.md)) — Names of the disks contained in the storage policy.
- `max_data_part_size` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Maximum size of a data part that can be stored on the volume's disks (0 — no limit).
- `move_factor` — The share of available free space on the volume; when free space drops below it, data starts to move to the next volume, if there is one (by default, 0.1).
- `prefer_not_to_merge` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Value of the `prefer_not_to_merge` setting. When this setting is enabled, merging data stored on this volume is not allowed. This allows controlling how ClickHouse works with slow disks.
If the storage policy contains more than one volume, each volume has its own row in the table.


@ -9,7 +9,7 @@ toc_title: DELETE
ALTER TABLE [db.]table [ON CLUSTER cluster] DELETE WHERE filter_expr
```
Allows deleting data matching the specified filtering expression. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations).
Deletes data matching the specified filtering expression. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations).
!!! note "Note"
The `ALTER TABLE` prefix makes this syntax different from most other systems supporting SQL. It is intended to signify that, unlike similar queries in OLTP databases, this is a heavy operation not designed for frequent use.


@ -243,6 +243,46 @@ ALTER TABLE hits MOVE PART '20190301_14343_16206_438' TO VOLUME 'slow'
ALTER TABLE hits MOVE PARTITION '2019-09-01' TO DISK 'fast_ssd'
```
## UPDATE IN PARTITION {#update-in-partition}
Manipulates data in the specified partition matching the specified filtering expression. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations).
Syntax:
``` sql
ALTER TABLE [db.]table UPDATE column1 = expr1 [, ...] [IN PARTITION partition_id] WHERE filter_expr
```
### Example
``` sql
ALTER TABLE mt UPDATE x = x + 1 IN PARTITION 2 WHERE p = 2;
```
### See Also
- [UPDATE](../../../sql-reference/statements/alter/update.md#alter-table-update-statements)
## DELETE IN PARTITION {#delete-in-partition}
Deletes data in the specified partition matching the specified filtering expression. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations).
Syntax:
``` sql
ALTER TABLE [db.]table DELETE [IN PARTITION partition_id] WHERE filter_expr
```
### Example
``` sql
ALTER TABLE mt DELETE IN PARTITION 2 WHERE p = 2;
```
### See Also
- [DELETE](../../../sql-reference/statements/alter/delete.md#alter-mutations)
## How to Set the Partition Expression in ALTER Queries {#alter-how-to-specify-part-expr}
To specify the partition in `ALTER ... PARTITION` queries, you can use:
@ -262,6 +302,8 @@ ALTER TABLE hits MOVE PARTITION '2019-09-01' TO DISK 'fast_ssd'
OPTIMIZE TABLE table_not_partitioned PARTITION tuple() FINAL;
```
`IN PARTITION` specifies the partition to which the [UPDATE](../../../sql-reference/statements/alter/update.md#alter-table-update-statements) or [DELETE](../../../sql-reference/statements/alter/delete.md#alter-mutations) expressions are applied as a result of the `ALTER TABLE` query. New parts are created only in the specified partition. In this way, `IN PARTITION` helps to reduce the load when the table is divided into many partitions and you only need to update the data point by point.
Examples of `ALTER ... PARTITION` queries can be found in the tests: [`00502_custom_partitioning_local`](https://github.com/ClickHouse/ClickHouse/blob/master/tests/queries/0_stateless/00502_custom_partitioning_local.sql) and [`00502_custom_partitioning_replicated_zookeeper`](https://github.com/ClickHouse/ClickHouse/blob/master/tests/queries/0_stateless/00502_custom_partitioning_replicated_zookeeper.sql).
[Original article](https://clickhouse.tech/docs/ru/query_language/alter/partition/) <!--hide-->


@ -9,7 +9,7 @@ toc_title: UPDATE
ALTER TABLE [db.]table UPDATE column1 = expr1 [, ...] WHERE filter_expr
```
Allows manipulating data matching the specified filtering expression. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations).
Manipulates data matching the specified filtering expression. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations).
!!! note "Note"
The `ALTER TABLE` prefix makes this syntax different from most other systems supporting SQL. It is intended to signify that, unlike similar queries in OLTP databases, this is a heavy operation not designed for frequent use.


@ -130,7 +130,7 @@ ClickHouse can manage background processes
Allows stopping background merges for tables in the MergeTree family:
``` sql
SYSTEM STOP MERGES [[db.]merge_tree_family_table_name]
SYSTEM STOP MERGES [ON VOLUME <volume_name> | [db.]merge_tree_family_table_name]
```
!!! note "Note"
@ -141,7 +141,7 @@ SYSTEM STOP MERGES [[db.]merge_tree_family_table_name]
Enables background merges for tables in the MergeTree family:
``` sql
SYSTEM START MERGES [[db.]merge_tree_family_table_name]
SYSTEM START MERGES [ON VOLUME <volume_name> | [db.]merge_tree_family_table_name]
```
### STOP TTL MERGES {#query_language-stop-ttl-merges}


@ -1,12 +1,36 @@
---
toc_priority: 14
toc_title: "\u266A\u64CD\u573A\u266A"
toc_title: Playground
---
# ClickHouse Playground {#clickhouse-playground}
[ClickHouse Playground](https://play.clickhouse.tech?file=welcome) allows people to experiment with ClickHouse by running queries instantly, without setting up a server or cluster of their own.
The Playground provides several example datasets, as well as sample queries that show off ClickHouse features.
The Playground provides several example datasets, as well as sample queries that show off ClickHouse features. Several ClickHouse LTS releases are also available to try.
ClickHouse Playground offers a small cluster of [Managed Service for ClickHouse](https://cloud.yandex.com/services/managed-clickhouse) instances (4 vCPU, 32 GB RAM) hosted in [Yandex.Cloud](https://cloud.yandex.com/). More information about [cloud providers](../commercial/cloud.md).
You can query the Playground with any HTTP client, for example [curl](https://curl.haxx.se) or [wget](https://www.gnu.org/software/wget/), or connect using the [JDBC](../interfaces/jdbc.md) or [ODBC](../interfaces/odbc.md) drivers. More information about software products that support ClickHouse is available [here](../interfaces/index.md).
## Credentials {#credentials}
| Parameter | Value |
|:--------------------|:----------------------------------------|
| HTTPS endpoint | `https://play-api.clickhouse.tech:8443` |
| TCP endpoint | `play-api.clickhouse.tech:9440` |
| User | `playground` |
| Password | `clickhouse` |
There are also additional endpoints with specific ClickHouse releases, to experiment with the differences between them (ports and user/password are the same as above):
- 20.3 LTS: `play-api-v20-3.clickhouse.tech`
- 19.14 LTS: `play-api-v19-14.clickhouse.tech`
!!! note "Note"
All these endpoints require a secure TLS connection.
## Query Limitations {#limitations}
Queries are executed as a read-only user. This implies some limitations:
@ -14,33 +38,34 @@ toc_title: "\u266A\u64CD\u573A\u266A"
- INSERT queries are not allowed
The following settings are also enforced:
- [`max_result_bytes=10485760`](../operations/settings/query_complexity/#max-result-bytes)
- [`max_result_rows=2000`](../operations/settings/query_complexity/#setting-max_result_rows)
- [`result_overflow_mode=break`](../operations/settings/query_complexity/#result-overflow-mode)
- [`max_execution_time=60000`](../operations/settings/query_complexity/#max-execution-time)
- [max_result_bytes=10485760](../operations/settings/query-complexity/#max-result-bytes)
- [max_result_rows=2000](../operations/settings/query-complexity/#setting-max_result_rows)
- [result_overflow_mode=break](../operations/settings/query-complexity/#result-overflow-mode)
- [max_execution_time=60000](../operations/settings/query-complexity/#max-execution-time)
ClickHouse Playground also runs as a [Managed Service for ClickHouse](https://cloud.yandex.com/services/managed-clickhouse)
instance hosted in [Yandex.Cloud](https://cloud.yandex.com/).
More information about [cloud providers](../commercial/cloud.md).
The ClickHouse Playground web interface makes requests via the ClickHouse [HTTP API](../interfaces/http.md).
The Playground backend is just a ClickHouse cluster without any additional server-side application.
The Playground also provides the ClickHouse HTTPS endpoint.
## Examples {#examples}
You can query the Playground with any HTTP client, for example [curl](https://curl.haxx.se) or [wget](https://www.gnu.org/software/wget/), or connect using the [JDBC](../interfaces/jdbc.md) or [ODBC](../interfaces/odbc.md) drivers.
More information about software products that support ClickHouse is available [here](../interfaces/index.md).
| Parameter | Value |
|:---------|:--------------------------------------|
| Endpoint | https://play-api.clickhouse.tech:8443 |
| User | `playground` |
| Password | `clickhouse` |
Note that this endpoint requires a secure connection.
Example:
Connecting to the HTTPS endpoint with `curl`:
``` bash
curl "https://play-api.clickhouse.tech:8443/?query=SELECT+'Play+ClickHouse!';&user=playground&password=clickhouse&database=datasets"
curl "https://play-api.clickhouse.tech:8443/?query=SELECT+'Play+ClickHouse\!';&user=playground&password=clickhouse&database=datasets"
```
TCP endpoint connection example with the [CLI](../interfaces/cli.md):
``` bash
clickhouse client --secure -h play-api.clickhouse.tech --port 9440 -u playground --password clickhouse -q "SELECT 'Play ClickHouse\!'"
```
## Implementation Details {#implementation-details}
The Playground web interface makes requests via the ClickHouse [HTTP API](../interfaces/http.md).
The Playground backend is just a ClickHouse cluster without any additional server-side application. As mentioned above, the ClickHouse HTTPS and TCP/TLS endpoints are also publicly available as part of the Playground; both are proxied through [Cloudflare Spectrum](https://www.cloudflare.com/products/cloudflare-spectrum/) to add an extra layer of protection and improved global connectivity.
!!! warning "Warning"
Exposing the ClickHouse server to the public internet in any other situation is **strongly discouraged**. Make sure it listens only on a private network and is protected by a properly configured firewall.


@ -1,19 +1,27 @@
---
toc_folder_title: Interfaces
toc_priority: 14
toc_title: Clients
---
# Clients {#interfaces}
ClickHouse provides two network interfaces (both can optionally be wrapped in TLS for additional security):
- [HTTP](http.md), which is documented and easy to use.
- [Native TCP](../interfaces/tcp.md), which has less overhead.
In most cases it is recommended to use an appropriate tool or library instead of interacting with these directly. The following are officially supported by Yandex:
- [Command-line client](../interfaces/cli.md)
- [JDBC driver](../interfaces/jdbc.md)
- [ODBC driver](../interfaces/odbc.md)
- [C++ client library](../interfaces/cpp.md)
There is also a wide range of third-party libraries for working with ClickHouse:
- [Client libraries](../interfaces/third-party/client-libraries.md)
- [Integrations](../interfaces/third-party/integrations.md)
- [Visual interfaces](../interfaces/third-party/gui.md)
[Original article](https://clickhouse.tech/docs/en/interfaces/) <!--hide-->


@ -1,13 +1,108 @@
# Conditional Functions {#tiao-jian-han-shu}
## if(cond, then, else) and the cond ? then : else operator {#ifcond-then-else-cond-operator-then-else}
## if {#if}
Performs conditional branching. Unlike most systems, ClickHouse always evaluates both expressions `then` and `else`.
**Syntax**
``` sql
SELECT if(cond, then, else)
```
If the condition `cond` evaluates to a non-zero value, the result of the expression `then` is returned, and the result of the expression `else`, if present, is skipped. If `cond` is zero or `NULL`, the result of the `then` expression is skipped and the result of the `else` expression, if present, is returned.
**Arguments**
- `cond` — The condition to evaluate; may be zero or non-zero. Type: UInt8, Nullable(UInt8), or NULL.
- `then` — The expression returned if the condition is met.
- `else` — The expression returned if the condition is not met.
**Returned values**
The function executes the `then` or `else` expression and returns its result, depending on whether the condition `cond` ended up being zero or not.
**Example**
Query:
``` sql
SELECT if(1, plus(2, 2), plus(2, 6))
```
Result:
``` text
┌─plus(2, 2)─┐
│ 4 │
└────────────┘
```
Query:
``` sql
SELECT if(0, plus(2, 2), plus(2, 6))
```
Result:
``` text
┌─plus(2, 6)─┐
│ 8 │
└────────────┘
```
- `then` and `else` must have the lowest common type.
**Example:**
Given the table `LEFT_RIGHT`:
``` sql
SELECT *
FROM LEFT_RIGHT
┌─left─┬─right─┐
│ ᴺᵁᴸᴸ │ 4 │
│ 1 │ 3 │
│ 2 │ 2 │
│ 3 │ 1 │
│ 4 │ ᴺᵁᴸᴸ │
└──────┴───────┘
```
The following query compares the `left` and `right` values:
``` sql
SELECT
left,
right,
if(left < right, 'left is smaller than right', 'right is greater or equal than left') AS is_smaller
FROM LEFT_RIGHT
WHERE isNotNull(left) AND isNotNull(right)
┌─left─┬─right─┬─is_smaller──────────────────────────┐
│    1 │     3 │ left is smaller than right          │
│    2 │     2 │ right is greater or equal than left │
│    3 │     1 │ right is greater or equal than left │
└──────┴───────┴─────────────────────────────────────┘
```
Note: `NULL` values are not used in this example; see the [NULL values in conditionals](#null-values-in-conditionals) section.
## Ternary Operator {#ternary-operator}
Works the same as the `if` function.
Syntax: `cond ? then : else`
Returns `then` if `cond` evaluates to a non-zero value, otherwise returns `else`.
- `cond` must be of type `UInt8`, and `then` and `else` must have the lowest common type.
- `then` and `else` can be `NULL`.
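A minimal example:
``` sql
SELECT (2 > 1) ? 'yes' : 'no' AS result

┌─result─┐
│ yes    │
└────────┘
```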
## multiIf {#multiif}
Allows writing the [CASE](../operators/index.md#operator_case) operator more compactly in a query.
@ -27,18 +122,74 @@
**Example**
Using the `LEFT_RIGHT` table again:
``` sql
SELECT
    left,
    right,
    multiIf(left < right, 'left is smaller', left > right, 'left is greater', left = right, 'Both equal', 'Null value') AS result
FROM LEFT_RIGHT

┌─left─┬─right─┬─result──────────┐
│ ᴺᵁᴸᴸ │     4 │ Null value      │
│    1 │     3 │ left is smaller │
│    2 │     2 │ Both equal      │
│    3 │     1 │ left is greater │
│    4 │ ᴺᵁᴸᴸ │ Null value      │
└──────┴───────┴─────────────────┘
```
## Using Conditional Results Directly {#using-conditional-results-directly}
Conditional results are always `0`, `1`, or `NULL`, so you can use them directly like this:
``` sql
SELECT left < right AS is_small
FROM LEFT_RIGHT
┌─is_small─┐
│     ᴺᵁᴸᴸ │
│        1 │
│        0 │
│        0 │
│     ᴺᵁᴸᴸ │
└──────────┘
```
## NULL Values in Conditionals {#null-values-in-conditionals}
When `NULL` values are involved in a condition, the result will also be `NULL`.
``` sql
SELECT
    NULL < 1,
    2 < NULL,
    NULL < NULL,
    NULL = NULL
┌─less(NULL, 1)─┬─less(2, NULL)─┬─less(NULL, NULL)─┬─equals(NULL, NULL)─┐
│ ᴺᵁᴸᴸ          │ ᴺᵁᴸᴸ          │ ᴺᵁᴸᴸ             │ ᴺᵁᴸᴸ               │
└───────────────┴───────────────┴──────────────────┴────────────────────┘
```
So you should construct your queries carefully if the types are `Nullable`.
The following example demonstrates this.
``` sql
SELECT
    left,
    right,
    multiIf(left < right, 'left is smaller', left > right, 'right is smaller', 'Both equal') AS faulty_result
FROM LEFT_RIGHT
┌─left─┬─right─┬─faulty_result────┐
│ ᴺᵁᴸᴸ │     4 │ Both equal       │
│    1 │     3 │ left is smaller  │
│    2 │     2 │ Both equal       │
│    3 │     1 │ right is smaller │
│    4 │ ᴺᵁᴸᴸ │ Both equal       │
└──────┴───────┴──────────────────┘
```
[Original article](https://clickhouse.tech/docs/en/query_language/functions/conditional_functions/) <!--hide-->

View File

@ -1,5 +1,71 @@
# Encoding Functions {#bian-ma-han-shu}
## char {#char}
Returns a string whose length equals the number of passed arguments, with each byte having the value of the corresponding argument. Accepts multiple arguments of numeric types. If the value of an argument is out of the range of the UInt8 data type, it is converted to UInt8 with possible rounding and overflow.
**Syntax**
``` sql
char(number_1, [number_2, ..., number_n]);
```
**Arguments**
- `number_1, number_2, ..., number_n` — Numeric arguments interpreted as integers. Types: [Int](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md).
**Returned value**
- A string of the given bytes.
Type: `String`.
**Example**
Query:
``` sql
SELECT char(104.1, 101, 108.9, 108.9, 111) AS hello
```
Result:
``` text
┌─hello─┐
│ hello │
└───────┘
```
You can construct a string of arbitrary encoding by passing the corresponding bytes. Here is an example for UTF-8:
Query:
``` sql
SELECT char(0xD0, 0xBF, 0xD1, 0x80, 0xD0, 0xB8, 0xD0, 0xB2, 0xD0, 0xB5, 0xD1, 0x82) AS hello;
```
Result:
``` text
┌─hello──┐
│ привет │
└────────┘
```
Query:
``` sql
SELECT char(0xE4, 0xBD, 0xA0, 0xE5, 0xA5, 0xBD) AS hello;
```
Result:
``` text
┌─hello─┐
│ 你好 │
└───────┘
```
## hex {#hex}
Accepts arguments of the types `String`, `unsigned integer`, `Date`, or `DateTime`. Returns a string containing the argument's hexadecimal representation. Uses uppercase letters `A-F`. Does not use `0x` prefixes or `h` suffixes. For strings, all bytes are simply encoded as two hexadecimal digits. Numbers are converted to big-endian («human-readable») format. For numbers, leading zero bytes are trimmed, but only whole bytes. For example, `hex(1) = '01'`. `Date` is encoded as the number of days since the beginning of the Unix epoch. `DateTime` is encoded as the number of seconds since the beginning of the Unix epoch.
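For example, note how leading zero bytes are kept for small numbers while strings are encoded byte by byte:
``` sql
SELECT hex(1), hex(255), hex('abc')

┌─hex(1)─┬─hex(255)─┬─hex('abc')─┐
│ 01     │ FF       │ 616263     │
└────────┴──────────┴────────────┘
```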
@ -17,11 +83,11 @@
Accepts a FixedString(16) value. Returns a string containing 36 characters in text format.
## bitmaskToList(num) {#bitmasktolistnum}
Accepts an integer. Returns a string containing the list of powers of two that sum to the source number, comma-separated in text format, in ascending order.
## bitmaskToArray(num) {#bitmasktoarraynum}
Accepts an integer. Returns an array of UInt64 numbers containing the list of powers of two that sum to the source number, in ascending order.
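For example, since 50 = 2 + 16 + 32:
``` sql
SELECT bitmaskToList(50), bitmaskToArray(50)

┌─bitmaskToList(50)─┬─bitmaskToArray(50)─┐
│ 2,16,32           │ [2,16,32]          │
└───────────────────┴────────────────────┘
```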

View File

@ -6,7 +6,7 @@
You can pass an argument of any type to them, but the passed argument is not used in the random number generation in any way.
The only purpose of this argument is to prevent common subexpression elimination, so that two different instances of the same random function in one query return different random numbers.
## rand, rand32 {#rand}
Returns a pseudo-random UInt32 number, evenly distributed among all UInt32-type numbers. Uses a linear congruential generator.
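For example, the distinct (and otherwise unused) dummy arguments guarantee two independently generated values:
``` sql
SELECT rand(1), rand(2)  -- two independent pseudo-random UInt32 numbers
```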

View File

@ -1,6 +1,6 @@
# String Functions {#zi-fu-chuan-han-shu}
## empty {#string-functions-empty}
Returns 1 for an empty string or 0 for a non-empty string.
The result type is UInt8.
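For example, on an empty and a non-empty string:
``` sql
SELECT empty(''), empty('text')

┌─empty('')─┬─empty('text')─┐
│         1 │             0 │
└───────────┴───────────────┘
```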
@ -13,13 +13,13 @@
The result type is UInt8.
The function also works for arrays.
## length {#length}
Returns the length of a string in bytes.
The result type is UInt64.
The function also works for arrays.
## lengthUTF8 {#lengthutf8}
Assuming that the string contains text encoded in UTF-8, returns the length of the string in Unicode code points. If the string is not valid UTF-8, the function may return an unexpected value (no exception is thrown).
The result type is UInt64.
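The byte/code-point distinction is easy to see on multi-byte UTF-8 text:
``` sql
SELECT length('привет'), lengthUTF8('привет')

┌─length('привет')─┬─lengthUTF8('привет')─┐
│               12 │                    6 │
└──────────────────┴──────────────────────┘
```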
@ -29,16 +29,16 @@
Assuming that the string contains text encoded in UTF-8, returns the length of the string in Unicode code points. If the string is not valid UTF-8, the function may return an unexpected value (no exception is thrown).
The result type is UInt64.
## character_length, CHARACTER_LENGTH {#character-length-character-length}
Assuming that the string contains text encoded in UTF-8, returns the length of the string in Unicode code points. If the string is not valid UTF-8, the function may return an unexpected value (no exception is thrown).
The result type is UInt64.
## lower, lcase {#lower-lcase}
Converts ASCII Latin symbols in a string to lowercase.
## upper, ucase {#upper-ucase}
Converts ASCII Latin symbols in a string to uppercase.
@ -84,7 +84,7 @@ SELECT toValidUTF8('\x61\xF0\x80\x80\x80b')
└───────────────────────┘
```
## reverse {#reverse}
Reverses the string (as a sequence of bytes).
@ -118,11 +118,11 @@ SELECT format('{} {}', 'Hello', 'World')
Same as [concat](#concat-s1-s2); the difference is that you need to ensure that concat(s1, s2, s3) -\> s4 is injective, so that it can be used for the optimization of GROUP BY.
## substring(s, offset, length), mid(s, offset, length), substr(s, offset, length) {#substrings-offset-length-mids-offset-length-substrs-offset-length}
Returns a substring starting at the byte with index `offset` that is `length` bytes long. Character indexing starts from one (as in standard SQL). The `offset` and `length` arguments must be constants.
## substringUTF8(s, offset, length) {#substringutf8s-offset-length}
The same as `substring`, but operates on Unicode code points. Works under the assumption that the string contains text encoded in UTF-8. If this assumption is not met, it may return an unexpected result (no exception is thrown).
@ -150,7 +150,7 @@ SELECT format('{} {}', 'Hello', 'World')
Returns whether the string ends with the specified suffix: 1 if it does, otherwise 0.
## startsWith(s, prefix) {#startswiths-prefix}
Returns whether the string starts with the specified prefix: 1 if it does, otherwise 0.
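For example:
``` sql
SELECT startsWith('ClickHouse', 'Click'), endsWith('ClickHouse', 'House')

┌─startsWith('ClickHouse', 'Click')─┬─endsWith('ClickHouse', 'House')─┐
│                                 1 │                               1 │
└───────────────────────────────────┴─────────────────────────────────┘
```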

View File

@ -151,7 +151,7 @@ DROP [ROW] POLICY [IF EXISTS] name [,...] ON [database.]table [,...] [ON CLUSTER
Deletes a quota.
The deleted quota is revoked from all the entities it was assigned to.
### Syntax {#drop-quota-syntax}
@ -161,9 +161,9 @@ DROP QUOTA [IF EXISTS] name [,...] [ON CLUSTER cluster_name]
## DROP SETTINGS PROFILE {#drop-settings-profile-statement}
Deletes a settings profile.
The deleted settings profile is revoked from all the entities it was assigned to.
### Syntax {#drop-settings-profile-syntax}
@ -177,7 +177,7 @@ DROP [SETTINGS] PROFILE [IF EXISTS] name [,...] [ON CLUSTER cluster_name]
EXISTS [TEMPORARY] [TABLE|DICTIONARY] [db.]name [INTO OUTFILE filename] [FORMAT format]
```
Returns a single `UInt8`-type column containing the single value `0` if the table or database does not exist, or `1` if the table exists in the specified database.
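For example, checking a table that always exists (`system.one`; the `result` column name is an assumption about the output format):
``` sql
EXISTS TABLE system.one

┌─result─┐
│      1 │
└────────┘
```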
## KILL QUERY {#kill-query-statement}

View File

@ -352,17 +352,20 @@
/// The query is saved in browser history (in state JSON object)
/// as well as in URL fragment identifier.
if (query != previous_query) {
previous_query = query;
var state = {
query: query,
status: this.status,
response: this.response.length > 100000 ? null : this.response /// Lower than the browser's limit.
};
var title = "ClickHouse Query: " + query;
history.pushState(
{
query: query,
status: this.status,
response: this.response.length > 100000 ? null : this.response /// Lower than the browser's limit.
},
title,
window.location.pathname + '?user=' + encodeURIComponent(user) + '#' + window.btoa(query));
var url = window.location.pathname + '?user=' + encodeURIComponent(user) + '#' + window.btoa(query);
if (previous_query == '') {
history.replaceState(state, title, url);
} else {
history.pushState(state, title, url);
}
document.title = title;
previous_query = query;
}
} else {
//console.log(this);

View File

@ -1,8 +1,8 @@
#include <AggregateFunctions/parseAggregateFunctionParameters.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ExpressionListParsers.h>
#include <Parsers/parseQuery.h>
#include <Common/typeid_cast.h>
#include <Core/Defines.h>
namespace DB
@ -25,6 +25,13 @@ Array getAggregateFunctionParametersArray(const ASTPtr & expression_list, const
for (size_t i = 0; i < parameters.size(); ++i)
{
const auto * literal = parameters[i]->as<ASTLiteral>();
ASTPtr func_literal;
if (!literal)
if (const auto * func = parameters[i]->as<ASTFunction>())
if ((func_literal = func->toLiteral()))
literal = func_literal->as<ASTLiteral>();
if (!literal)
{
throw Exception(

View File

@ -6,18 +6,6 @@ if (USE_CLANG_TIDY)
set (CMAKE_CXX_CLANG_TIDY "${CLANG_TIDY_PATH}")
endif ()
if(COMPILER_PIPE)
set(MAX_COMPILER_MEMORY 2500)
else()
set(MAX_COMPILER_MEMORY 1500)
endif()
if(MAKE_STATIC_LIBRARIES)
set(MAX_LINKER_MEMORY 3500)
else()
set(MAX_LINKER_MEMORY 2500)
endif()
include(../cmake/limit_jobs.cmake)
set (CONFIG_VERSION ${CMAKE_CURRENT_BINARY_DIR}/Common/config_version.h)
set (CONFIG_COMMON ${CMAKE_CURRENT_BINARY_DIR}/Common/config.h)
@ -49,6 +37,7 @@ add_subdirectory (Dictionaries)
add_subdirectory (Disks)
add_subdirectory (Storages)
add_subdirectory (Parsers)
add_subdirectory (Parsers/New)
add_subdirectory (IO)
add_subdirectory (Functions)
add_subdirectory (Interpreters)
@ -186,12 +175,12 @@ endif()
if (MAKE_STATIC_LIBRARIES OR NOT SPLIT_SHARED_LIBRARIES)
add_library (dbms STATIC ${dbms_headers} ${dbms_sources})
target_link_libraries (dbms PRIVATE jemalloc libdivide ${DBMS_COMMON_LIBRARIES})
target_link_libraries (dbms PRIVATE clickhouse_parsers_new jemalloc libdivide ${DBMS_COMMON_LIBRARIES})
set (all_modules dbms)
else()
add_library (dbms SHARED ${dbms_headers} ${dbms_sources})
target_link_libraries (dbms PUBLIC ${all_modules} ${DBMS_COMMON_LIBRARIES})
target_link_libraries (clickhouse_interpreters PRIVATE jemalloc libdivide)
target_link_libraries (clickhouse_interpreters PRIVATE clickhouse_parsers_new jemalloc libdivide)
list (APPEND all_modules dbms)
# force all split libs to be linked
set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--no-as-needed")
@ -253,7 +242,7 @@ target_link_libraries (clickhouse_common_io
PUBLIC
common
${DOUBLE_CONVERSION_LIBRARIES}
ryu
dragonbox_to_chars
)
if(RE2_LIBRARY)
@ -329,6 +318,8 @@ dbms_target_include_directories(SYSTEM BEFORE PUBLIC ${MINISELECT_INCLUDE_DIR})
if (ZSTD_LIBRARY)
dbms_target_link_libraries(PRIVATE ${ZSTD_LIBRARY})
target_link_libraries (clickhouse_common_io PUBLIC ${ZSTD_LIBRARY})
target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${ZSTD_INCLUDE_DIR})
if (NOT USE_INTERNAL_ZSTD_LIBRARY AND ZSTD_INCLUDE_DIR)
dbms_target_include_directories(SYSTEM BEFORE PRIVATE ${ZSTD_INCLUDE_DIR})
endif ()

View File

@ -526,6 +526,8 @@
M(557, UNKNOWN_UNION) \
M(558, EXPECTED_ALL_OR_DISTINCT) \
M(559, INVALID_GRPC_QUERY_INFO) \
M(560, ZSTD_ENCODER_FAILED) \
M(561, ZSTD_DECODER_FAILED) \
\
M(999, KEEPER_EXCEPTION) \
M(1000, POCO_EXCEPTION) \

View File

@ -37,12 +37,16 @@ void encodeSHA256(const void * text, size_t size, unsigned char * out)
String getOpenSSLErrors()
{
BIO * mem = BIO_new(BIO_s_mem());
SCOPE_EXIT(BIO_free(mem));
ERR_print_errors(mem);
char * buf = nullptr;
size_t size = BIO_get_mem_data(mem, &buf);
return String(buf, size);
String res;
ERR_print_errors_cb([](const char * str, size_t len, void * ctx)
{
String & out = *reinterpret_cast<String*>(ctx);
if (!out.empty())
out += ", ";
out.append(str, len);
return 1;
}, &res);
return res;
}
}

View File

@ -216,7 +216,6 @@ std::pair<ResponsePtr, Undo> TestKeeperCreateRequest::process(TestKeeper::Contai
if (is_sequential)
{
auto seq_num = it->second.seq_num;
++it->second.seq_num;
std::stringstream seq_num_str; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
seq_num_str.exceptions(std::ios::failbit);
@ -225,18 +224,19 @@ std::pair<ResponsePtr, Undo> TestKeeperCreateRequest::process(TestKeeper::Contai
path_created += seq_num_str.str();
}
/// Increment sequential number even if node is not sequential
++it->second.seq_num;
response.path_created = path_created;
container.emplace(path_created, std::move(created_node));
undo = [&container, path_created, is_sequential = is_sequential, parent_path = it->first]
undo = [&container, path_created, parent_path = it->first]
{
container.erase(path_created);
auto & undo_parent = container.at(parent_path);
--undo_parent.stat.cversion;
--undo_parent.stat.numChildren;
if (is_sequential)
--undo_parent.seq_num;
--undo_parent.seq_num;
};
++it->second.stat.cversion;

View File

@ -67,6 +67,10 @@ static bool renameat2(const std::string & old_path, const std::string & new_path
/// Other cases when EINVAL can be returned should never happen.
if (errno == EINVAL)
return false;
/// We should never get ENOSYS on Linux, because we check kernel version in supportsRenameat2Impl().
/// However, we can get it on WSL.
if (errno == ENOSYS)
return false;
if (errno == EEXIST)
throwFromErrno("Cannot rename " + old_path + " to " + new_path + " because the second path already exists", ErrorCodes::ATOMIC_RENAME_FAIL);

View File

@ -799,9 +799,8 @@ namespace MySQLReplication
break;
}
case WRITE_ROWS_EVENT_V1:
case WRITE_ROWS_EVENT_V2:
{
if (do_replicate())
case WRITE_ROWS_EVENT_V2: {
if (doReplicate())
event = std::make_shared<WriteRowsEvent>(table_map, std::move(event_header));
else
event = std::make_shared<DryRunEvent>(std::move(event_header));
@ -810,9 +809,8 @@ namespace MySQLReplication
break;
}
case DELETE_ROWS_EVENT_V1:
case DELETE_ROWS_EVENT_V2:
{
if (do_replicate())
case DELETE_ROWS_EVENT_V2: {
if (doReplicate())
event = std::make_shared<DeleteRowsEvent>(table_map, std::move(event_header));
else
event = std::make_shared<DryRunEvent>(std::move(event_header));
@ -821,9 +819,8 @@ namespace MySQLReplication
break;
}
case UPDATE_ROWS_EVENT_V1:
case UPDATE_ROWS_EVENT_V2:
{
if (do_replicate())
case UPDATE_ROWS_EVENT_V2: {
if (doReplicate())
event = std::make_shared<UpdateRowsEvent>(table_map, std::move(event_header));
else
event = std::make_shared<DryRunEvent>(std::move(event_header));

View File

@ -549,7 +549,7 @@ namespace MySQLReplication
std::shared_ptr<TableMapEvent> table_map;
size_t checksum_signature_length = 4;
inline bool do_replicate() { return (replicate_do_db.empty() || table_map->schema == replicate_do_db); }
inline bool doReplicate() { return (replicate_do_db.empty() || table_map->schema == replicate_do_db); }
};
}

View File

@ -398,6 +398,9 @@ class IColumn;
M(Bool, allow_non_metadata_alters, true, "Allow to execute alters which affects not only tables metadata, but also data on disk", 0) \
M(Bool, enable_global_with_statement, false, "Propagate WITH statements to UNION queries and all subqueries", 0) \
M(Bool, aggregate_functions_null_for_empty, false, "Rewrite all aggregate functions in a query, adding -OrNull suffix to them", 0) \
M(Bool, optimize_skip_merged_partitions, false, "Skip partitions with one part with level > 0 in optimize final", 0) \
\
M(Bool, use_antlr_parser, false, "Parse incoming queries using ANTLR-generated parser", 0) \
\
/** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. */ \
\
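The two additions above are ordinary session settings; a minimal sketch of enabling them per session (setting names as declared in this hunk):
``` sql
SET use_antlr_parser = 1;
SET optimize_skip_merged_partitions = 1;
```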

View File

@ -357,20 +357,23 @@ static DataTypePtr create(const ASTPtr & arguments)
throw Exception("Unexpected level of parameters to aggregate function", ErrorCodes::SYNTAX_ERROR);
function_name = parametric->name;
const ASTs & parameters = parametric->arguments->children;
params_row.resize(parameters.size());
for (size_t i = 0; i < parameters.size(); ++i)
if (parametric->arguments)
{
const auto * literal = parameters[i]->as<ASTLiteral>();
if (!literal)
throw Exception(
ErrorCodes::PARAMETERS_TO_AGGREGATE_FUNCTIONS_MUST_BE_LITERALS,
"Parameters to aggregate functions must be literals. "
"Got parameter '{}' for function '{}'",
parameters[i]->formatForErrorMessage(), function_name);
const ASTs & parameters = parametric->arguments->children;
params_row.resize(parameters.size());
params_row[i] = literal->value;
for (size_t i = 0; i < parameters.size(); ++i)
{
const auto * literal = parameters[i]->as<ASTLiteral>();
if (!literal)
throw Exception(
ErrorCodes::PARAMETERS_TO_AGGREGATE_FUNCTIONS_MUST_BE_LITERALS,
"Parameters to aggregate functions must be literals. "
"Got parameter '{}' for function '{}'",
parameters[i]->formatForErrorMessage(), function_name);
params_row[i] = literal->value;
}
}
}
else if (auto opt_name = tryGetIdentifierName(arguments->children[0]))

View File

@ -72,20 +72,24 @@ static std::pair<DataTypePtr, DataTypeCustomDescPtr> create(const ASTPtr & argum
throw Exception("Unexpected level of parameters to aggregate function", ErrorCodes::SYNTAX_ERROR);
function_name = parametric->name;
const ASTs & parameters = parametric->arguments->as<ASTExpressionList &>().children;
params_row.resize(parameters.size());
for (size_t i = 0; i < parameters.size(); ++i)
if (parametric->arguments)
{
const ASTLiteral * lit = parameters[i]->as<ASTLiteral>();
if (!lit)
throw Exception(
ErrorCodes::PARAMETERS_TO_AGGREGATE_FUNCTIONS_MUST_BE_LITERALS,
"Parameters to aggregate functions must be literals. "
"Got parameter '{}' for function '{}'",
parameters[i]->formatForErrorMessage(), function_name);
const ASTs & parameters = parametric->arguments->as<ASTExpressionList &>().children;
params_row.resize(parameters.size());
params_row[i] = lit->value;
for (size_t i = 0; i < parameters.size(); ++i)
{
const ASTLiteral * lit = parameters[i]->as<ASTLiteral>();
if (!lit)
throw Exception(
ErrorCodes::PARAMETERS_TO_AGGREGATE_FUNCTIONS_MUST_BE_LITERALS,
"Parameters to aggregate functions must be literals. "
"Got parameter '{}' for function '{}'",
parameters[i]->formatForErrorMessage(),
function_name);
params_row[i] = lit->value;
}
}
}
else if (auto opt_name = tryGetIdentifierName(arguments->children[0]))

View File

@ -112,9 +112,12 @@ static void validateKeyTypes(const DataTypes & key_types)
if (key_types.empty() || key_types.size() > 2)
throw Exception{"Expected a single IP address or IP with mask", ErrorCodes::TYPE_MISMATCH};
const auto & actual_type = key_types[0]->getName();
if (actual_type != "UInt32" && actual_type != "FixedString(16)")
throw Exception{"Key does not match, expected either UInt32 or FixedString(16)", ErrorCodes::TYPE_MISMATCH};
const auto * key_ipv4type = typeid_cast<const DataTypeUInt32 *>(key_types[0].get());
const auto * key_ipv6type = typeid_cast<const DataTypeFixedString *>(key_types[0].get());
if (key_ipv4type == nullptr && (key_ipv6type == nullptr || key_ipv6type->getN() != 16))
throw Exception{"Key does not match, expected either `IPv4` (`UInt32`) or `IPv6` (`FixedString(16)`)",
ErrorCodes::TYPE_MISMATCH};
if (key_types.size() > 1)
{

View File

@ -99,38 +99,40 @@ void buildLayoutConfiguration(
root->appendChild(layout_element);
AutoPtr<Element> layout_type_element(doc->createElement(layout->layout_type));
layout_element->appendChild(layout_type_element);
for (const auto & param : layout->parameters->children)
{
const ASTPair * pair = param->as<ASTPair>();
if (!pair)
if (layout->parameters)
for (const auto & param : layout->parameters->children)
{
throw DB::Exception(ErrorCodes::BAD_ARGUMENTS, "Dictionary layout parameters must be key/value pairs, got '{}' instead",
param->formatForErrorMessage());
const ASTPair * pair = param->as<ASTPair>();
if (!pair)
{
throw DB::Exception(ErrorCodes::BAD_ARGUMENTS, "Dictionary layout parameters must be key/value pairs, got '{}' instead",
param->formatForErrorMessage());
}
const ASTLiteral * value_literal = pair->second->as<ASTLiteral>();
if (!value_literal)
{
throw DB::Exception(ErrorCodes::BAD_ARGUMENTS,
"Dictionary layout parameter value must be a literal, got '{}' instead",
pair->second->formatForErrorMessage());
}
const auto value_field = value_literal->value;
if (value_field.getType() != Field::Types::UInt64
&& value_field.getType() != Field::Types::String)
{
throw DB::Exception(ErrorCodes::BAD_ARGUMENTS,
"Dictionary layout parameter value must be an UInt64 or String, got '{}' instead",
value_field.getTypeName());
}
AutoPtr<Element> layout_type_parameter_element(doc->createElement(pair->first));
AutoPtr<Text> value_to_append(doc->createTextNode(toString(value_field)));
layout_type_parameter_element->appendChild(value_to_append);
layout_type_element->appendChild(layout_type_parameter_element);
}
const ASTLiteral * value_literal = pair->second->as<ASTLiteral>();
if (!value_literal)
{
throw DB::Exception(ErrorCodes::BAD_ARGUMENTS,
"Dictionary layout parameter value must be a literal, got '{}' instead",
pair->second->formatForErrorMessage());
}
const auto value_field = value_literal->value;
if (value_field.getType() != Field::Types::UInt64
&& value_field.getType() != Field::Types::String)
{
throw DB::Exception(ErrorCodes::BAD_ARGUMENTS,
"Dictionary layout parameter value must be an UInt64 or String, got '{}' instead",
value_field.getTypeName());
}
AutoPtr<Element> layout_type_parameter_element(doc->createElement(pair->first));
AutoPtr<Text> value_to_append(doc->createTextNode(toString(value_field)));
layout_type_parameter_element->appendChild(value_to_append);
layout_type_element->appendChild(layout_type_parameter_element);
}
}
/*

View File

@ -30,6 +30,7 @@ namespace ErrorCodes
extern const int UNKNOWN_POLICY;
extern const int UNKNOWN_VOLUME;
extern const int LOGICAL_ERROR;
extern const int NOT_ENOUGH_SPACE;
}
@ -210,6 +211,14 @@ ReservationPtr StoragePolicy::reserve(UInt64 bytes) const
}
ReservationPtr StoragePolicy::reserveAndCheck(UInt64 bytes) const
{
if (auto res = reserve(bytes, 0))
return res;
throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "Cannot reserve {}, not enough space", ReadableSize(bytes));
}
ReservationPtr StoragePolicy::makeEmptyReservationOnLargestDisk() const
{
UInt64 max_space = 0;
@ -226,7 +235,14 @@ ReservationPtr StoragePolicy::makeEmptyReservationOnLargestDisk() const
}
}
}
return max_disk->reserve(0);
auto reservation = max_disk->reserve(0);
if (!reservation)
{
/// I'm not sure if it's really a logical error, but exception message
/// "Cannot reserve 0 bytes" looks too strange to throw it with another exception code.
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot reserve 0 bytes");
}
return reservation;
}

View File

@ -61,10 +61,13 @@ public:
const String & getName() const { return name; }
/// Returns valid reservation or null
/// Returns valid reservation or nullptr
ReservationPtr reserve(UInt64 bytes) const;
/// Reserve space on any volume with index > min_volume_index
/// Reserves space on any volume or throws
ReservationPtr reserveAndCheck(UInt64 bytes) const;
/// Reserves space on any volume with index > min_volume_index or returns nullptr
ReservationPtr reserve(UInt64 bytes, size_t min_volume_index) const;
/// Find volume index, which contains disk

View File

@ -82,10 +82,9 @@ struct KeyHolder<CipherMode::MySQLCompatibility>
return foldEncryptionKeyInMySQLCompatitableMode(cipher_key_size, key, folded_key);
}
~KeyHolder()
{
OPENSSL_cleanse(folded_key.data(), folded_key.size());
}
/// There is a function to clear the key securely.
/// It makes absolutely zero sense to call it here because
/// the key comes from a column and has already been copied multiple times through various memory buffers.
private:
std::array<char, EVP_MAX_KEY_LENGTH> folded_key;
@ -119,7 +118,7 @@ inline void validateCipherMode(const EVP_CIPHER * evp_cipher)
}
}
throw DB::Exception("Unsupported cipher mode " + std::string(EVP_CIPHER_name(evp_cipher)), DB::ErrorCodes::BAD_ARGUMENTS);
throw DB::Exception("Unsupported cipher mode", DB::ErrorCodes::BAD_ARGUMENTS);
}
template <CipherMode mode>

View File

@ -141,16 +141,16 @@ struct NumericArraySource : public ArraySourceImpl<NumericArraySource<T>>
/// The methods can be virtual or not depending on the template parameter. See IStringSource.
#if !__clang__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wsuggest-override"
# pragma GCC diagnostic push
# pragma GCC diagnostic ignored "-Wsuggest-override"
#elif __clang_major__ >= 11
#pragma GCC diagnostic push
#ifdef HAS_SUGGEST_OVERRIDE
#pragma GCC diagnostic ignored "-Wsuggest-override"
#endif
#ifdef HAS_SUGGEST_DESTRUCTOR_OVERRIDE
#pragma GCC diagnostic ignored "-Wsuggest-destructor-override"
#endif
# pragma GCC diagnostic push
# ifdef HAS_SUGGEST_OVERRIDE
# pragma GCC diagnostic ignored "-Wsuggest-override"
# endif
# ifdef HAS_SUGGEST_DESTRUCTOR_OVERRIDE
# pragma GCC diagnostic ignored "-Wsuggest-destructor-override"
# endif
#endif
template <typename Base>
@ -234,7 +234,7 @@ struct ConstSource : public Base
};
#if !__clang__ || __clang_major__ >= 11
#pragma GCC diagnostic pop
# pragma GCC diagnostic pop
#endif
struct StringSource
@ -355,9 +355,9 @@ struct UTF8StringSource : public StringSource
Slice getSliceFromLeft(size_t offset) const
{
auto begin = &elements[prev_offset];
auto end = elements.data() + offsets[row_num] - 1;
auto res_begin = skipCodePointsForward(begin, offset, end);
const auto * begin = &elements[prev_offset];
const auto * end = elements.data() + offsets[row_num] - 1;
const auto * res_begin = skipCodePointsForward(begin, offset, end);
if (res_begin >= end)
return {begin, 0};
@ -367,14 +367,14 @@ struct UTF8StringSource : public StringSource
Slice getSliceFromLeft(size_t offset, size_t length) const
{
auto begin = &elements[prev_offset];
auto end = elements.data() + offsets[row_num] - 1;
auto res_begin = skipCodePointsForward(begin, offset, end);
const auto * begin = &elements[prev_offset];
const auto * end = elements.data() + offsets[row_num] - 1;
const auto * res_begin = skipCodePointsForward(begin, offset, end);
if (res_begin >= end)
return {begin, 0};
auto res_end = skipCodePointsForward(res_begin, length, end);
const auto * res_end = skipCodePointsForward(res_begin, length, end);
if (res_end >= end)
return {res_begin, size_t(end - res_begin)};
@ -384,19 +384,19 @@ struct UTF8StringSource : public StringSource
Slice getSliceFromRight(size_t offset) const
{
auto begin = &elements[prev_offset];
auto end = elements.data() + offsets[row_num] - 1;
auto res_begin = skipCodePointsBackward(end, offset, begin);
const auto * begin = &elements[prev_offset];
const auto * end = elements.data() + offsets[row_num] - 1;
const auto * res_begin = skipCodePointsBackward(end, offset, begin);
return {res_begin, size_t(end - res_begin)};
}
Slice getSliceFromRight(size_t offset, size_t length) const
{
auto begin = &elements[prev_offset];
auto end = elements.data() + offsets[row_num] - 1;
auto res_begin = skipCodePointsBackward(end, offset, begin);
auto res_end = skipCodePointsForward(res_begin, length, end);
const auto * begin = &elements[prev_offset];
const auto * end = elements.data() + offsets[row_num] - 1;
const auto * res_begin = skipCodePointsBackward(end, offset, begin);
const auto * res_end = skipCodePointsForward(res_begin, length, end);
if (res_end >= end)
return {res_begin, size_t(end - res_begin)};
@ -495,7 +495,7 @@ struct IStringSource
virtual bool isEnd() const = 0;
virtual size_t getSizeForReserve() const = 0;
virtual Slice getWhole() const = 0;
virtual ~IStringSource() {}
virtual ~IStringSource() = default;
};

View File

@ -0,0 +1,29 @@
#include "FunctionFactory.h"
#include "countMatches.h"
namespace
{
struct FunctionCountMatchesCaseSensitive
{
static constexpr auto name = "countMatches";
static constexpr bool case_insensitive = false;
};
struct FunctionCountMatchesCaseInsensitive
{
static constexpr auto name = "countMatchesCaseInsensitive";
static constexpr bool case_insensitive = true;
};
}
namespace DB
{
void registerFunctionCountMatches(FunctionFactory & factory)
{
factory.registerFunction<FunctionCountMatches<FunctionCountMatchesCaseSensitive>>(FunctionFactory::CaseSensitive);
factory.registerFunction<FunctionCountMatches<FunctionCountMatchesCaseInsensitive>>(FunctionFactory::CaseSensitive);
}
}

View File

@ -0,0 +1,125 @@
#pragma once
#include <Functions/IFunctionImpl.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnsNumber.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeString.h>
#include <Functions/Regexps.h>
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int ILLEGAL_COLUMN;
extern const int LOGICAL_ERROR;
}
using Pos = const char *;
template <class CountMatchesBase>
class FunctionCountMatches : public IFunction
{
public:
static constexpr auto name = CountMatchesBase::name;
static FunctionPtr create(const Context &) { return std::make_shared<FunctionCountMatches<CountMatchesBase>>(); }
String getName() const override { return name; }
size_t getNumberOfArguments() const override { return 2; }
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
if (!isStringOrFixedString(arguments[1].type))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type {} of second argument (pattern) of function {}. Must be String/FixedString.",
arguments[1].type->getName(), getName());
if (!isStringOrFixedString(arguments[0].type))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type {} of first argument (haystack) of function {}. Must be String/FixedString.",
arguments[0].type->getName(), getName());
const auto * column = arguments[1].column.get();
if (!column || !checkAndGetColumnConstStringOrFixedString(column))
throw Exception(ErrorCodes::ILLEGAL_COLUMN,
"The second argument of function {} should be a constant string with the pattern",
getName());
return std::make_shared<DataTypeUInt64>();
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
{
const ColumnConst * column_pattern = checkAndGetColumnConstStringOrFixedString(arguments[1].column.get());
Regexps::Pool::Pointer re = Regexps::get<false /* like */, true /* is_no_capture */, CountMatchesBase::case_insensitive>(column_pattern->getValue<String>());
OptimizedRegularExpression::MatchVec matches;
const IColumn * column_haystack = arguments[0].column.get();
if (const ColumnString * col_str = checkAndGetColumn<ColumnString>(column_haystack))
{
auto result_column = ColumnUInt64::create();
const ColumnString::Chars & src_chars = col_str->getChars();
const ColumnString::Offsets & src_offsets = col_str->getOffsets();
ColumnUInt64::Container & vec_res = result_column->getData();
vec_res.resize(input_rows_count);
size_t size = src_offsets.size();
ColumnString::Offset current_src_offset = 0;
for (size_t i = 0; i < size; ++i)
{
Pos pos = reinterpret_cast<Pos>(&src_chars[current_src_offset]);
current_src_offset = src_offsets[i];
Pos end = reinterpret_cast<Pos>(&src_chars[current_src_offset]) - 1;
StringRef str(pos, end - pos);
vec_res[i] = countMatches(str, re, matches);
}
return result_column;
}
else if (const ColumnConst * col_const_str = checkAndGetColumnConstStringOrFixedString(column_haystack))
{
StringRef str = col_const_str->getDataColumn().getDataAt(0);
uint64_t matches_count = countMatches(str, re, matches);
return result_type->createColumnConst(input_rows_count, matches_count);
}
else
throw Exception(ErrorCodes::LOGICAL_ERROR, "Error in FunctionCountMatches::getReturnTypeImpl()");
}
static uint64_t countMatches(StringRef src, Regexps::Pool::Pointer & re, OptimizedRegularExpression::MatchVec & matches)
{
/// Only one match is required, no need to copy more.
static const unsigned matches_limit = 1;
Pos pos = reinterpret_cast<Pos>(src.data);
Pos end = reinterpret_cast<Pos>(src.data + src.size);
uint64_t match_count = 0;
while (true)
{
if (pos >= end)
break;
if (!re->match(pos, end - pos, matches, matches_limit))
break;
/// Progress should be made, but with an empty match no progress will be made.
/// Note that simply checking whether the pattern is empty is not enough,
/// since, for example, the pattern '[f]{0}' will match zero bytes:
if (!matches[0].length)
break;
pos += matches[0].offset + matches[0].length;
match_count++;
}
return match_count;
}
};
}
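A usage sketch for the function implemented in this header (expected results inferred from the matching loop above; note that the second argument must be a constant pattern):
``` sql
SELECT countMatches('foo.bar.baz', '\\.') AS dots,
       countMatchesCaseInsensitive('AxaXa', 'a') AS any_case

┌─dots─┬─any_case─┐
│    2 │        3 │
└──────┴──────────┘
```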

View File

@ -32,6 +32,7 @@ void registerFunctionTrim(FunctionFactory &);
void registerFunctionRegexpQuoteMeta(FunctionFactory &);
void registerFunctionNormalizeQuery(FunctionFactory &);
void registerFunctionNormalizedQueryHash(FunctionFactory &);
void registerFunctionCountMatches(FunctionFactory &);
#if USE_BASE64
void registerFunctionBase64Encode(FunctionFactory &);
@ -66,6 +67,7 @@ void registerFunctionsString(FunctionFactory & factory)
registerFunctionRegexpQuoteMeta(factory);
registerFunctionNormalizeQuery(factory);
registerFunctionNormalizedQueryHash(factory);
registerFunctionCountMatches(factory);
#if USE_BASE64
registerFunctionBase64Encode(factory);
registerFunctionBase64Decode(factory);

View File

@ -208,6 +208,7 @@ SRCS(
cos.cpp
cosh.cpp
countDigits.cpp
countMatches.cpp
countSubstrings.cpp
countSubstringsCaseInsensitive.cpp
countSubstringsCaseInsensitiveUTF8.cpp

View File

@ -8,6 +8,8 @@
#include <IO/WriteBuffer.h>
#include <IO/ZlibDeflatingWriteBuffer.h>
#include <IO/ZlibInflatingReadBuffer.h>
#include <IO/ZstdDeflatingWriteBuffer.h>
#include <IO/ZstdInflatingReadBuffer.h>
#if !defined(ARCADIA_BUILD)
# include <Common/config.h>
@ -34,6 +36,8 @@ std::string toContentEncodingName(CompressionMethod method)
return "br";
case CompressionMethod::Xz:
return "xz";
case CompressionMethod::Zstd:
return "zstd";
case CompressionMethod::None:
return "";
}
@ -61,11 +65,13 @@ CompressionMethod chooseCompressionMethod(const std::string & path, const std::s
return CompressionMethod::Brotli;
if (*method_str == "LZMA" || *method_str == "xz")
return CompressionMethod::Xz;
if (*method_str == "zstd" || *method_str == "zst")
return CompressionMethod::Zstd;
if (hint.empty() || hint == "auto" || hint == "none")
return CompressionMethod::None;
throw Exception(
"Unknown compression method " + hint + ". Only 'auto', 'none', 'gzip', 'br', 'xz' are supported as compression methods",
"Unknown compression method " + hint + ". Only 'auto', 'none', 'gzip', 'br', 'xz', 'zstd' are supported as compression methods",
ErrorCodes::NOT_IMPLEMENTED);
}
@ -81,6 +87,8 @@ std::unique_ptr<ReadBuffer> wrapReadBufferWithCompressionMethod(
#endif
if (method == CompressionMethod::Xz)
return std::make_unique<LZMAInflatingReadBuffer>(std::move(nested), buf_size, existing_memory, alignment);
if (method == CompressionMethod::Zstd)
return std::make_unique<ZstdInflatingReadBuffer>(std::move(nested), buf_size, existing_memory, alignment);
if (method == CompressionMethod::None)
return nested;
@ -102,6 +110,9 @@ std::unique_ptr<WriteBuffer> wrapWriteBufferWithCompressionMethod(
if (method == CompressionMethod::Xz)
return std::make_unique<LZMADeflatingWriteBuffer>(std::move(nested), level, buf_size, existing_memory, alignment);
if (method == CompressionMethod::Zstd)
return std::make_unique<ZstdDeflatingWriteBuffer>(std::move(nested), level, buf_size, existing_memory, alignment);
if (method == CompressionMethod::None)
return nested;
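As a usage sketch, assuming a caller such as the `file` table function reaches `chooseCompressionMethod` with the file path, the new method is picked up from the `.zst` extension (or from an explicit `zstd` hint):
``` sql
SELECT count() FROM file('data.tsv.zst', 'TSV', 's String')
```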

View File

@ -28,6 +28,9 @@ enum class CompressionMethod
/// LZMA2-based content compression
/// This option corresponds to HTTP Content-Encoding: xz
Xz,
/// Zstd compressor
/// This option corresponds to HTTP Content-Encoding: zstd
Zstd,
Brotli
};

View File

@ -10,7 +10,7 @@
namespace DB::S3
{
PocoHTTPResponseStream::PocoHTTPResponseStream(std::shared_ptr<Poco::Net::HTTPClientSession> session_, std::istream & response_stream_)
: Aws::IStream(response_stream_.rdbuf()), session(std::move(session_))
: Aws::IOStream(response_stream_.rdbuf()), session(std::move(session_))
{
}

View File

@ -8,7 +8,7 @@ namespace DB::S3
/**
* Wrapper of IStream to store response stream and corresponding HTTP session.
*/
class PocoHTTPResponseStream : public Aws::IStream
class PocoHTTPResponseStream : public Aws::IOStream
{
public:
PocoHTTPResponseStream(std::shared_ptr<Poco::Net::HTTPClientSession> session_, std::istream & response_stream_);

View File

@ -29,7 +29,7 @@
#include <IO/DoubleConverter.h>
#include <IO/WriteBufferFromString.h>
#include <ryu/ryu.h>
#include <dragonbox/dragonbox_to_chars.h>
#include <Formats/FormatSettings.h>
@ -228,14 +228,14 @@ inline size_t writeFloatTextFastPath(T x, char * buffer)
if (DecomposedFloat64(x).is_inside_int64())
result = itoa(Int64(x), buffer) - buffer;
else
result = d2s_buffered_n(x, buffer);
result = jkj::dragonbox::to_chars_n(x, buffer) - buffer;
}
else
{
if (DecomposedFloat32(x).is_inside_int32())
result = itoa(Int32(x), buffer) - buffer;
else
result = f2s_buffered_n(x, buffer);
result = jkj::dragonbox::to_chars_n(x, buffer) - buffer;
}
if (result <= 0)

View File

@ -0,0 +1,95 @@
#include <IO/ZstdDeflatingWriteBuffer.h>
namespace DB
{
namespace ErrorCodes
{
extern const int ZSTD_ENCODER_FAILED;
}
ZstdDeflatingWriteBuffer::ZstdDeflatingWriteBuffer(
std::unique_ptr<WriteBuffer> out_, int compression_level, size_t buf_size, char * existing_memory, size_t alignment)
: BufferWithOwnMemory<WriteBuffer>(buf_size, existing_memory, alignment), out(std::move(out_))
{
cctx = ZSTD_createCCtx();
if (cctx == nullptr)
throw Exception(ErrorCodes::ZSTD_ENCODER_FAILED, "zstd stream encoder init failed: zstd version: {}", ZSTD_VERSION_STRING);
size_t ret = ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, compression_level);
if (ZSTD_isError(ret))
throw Exception(ErrorCodes::ZSTD_ENCODER_FAILED, "zstd stream encoder option setting failed: error code: {}; zstd version: {}", ret, ZSTD_VERSION_STRING);
ret = ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1);
if (ZSTD_isError(ret))
throw Exception(ErrorCodes::ZSTD_ENCODER_FAILED, "zstd stream encoder option setting failed: error code: {}; zstd version: {}", ret, ZSTD_VERSION_STRING);
input = {nullptr, 0, 0};
output = {nullptr, 0, 0};
}
ZstdDeflatingWriteBuffer::~ZstdDeflatingWriteBuffer()
{
try
{
finish();
ZSTD_freeCCtx(cctx);
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
}
void ZstdDeflatingWriteBuffer::nextImpl()
{
if (!offset())
return;
ZSTD_EndDirective mode = ZSTD_e_flush;
input.src = reinterpret_cast<unsigned char *>(working_buffer.begin());
input.size = offset();
input.pos = 0;
bool finished = false;
do
{
out->nextIfAtEnd();
output.dst = reinterpret_cast<unsigned char *>(out->buffer().begin());
output.size = out->buffer().size();
output.pos = out->offset();
ZSTD_compressStream2(cctx, &output, &input, mode);
out->position() = out->buffer().begin() + output.pos;
finished = (input.pos == input.size);
} while (!finished);
}
void ZstdDeflatingWriteBuffer::finish()
{
if (flushed)
return;
next();
out->nextIfAtEnd();
input.src = reinterpret_cast<unsigned char *>(working_buffer.begin());
input.size = offset();
input.pos = 0;
output.dst = reinterpret_cast<unsigned char *>(out->buffer().begin());
output.size = out->buffer().size();
output.pos = out->offset();
size_t remaining = ZSTD_compressStream2(cctx, &output, &input, ZSTD_e_end);
if (ZSTD_isError(remaining))
throw Exception(ErrorCodes::ZSTD_ENCODER_FAILED, "zstd stream encoder end failed: zstd version: {}", ZSTD_VERSION_STRING);
out->position() = out->buffer().begin() + output.pos;
flushed = true;
}
}

View File

@ -0,0 +1,40 @@
#pragma once
#include <IO/BufferWithOwnMemory.h>
#include <IO/CompressionMethod.h>
#include <IO/WriteBuffer.h>
#include <zstd.h>
namespace DB
{
/// Performs compression using zstd library and writes compressed data to out_ WriteBuffer.
class ZstdDeflatingWriteBuffer : public BufferWithOwnMemory<WriteBuffer>
{
public:
ZstdDeflatingWriteBuffer(
std::unique_ptr<WriteBuffer> out_,
int compression_level,
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
char * existing_memory = nullptr,
size_t alignment = 0);
/// Flush all pending data and write zstd footer to the underlying buffer.
/// After the first call to this function, subsequent calls will have no effect and
/// an attempt to write to this buffer will result in exception.
void finish();
~ZstdDeflatingWriteBuffer() override;
private:
void nextImpl() override;
std::unique_ptr<WriteBuffer> out;
ZSTD_CCtx * cctx;
ZSTD_inBuffer input;
ZSTD_outBuffer output;
bool flushed = false;
};
}

View File

@ -0,0 +1,63 @@
#include <IO/ZstdInflatingReadBuffer.h>
namespace DB
{
namespace ErrorCodes
{
extern const int ZSTD_DECODER_FAILED;
}
ZstdInflatingReadBuffer::ZstdInflatingReadBuffer(std::unique_ptr<ReadBuffer> in_, size_t buf_size, char * existing_memory, size_t alignment)
: BufferWithOwnMemory<ReadBuffer>(buf_size, existing_memory, alignment), in(std::move(in_))
{
dctx = ZSTD_createDCtx();
input = {nullptr, 0, 0};
output = {nullptr, 0, 0};
if (dctx == nullptr)
{
throw Exception(ErrorCodes::ZSTD_DECODER_FAILED, "zstd_stream_decoder init failed: zstd version: {}", ZSTD_VERSION_STRING);
}
}
ZstdInflatingReadBuffer::~ZstdInflatingReadBuffer()
{
ZSTD_freeDCtx(dctx);
}
bool ZstdInflatingReadBuffer::nextImpl()
{
if (eof)
return false;
if (input.pos >= input.size)
{
in->nextIfAtEnd();
input.src = reinterpret_cast<unsigned char *>(in->position());
input.pos = 0;
input.size = in->buffer().end() - in->position();
}
output.dst = reinterpret_cast<unsigned char *>(internal_buffer.begin());
output.size = internal_buffer.size();
output.pos = 0;
size_t ret = ZSTD_decompressStream(dctx, &output, &input);
if (ZSTD_isError(ret))
throw Exception(
ErrorCodes::ZSTD_DECODER_FAILED, "Zstd stream decoding failed: error code: {}; zstd version: {}", ret, ZSTD_VERSION_STRING);
in->position() = in->buffer().begin() + input.pos;
working_buffer.resize(output.pos);
if (in->eof())
{
eof = true;
return working_buffer.size() != 0;
}
return true;
}
}

View File

@ -0,0 +1,37 @@
#pragma once
#include <IO/BufferWithOwnMemory.h>
#include <IO/CompressionMethod.h>
#include <IO/ReadBuffer.h>
#include <zstd.h>
namespace DB
{
namespace ErrorCodes
{
}
class ZstdInflatingReadBuffer : public BufferWithOwnMemory<ReadBuffer>
{
public:
ZstdInflatingReadBuffer(
std::unique_ptr<ReadBuffer> in_,
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
char * existing_memory = nullptr,
size_t alignment = 0);
~ZstdInflatingReadBuffer() override;
private:
bool nextImpl() override;
std::unique_ptr<ReadBuffer> in;
ZSTD_DCtx * dctx;
ZSTD_inBuffer input;
ZSTD_outBuffer output;
bool eof = false;
};
}

View File

@ -80,5 +80,8 @@ target_link_libraries (parse_date_time_best_effort PRIVATE clickhouse_common_io)
add_executable (zlib_ng_bug zlib_ng_bug.cpp)
target_link_libraries (zlib_ng_bug PRIVATE ${ZLIB_LIBRARIES})
add_executable (ryu_test ryu_test.cpp)
target_link_libraries (ryu_test PRIVATE ryu)
add_executable (dragonbox_test dragonbox_test.cpp)
target_link_libraries (dragonbox_test PRIVATE dragonbox_to_chars)
add_executable (zstd_buffers zstd_buffers.cpp)
target_link_libraries (zstd_buffers PRIVATE clickhouse_common_io)

View File

@ -1,8 +1,7 @@
#include <string>
#include <iostream>
#include <cstring>
#include <ryu/ryu.h>
#include <dragonbox/dragonbox_to_chars.h>
struct DecomposedFloat64
{
@ -84,7 +83,8 @@ int main(int argc, char ** argv)
double x = argc > 1 ? std::stod(argv[1]) : 0;
char buf[32];
d2s_buffered(x, buf);
std::cout << "dragonbox output" << std::endl;
jkj::dragonbox::to_chars(x, buf);
std::cout << buf << "\n";
std::cout << DecomposedFloat64(x).isInsideInt64() << "\n";

View File

@ -0,0 +1,66 @@
#include <iomanip>
#include <iostream>
#include <IO/ReadBufferFromFile.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteBufferFromFile.h>
#include <IO/WriteHelpers.h>
#include <IO/ZstdDeflatingWriteBuffer.h>
#include <IO/ZstdInflatingReadBuffer.h>
#include <Common/Stopwatch.h>
int main(int, char **)
try
{
std::cout << std::fixed << std::setprecision(2);
size_t n = 10000000;
Stopwatch stopwatch;
{
auto buf
= std::make_unique<DB::WriteBufferFromFile>("test_zstd_buffers.zst", DBMS_DEFAULT_BUFFER_SIZE, O_WRONLY | O_CREAT | O_TRUNC);
DB::ZstdDeflatingWriteBuffer zstd_buf(std::move(buf), /*compression level*/ 3);
stopwatch.restart();
for (size_t i = 0; i < n; ++i)
{
DB::writeIntText(i, zstd_buf);
DB::writeChar('\t', zstd_buf);
}
zstd_buf.finish();
stopwatch.stop();
std::cout << "Writing done. Elapsed: " << stopwatch.elapsedSeconds() << " s."
<< ", " << (zstd_buf.count() / stopwatch.elapsedSeconds() / 1000000) << " MB/s" << std::endl;
}
{
auto buf = std::make_unique<DB::ReadBufferFromFile>("test_zstd_buffers.zst");
DB::ZstdInflatingReadBuffer zstd_buf(std::move(buf));
stopwatch.restart();
for (size_t i = 0; i < n; ++i)
{
size_t x;
DB::readIntText(x, zstd_buf);
zstd_buf.ignore();
if (x != i)
throw DB::Exception("Failed!, read: " + std::to_string(x) + ", expected: " + std::to_string(i), 0);
}
stopwatch.stop();
std::cout << "Reading done. Elapsed: " << stopwatch.elapsedSeconds() << " s."
<< ", " << (zstd_buf.count() / stopwatch.elapsedSeconds() / 1000000) << " MB/s" << std::endl;
}
return 0;
}
catch (const DB::Exception & e)
{
std::cerr << e.what() << ", " << e.displayText() << std::endl;
return 1;
}

View File

@ -3,11 +3,16 @@ OWNER(g:clickhouse)
LIBRARY()
ADDINCL(
contrib/libs/zstd
)
PEERDIR(
clickhouse/src/Common
contrib/libs/brotli/dec
contrib/libs/brotli/enc
contrib/libs/poco/NetSSL_OpenSSL
contrib/libs/zstd
)
@ -58,6 +63,8 @@ SRCS(
WriteHelpers.cpp
ZlibDeflatingWriteBuffer.cpp
ZlibInflatingReadBuffer.cpp
ZstdDeflatingWriteBuffer.cpp
ZstdInflatingReadBuffer.cpp
copyData.cpp
createReadBufferFromFileBase.cpp
createWriteBufferFromFileBase.cpp

View File

@ -2,11 +2,16 @@ OWNER(g:clickhouse)
LIBRARY()
ADDINCL(
contrib/libs/zstd
)
PEERDIR(
clickhouse/src/Common
contrib/libs/brotli/dec
contrib/libs/brotli/enc
contrib/libs/poco/NetSSL_OpenSSL
contrib/libs/zstd
)

View File

@ -757,39 +757,102 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
/// If the function has an argument-lambda expression, you need to determine its type before the recursive call.
bool has_lambda_arguments = false;
size_t num_arguments = node.arguments->children.size();
for (size_t arg = 0; arg < num_arguments; ++arg)
if (node.arguments)
{
auto & child = node.arguments->children[arg];
const auto * function = child->as<ASTFunction>();
const auto * identifier = child->as<ASTIdentifier>();
if (function && function->name == "lambda")
size_t num_arguments = node.arguments->children.size();
for (size_t arg = 0; arg < num_arguments; ++arg)
{
/// If the argument is a lambda expression, just remember its approximate type.
if (function->arguments->children.size() != 2)
throw Exception("lambda requires two arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
auto & child = node.arguments->children[arg];
const auto * lambda_args_tuple = function->arguments->children.at(0)->as<ASTFunction>();
if (!lambda_args_tuple || lambda_args_tuple->name != "tuple")
throw Exception("First argument of lambda must be a tuple", ErrorCodes::TYPE_MISMATCH);
has_lambda_arguments = true;
argument_types.emplace_back(std::make_shared<DataTypeFunction>(DataTypes(lambda_args_tuple->arguments->children.size())));
/// Select the name in the next cycle.
argument_names.emplace_back();
}
else if (function && function->name == "untuple")
{
auto columns = doUntuple(function, data);
if (columns.empty())
continue;
for (const auto & column : columns)
const auto * function = child->as<ASTFunction>();
const auto * identifier = child->as<ASTIdentifier>();
if (function && function->name == "lambda")
{
if (auto name_type = getNameAndTypeFromAST(column, data))
/// If the argument is a lambda expression, just remember its approximate type.
if (function->arguments->children.size() != 2)
throw Exception("lambda requires two arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
const auto * lambda_args_tuple = function->arguments->children.at(0)->as<ASTFunction>();
if (!lambda_args_tuple || lambda_args_tuple->name != "tuple")
throw Exception("First argument of lambda must be a tuple", ErrorCodes::TYPE_MISMATCH);
has_lambda_arguments = true;
argument_types.emplace_back(std::make_shared<DataTypeFunction>(DataTypes(lambda_args_tuple->arguments->children.size())));
/// Select the name in the next cycle.
argument_names.emplace_back();
}
else if (function && function->name == "untuple")
{
auto columns = doUntuple(function, data);
if (columns.empty())
continue;
for (const auto & column : columns)
{
if (auto name_type = getNameAndTypeFromAST(column, data))
{
argument_types.push_back(name_type->type);
argument_names.push_back(name_type->name);
}
else
arguments_present = false;
}
node.arguments->children.erase(node.arguments->children.begin() + arg);
node.arguments->children.insert(node.arguments->children.begin() + arg, columns.begin(), columns.end());
num_arguments += columns.size() - 1;
arg += columns.size() - 1;
}
else if (checkFunctionIsInOrGlobalInOperator(node) && arg == 1 && prepared_set)
{
ColumnWithTypeAndName column;
column.type = std::make_shared<DataTypeSet>();
/// If the argument is a set given by an enumeration of values (so, the set was already built), give it a unique name,
/// so that sets with the same literal representation do not fuse together (they can have different types).
if (!prepared_set->empty())
column.name = data.getUniqueName("__set");
else
column.name = child->getColumnName();
if (!data.hasColumn(column.name))
{
auto column_set = ColumnSet::create(1, prepared_set);
/// If prepared_set is not empty, we have a set made with literals.
/// Create a const ColumnSet to make constant folding work
if (!prepared_set->empty())
column.column = ColumnConst::create(std::move(column_set), 1);
else
column.column = std::move(column_set);
data.addColumn(column);
}
argument_types.push_back(column.type);
argument_names.push_back(column.name);
}
else if (identifier && (functionIsJoinGet(node.name) || functionIsDictGet(node.name)) && arg == 0)
{
auto table_id = IdentifierSemantic::extractDatabaseAndTable(*identifier);
table_id = data.context.resolveStorageID(table_id, Context::ResolveOrdinary);
auto column_string = ColumnString::create();
column_string->insert(table_id.getDatabaseName() + "." + table_id.getTableName());
ColumnWithTypeAndName column(
ColumnConst::create(std::move(column_string), 1),
std::make_shared<DataTypeString>(),
data.getUniqueName("__" + node.name));
data.addColumn(column);
argument_types.push_back(column.type);
argument_names.push_back(column.name);
}
else
{
/// If the argument is not a lambda expression, call it recursively and find out its type.
visit(child, data);
if (auto name_type = getNameAndTypeFromAST(child, data))
{
argument_types.push_back(name_type->type);
argument_names.push_back(name_type->name);
@ -797,125 +860,66 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
else
arguments_present = false;
}
node.arguments->children.erase(node.arguments->children.begin() + arg);
node.arguments->children.insert(node.arguments->children.begin() + arg, columns.begin(), columns.end());
num_arguments += columns.size() - 1;
arg += columns.size() - 1;
}
else if (checkFunctionIsInOrGlobalInOperator(node) && arg == 1 && prepared_set)
if (data.only_consts && !arguments_present)
return;
if (has_lambda_arguments && !data.only_consts)
{
ColumnWithTypeAndName column;
column.type = std::make_shared<DataTypeSet>();
function_builder->getLambdaArgumentTypes(argument_types);
/// If the argument is a set given by an enumeration of values (so, the set was already built), give it a unique name,
/// so that sets with the same literal representation do not fuse together (they can have different types).
if (!prepared_set->empty())
column.name = data.getUniqueName("__set");
else
column.name = child->getColumnName();
if (!data.hasColumn(column.name))
/// Call recursively for lambda expressions.
for (size_t i = 0; i < node.arguments->children.size(); ++i)
{
auto column_set = ColumnSet::create(1, prepared_set);
/// If prepared_set is not empty, we have a set made with literals.
/// Create a const ColumnSet to make constant folding work
if (!prepared_set->empty())
column.column = ColumnConst::create(std::move(column_set), 1);
else
column.column = std::move(column_set);
data.addColumn(column);
}
ASTPtr child = node.arguments->children[i];
argument_types.push_back(column.type);
argument_names.push_back(column.name);
}
else if (identifier && (functionIsJoinGet(node.name) || functionIsDictGet(node.name)) && arg == 0)
{
auto table_id = IdentifierSemantic::extractDatabaseAndTable(*identifier);
table_id = data.context.resolveStorageID(table_id, Context::ResolveOrdinary);
auto column_string = ColumnString::create();
column_string->insert(table_id.getDatabaseName() + "." + table_id.getTableName());
ColumnWithTypeAndName column(
ColumnConst::create(std::move(column_string), 1),
std::make_shared<DataTypeString>(),
data.getUniqueName("__" + node.name));
data.addColumn(column);
argument_types.push_back(column.type);
argument_names.push_back(column.name);
}
else
{
/// If the argument is not a lambda expression, call it recursively and find out its type.
visit(child, data);
if (auto name_type = getNameAndTypeFromAST(child, data))
{
argument_types.push_back(name_type->type);
argument_names.push_back(name_type->name);
}
else
arguments_present = false;
}
}
if (data.only_consts && !arguments_present)
return;
if (has_lambda_arguments && !data.only_consts)
{
function_builder->getLambdaArgumentTypes(argument_types);
/// Call recursively for lambda expressions.
for (size_t i = 0; i < node.arguments->children.size(); ++i)
{
ASTPtr child = node.arguments->children[i];
const auto * lambda = child->as<ASTFunction>();
if (lambda && lambda->name == "lambda")
{
const DataTypeFunction * lambda_type = typeid_cast<const DataTypeFunction *>(argument_types[i].get());
const auto * lambda_args_tuple = lambda->arguments->children.at(0)->as<ASTFunction>();
const ASTs & lambda_arg_asts = lambda_args_tuple->arguments->children;
NamesAndTypesList lambda_arguments;
for (size_t j = 0; j < lambda_arg_asts.size(); ++j)
const auto * lambda = child->as<ASTFunction>();
if (lambda && lambda->name == "lambda")
{
auto opt_arg_name = tryGetIdentifierName(lambda_arg_asts[j]);
if (!opt_arg_name)
throw Exception("lambda argument declarations must be identifiers", ErrorCodes::TYPE_MISMATCH);
const DataTypeFunction * lambda_type = typeid_cast<const DataTypeFunction *>(argument_types[i].get());
const auto * lambda_args_tuple = lambda->arguments->children.at(0)->as<ASTFunction>();
const ASTs & lambda_arg_asts = lambda_args_tuple->arguments->children;
NamesAndTypesList lambda_arguments;
lambda_arguments.emplace_back(*opt_arg_name, lambda_type->getArgumentTypes()[j]);
for (size_t j = 0; j < lambda_arg_asts.size(); ++j)
{
auto opt_arg_name = tryGetIdentifierName(lambda_arg_asts[j]);
if (!opt_arg_name)
throw Exception("lambda argument declarations must be identifiers", ErrorCodes::TYPE_MISMATCH);
lambda_arguments.emplace_back(*opt_arg_name, lambda_type->getArgumentTypes()[j]);
}
data.actions_stack.pushLevel(lambda_arguments);
visit(lambda->arguments->children.at(1), data);
auto lambda_dag = data.actions_stack.popLevel();
String result_name = lambda->arguments->children.at(1)->getColumnName();
lambda_dag->removeUnusedActions(Names(1, result_name));
auto lambda_actions = std::make_shared<ExpressionActions>(lambda_dag);
DataTypePtr result_type = lambda_actions->getSampleBlock().getByName(result_name).type;
Names captured;
Names required = lambda_actions->getRequiredColumns();
for (const auto & required_arg : required)
if (findColumn(required_arg, lambda_arguments) == lambda_arguments.end())
captured.push_back(required_arg);
/// We can not name `getColumnName()`,
/// because it does not uniquely define the expression (the types of arguments can be different).
String lambda_name = data.getUniqueName("__lambda");
auto function_capture = std::make_unique<FunctionCaptureOverloadResolver>(
lambda_actions, captured, lambda_arguments, result_type, result_name);
auto function_capture_adapter = std::make_shared<FunctionOverloadResolverAdaptor>(std::move(function_capture));
data.addFunction(function_capture_adapter, captured, lambda_name);
argument_types[i] = std::make_shared<DataTypeFunction>(lambda_type->getArgumentTypes(), result_type);
argument_names[i] = lambda_name;
}
data.actions_stack.pushLevel(lambda_arguments);
visit(lambda->arguments->children.at(1), data);
auto lambda_dag = data.actions_stack.popLevel();
String result_name = lambda->arguments->children.at(1)->getColumnName();
lambda_dag->removeUnusedActions(Names(1, result_name));
auto lambda_actions = std::make_shared<ExpressionActions>(lambda_dag);
DataTypePtr result_type = lambda_actions->getSampleBlock().getByName(result_name).type;
Names captured;
Names required = lambda_actions->getRequiredColumns();
for (const auto & required_arg : required)
if (findColumn(required_arg, lambda_arguments) == lambda_arguments.end())
captured.push_back(required_arg);
/// We can not name `getColumnName()`,
/// because it does not uniquely define the expression (the types of arguments can be different).
String lambda_name = data.getUniqueName("__lambda");
auto function_capture = std::make_unique<FunctionCaptureOverloadResolver>(
lambda_actions, captured, lambda_arguments, result_type, result_name);
auto function_capture_adapter = std::make_shared<FunctionOverloadResolverAdaptor>(std::move(function_capture));
data.addFunction(function_capture_adapter, captured, lambda_name);
argument_types[i] = std::make_shared<DataTypeFunction>(lambda_type->getArgumentTypes(), result_type);
argument_names[i] = lambda_name;
}
}
}

View File

@ -97,10 +97,10 @@ public:
function_node->name == "any" || function_node->name == "anyLast"))
{
KeepAggregateFunctionVisitor::Data keep_data{data.group_by_keys, false};
KeepAggregateFunctionVisitor(keep_data).visit(function_node->arguments);
if (function_node->arguments) KeepAggregateFunctionVisitor(keep_data).visit(function_node->arguments);
/// Place argument of an aggregate function instead of function
if (!keep_data.keep_aggregator && !function_node->arguments->children.empty())
if (!keep_data.keep_aggregator && function_node->arguments && !function_node->arguments->children.empty())
{
String alias = function_node->alias;
ast = (function_node->arguments->children[0])->clone();

View File

@ -20,7 +20,7 @@ namespace
const ASTFunction * getInternalFunction(const ASTFunction & func)
{
if (func.arguments->children.size() == 1)
if (func.arguments && func.arguments->children.size() == 1)
return func.arguments->children[0]->as<ASTFunction>();
return nullptr;
}

Some files were not shown because too many files have changed in this diff