Merge remote-tracking branch 'upstream/master' into HEAD

This commit is contained in:
Anton Popov 2023-06-07 11:23:13 +00:00
commit c980771c22
1144 changed files with 29250 additions and 11294 deletions

View File

@ -46,7 +46,12 @@ jobs:
- name: Python unit tests
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
echo "Testing the main ci directory"
python3 -m unittest discover -s . -p '*_test.py'
for dir in *_lambda/; do
echo "Testing $dir"
python3 -m unittest discover -s "$dir" -p '*_test.py'
done
DockerHubPushAarch64:
needs: CheckLabels
runs-on: [self-hosted, style-checker-aarch64]

15
.gitmodules vendored
View File

@ -35,10 +35,9 @@
[submodule "contrib/unixodbc"]
path = contrib/unixodbc
url = https://github.com/ClickHouse/UnixODBC
[submodule "contrib/protobuf"]
path = contrib/protobuf
url = https://github.com/ClickHouse/protobuf
branch = v3.13.0.1
[submodule "contrib/google-protobuf"]
path = contrib/google-protobuf
url = https://github.com/ClickHouse/google-protobuf.git
[submodule "contrib/boost"]
path = contrib/boost
url = https://github.com/ClickHouse/boost
@ -267,10 +266,7 @@
url = https://github.com/ClickHouse/nats.c
[submodule "contrib/vectorscan"]
path = contrib/vectorscan
url = https://github.com/ClickHouse/vectorscan.git
[submodule "contrib/c-ares"]
path = contrib/c-ares
url = https://github.com/ClickHouse/c-ares
url = https://github.com/VectorCamp/vectorscan.git
[submodule "contrib/llvm-project"]
path = contrib/llvm-project
url = https://github.com/ClickHouse/llvm-project
@ -344,3 +340,6 @@
[submodule "contrib/isa-l"]
path = contrib/isa-l
url = https://github.com/ClickHouse/isa-l.git
[submodule "contrib/c-ares"]
path = contrib/c-ares
url = https://github.com/c-ares/c-ares.git

View File

@ -102,6 +102,17 @@ if (ENABLE_FUZZING)
set (ENABLE_PROTOBUF 1)
endif()
option (ENABLE_WOBOQ_CODEBROWSER "Build for woboq codebrowser" OFF)
if (ENABLE_WOBOQ_CODEBROWSER)
set (ENABLE_EMBEDDED_COMPILER 0)
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-poison-system-directories")
# woboq codebrowser uses clang tooling, which may add the default system
# clang include directories by itself; clang would then warn about those
# self-added includes, so that warning is disabled here.
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-poison-system-directories")
endif()
# Global libraries
# See:
# - default_libs.cmake
@ -259,8 +270,8 @@ endif ()
option (ENABLE_BUILD_PATH_MAPPING "Enable remapping of file source paths in debug info, predefined preprocessor macros, and __builtin_FILE(). It's used to generate reproducible builds. See https://reproducible-builds.org/docs/build-path" ${ENABLE_BUILD_PATH_MAPPING_DEFAULT})
if (ENABLE_BUILD_PATH_MAPPING)
set (COMPILER_FLAGS "${COMPILER_FLAGS} -ffile-prefix-map=${CMAKE_SOURCE_DIR}=.")
set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -ffile-prefix-map=${CMAKE_SOURCE_DIR}=.")
set (COMPILER_FLAGS "${COMPILER_FLAGS} -ffile-prefix-map=${PROJECT_SOURCE_DIR}=.")
set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -ffile-prefix-map=${PROJECT_SOURCE_DIR}=.")
endif ()
option (ENABLE_BUILD_PROFILING "Enable profiling of build time" OFF)
@ -557,7 +568,7 @@ if (NATIVE_BUILD_TARGETS
)
message (STATUS "Building native targets...")
set (NATIVE_BUILD_DIR "${CMAKE_BINARY_DIR}/native")
set (NATIVE_BUILD_DIR "${PROJECT_BINARY_DIR}/native")
execute_process(
COMMAND ${CMAKE_COMMAND} -E make_directory "${NATIVE_BUILD_DIR}"
@ -571,7 +582,7 @@ if (NATIVE_BUILD_TARGETS
# Avoid overriding .cargo/config.toml with native toolchain.
"-DENABLE_RUST=OFF"
"-DENABLE_CLICKHOUSE_SELF_EXTRACTING=${ENABLE_CLICKHOUSE_SELF_EXTRACTING}"
${CMAKE_SOURCE_DIR}
${PROJECT_SOURCE_DIR}
WORKING_DIRECTORY "${NATIVE_BUILD_DIR}"
COMMAND_ECHO STDOUT)

View File

@ -22,12 +22,10 @@ curl https://clickhouse.com/ | sh
## Upcoming Events
* [**v23.5 Release Webinar**](https://clickhouse.com/company/events/v23-5-release-webinar?utm_source=github&utm_medium=social&utm_campaign=release-webinar-2023-05) - May 31 - 23.5 is rapidly approaching. Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release.
* [**ClickHouse Meetup in Berlin**](https://www.meetup.com/clickhouse-berlin-user-group/events/292892466) - May 16
* [**ClickHouse Meetup in Barcelona**](https://www.meetup.com/clickhouse-barcelona-user-group/events/292892669) - May 25
* [**ClickHouse Meetup in London**](https://www.meetup.com/clickhouse-london-user-group/events/292892824) - May 25
* [**v23.5 Release Webinar**](https://clickhouse.com/company/events/v23-5-release-webinar?utm_source=github&utm_medium=social&utm_campaign=release-webinar-2023-05) - Jun 8 - 23.5 is rapidly approaching. Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release.
* [**ClickHouse Meetup in Bangalore**](https://www.meetup.com/clickhouse-bangalore-user-group/events/293740066/) - Jun 7
* [**ClickHouse Meetup in San Francisco**](https://www.meetup.com/clickhouse-silicon-valley-meetup-group/events/293426725/) - Jun 7
* [**ClickHouse Meetup in Stockholm**](https://www.meetup.com/clickhouse-berlin-user-group/events/292892466) - Jun 13
Also, keep an eye out for upcoming meetups in Amsterdam, Boston, NYC, Beijing, and Toronto. Somewhere else you want us to be? Please feel free to reach out to tyler <at> clickhouse <dot> com.

View File

@ -3,6 +3,7 @@
#include <cassert>
#include <stdexcept> // for std::logic_error
#include <string>
#include <type_traits>
#include <vector>
#include <functional>
#include <iosfwd>
@ -326,5 +327,16 @@ namespace ZeroTraits
inline void set(StringRef & x) { x.size = 0; }
}
namespace PackedZeroTraits
{
template <typename Second, template <typename, typename> class PackedPairNoInit>
inline bool check(const PackedPairNoInit<StringRef, Second> p)
{ return 0 == p.key.size; }
template <typename Second, template <typename, typename> class PackedPairNoInit>
inline void set(PackedPairNoInit<StringRef, Second> & p)
{ p.key.size = 0; }
}
std::ostream & operator<<(std::ostream & os, const StringRef & str);

View File

@ -28,14 +28,28 @@ uint64_t getMemoryAmountOrZero()
#if defined(OS_LINUX)
// Try to look up the cgroup memory limit
std::ifstream cgroup_limit("/sys/fs/cgroup/memory/memory.limit_in_bytes");
if (cgroup_limit.is_open())
// CGroups v2
std::ifstream cgroupv2_limit("/sys/fs/cgroup/memory.max");
if (cgroupv2_limit.is_open())
{
uint64_t memory_limit = 0; // in case of read error
cgroup_limit >> memory_limit;
uint64_t memory_limit = 0;
cgroupv2_limit >> memory_limit;
if (memory_limit > 0 && memory_limit < memory_amount)
memory_amount = memory_limit;
}
else
{
// CGroups v1
std::ifstream cgroup_limit("/sys/fs/cgroup/memory/memory.limit_in_bytes");
if (cgroup_limit.is_open())
{
uint64_t memory_limit = 0; // in case of read error
cgroup_limit >> memory_limit;
if (memory_limit > 0 && memory_limit < memory_amount)
memory_amount = memory_limit;
}
}
#endif
return memory_amount;

View File

@ -274,7 +274,9 @@ void SocketImpl::shutdown()
int SocketImpl::sendBytes(const void* buffer, int length, int flags)
{
if (_isBrokenTimeout)
bool blocking = _blocking && (flags & MSG_DONTWAIT) == 0;
if (_isBrokenTimeout && blocking)
{
if (_sndTimeout.totalMicroseconds() != 0)
{
@ -289,11 +291,13 @@ int SocketImpl::sendBytes(const void* buffer, int length, int flags)
if (_sockfd == POCO_INVALID_SOCKET) throw InvalidSocketException();
rc = ::send(_sockfd, reinterpret_cast<const char*>(buffer), length, flags);
}
while (_blocking && rc < 0 && lastError() == POCO_EINTR);
while (blocking && rc < 0 && lastError() == POCO_EINTR);
if (rc < 0)
{
int err = lastError();
if (err == POCO_EAGAIN || err == POCO_ETIMEDOUT)
if ((err == POCO_EAGAIN || err == POCO_EWOULDBLOCK) && !blocking)
;
else if (err == POCO_EAGAIN || err == POCO_ETIMEDOUT)
throw TimeoutException();
else
error(err);

View File

@ -183,6 +183,16 @@ namespace Net
/// Returns true iff a reused session was negotiated during
/// the handshake.
virtual void setBlocking(bool flag);
/// Sets the socket in blocking mode if flag is true,
/// disables blocking mode if flag is false.
virtual bool getBlocking() const;
/// Returns the blocking mode of the socket.
/// This method will only work if the blocking modes of
/// the socket are changed via the setBlocking method!
protected:
void acceptSSL();
/// Assume per-object mutex is locked.

View File

@ -201,6 +201,16 @@ namespace Net
/// Returns true iff a reused session was negotiated during
/// the handshake.
virtual void setBlocking(bool flag);
/// Sets the socket in blocking mode if flag is true,
/// disables blocking mode if flag is false.
virtual bool getBlocking() const;
/// Returns the blocking mode of the socket.
/// This method will only work if the blocking modes of
/// the socket are changed via the setBlocking method!
protected:
void acceptSSL();
/// Performs a SSL server-side handshake.

View File

@ -629,5 +629,15 @@ bool SecureSocketImpl::sessionWasReused()
return false;
}
// Forwards the requested blocking mode to the wrapped socket (_pSocket):
// flag == true asks for blocking mode, flag == false for non-blocking mode.
void SecureSocketImpl::setBlocking(bool flag)
{
_pSocket->setBlocking(flag);
}
// Returns the blocking mode as reported by the wrapped socket (_pSocket).
bool SecureSocketImpl::getBlocking() const
{
return _pSocket->getBlocking();
}
} } // namespace Poco::Net

View File

@ -237,5 +237,15 @@ int SecureStreamSocketImpl::completeHandshake()
return _impl.completeHandshake();
}
// Returns the blocking mode as reported by the underlying implementation (_impl).
bool SecureStreamSocketImpl::getBlocking() const
{
return _impl.getBlocking();
}
// Forwards the requested blocking mode to the underlying implementation (_impl):
// flag == true asks for blocking mode, flag == false for non-blocking mode.
void SecureStreamSocketImpl::setBlocking(bool flag)
{
_impl.setBlocking(flag);
}
} } // namespace Poco::Net

View File

@ -5,11 +5,11 @@ if (NOT TARGET check)
if (CMAKE_CONFIGURATION_TYPES)
add_custom_target (check COMMAND ${CMAKE_CTEST_COMMAND}
--force-new-ctest-process --output-on-failure --build-config "$<CONFIGURATION>"
WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
WORKING_DIRECTORY ${PROJECT_BINARY_DIR})
else ()
add_custom_target (check COMMAND ${CMAKE_CTEST_COMMAND}
--force-new-ctest-process --output-on-failure
WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
WORKING_DIRECTORY ${PROJECT_BINARY_DIR})
endif ()
endif ()

View File

@ -5,14 +5,14 @@ if (Git_FOUND)
# Commit hash + whether the building workspace was dirty or not
execute_process(COMMAND
"${GIT_EXECUTABLE}" rev-parse HEAD
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
OUTPUT_VARIABLE GIT_HASH
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
# Branch name
execute_process(COMMAND
"${GIT_EXECUTABLE}" rev-parse --abbrev-ref HEAD
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
OUTPUT_VARIABLE GIT_BRANCH
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
@ -20,14 +20,14 @@ if (Git_FOUND)
SET(ENV{TZ} "UTC")
execute_process(COMMAND
"${GIT_EXECUTABLE}" log -1 --format=%ad --date=iso-local
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
OUTPUT_VARIABLE GIT_DATE
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
# Subject of the commit
execute_process(COMMAND
"${GIT_EXECUTABLE}" log -1 --format=%s
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
OUTPUT_VARIABLE GIT_COMMIT_SUBJECT
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
@ -35,7 +35,7 @@ if (Git_FOUND)
execute_process(
COMMAND ${GIT_EXECUTABLE} status
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} OUTPUT_STRIP_TRAILING_WHITESPACE)
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} OUTPUT_STRIP_TRAILING_WHITESPACE)
else()
message(STATUS "Git could not be found.")
endif()

View File

@ -7,6 +7,6 @@ message (STATUS "compiler CXX = ${CMAKE_CXX_COMPILER} ${FULL_CXX_FLAGS}")
message (STATUS "LINKER_FLAGS = ${FULL_EXE_LINKER_FLAGS}")
# Reproducible builds
string (REPLACE "${CMAKE_SOURCE_DIR}" "." FULL_C_FLAGS_NORMALIZED "${FULL_C_FLAGS}")
string (REPLACE "${CMAKE_SOURCE_DIR}" "." FULL_CXX_FLAGS_NORMALIZED "${FULL_CXX_FLAGS}")
string (REPLACE "${CMAKE_SOURCE_DIR}" "." FULL_EXE_LINKER_FLAGS_NORMALIZED "${FULL_EXE_LINKER_FLAGS}")
string (REPLACE "${PROJECT_SOURCE_DIR}" "." FULL_C_FLAGS_NORMALIZED "${FULL_C_FLAGS}")
string (REPLACE "${PROJECT_SOURCE_DIR}" "." FULL_CXX_FLAGS_NORMALIZED "${FULL_CXX_FLAGS}")
string (REPLACE "${PROJECT_SOURCE_DIR}" "." FULL_EXE_LINKER_FLAGS_NORMALIZED "${FULL_EXE_LINKER_FLAGS}")

View File

@ -8,6 +8,9 @@ option (SANITIZE "Enable one of the code sanitizers" "")
set (SAN_FLAGS "${SAN_FLAGS} -g -fno-omit-frame-pointer -DSANITIZER")
# It's possible to pass an ignore list to sanitizers (-fsanitize-ignorelist). Intentionally not doing this because
# 1. out-of-source suppressions are awkward 2. it seems ignore lists don't work after the Clang v16 upgrade (#49829)
if (SANITIZE)
if (SANITIZE STREQUAL "address")
set (ASAN_FLAGS "-fsanitize=address -fsanitize-address-use-after-scope")
@ -29,14 +32,14 @@ if (SANITIZE)
# Linking can fail due to relocation overflows (see #49145), caused by too big object files / libraries.
# Work around this with position-independent builds (-fPIC and -fpie), this is slightly slower than non-PIC/PIE but that's okay.
set (MSAN_FLAGS "-fsanitize=memory -fsanitize-memory-use-after-dtor -fsanitize-memory-track-origins -fno-optimize-sibling-calls -fPIC -fpie -fsanitize-blacklist=${CMAKE_SOURCE_DIR}/tests/msan_suppressions.txt")
set (MSAN_FLAGS "-fsanitize=memory -fsanitize-memory-use-after-dtor -fsanitize-memory-track-origins -fno-optimize-sibling-calls -fPIC -fpie")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} ${MSAN_FLAGS}")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} ${MSAN_FLAGS}")
elseif (SANITIZE STREQUAL "thread")
set (TSAN_FLAGS "-fsanitize=thread")
if (COMPILER_CLANG)
set (TSAN_FLAGS "${TSAN_FLAGS} -fsanitize-blacklist=${CMAKE_SOURCE_DIR}/tests/tsan_suppressions.txt")
set (TSAN_FLAGS "${TSAN_FLAGS} -fsanitize-blacklist=${PROJECT_SOURCE_DIR}/tests/tsan_suppressions.txt")
endif()
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} ${TSAN_FLAGS}")
@ -54,7 +57,7 @@ if (SANITIZE)
set(UBSAN_FLAGS "${UBSAN_FLAGS} -fno-sanitize=unsigned-integer-overflow")
endif()
if (COMPILER_CLANG)
set (UBSAN_FLAGS "${UBSAN_FLAGS} -fsanitize-blacklist=${CMAKE_SOURCE_DIR}/tests/ubsan_suppressions.txt")
set (UBSAN_FLAGS "${UBSAN_FLAGS} -fsanitize-blacklist=${PROJECT_SOURCE_DIR}/tests/ubsan_suppressions.txt")
endif()
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} ${UBSAN_FLAGS}")

View File

@ -1,2 +1,2 @@
wget https://github.com/phracker/MacOSX-SDKs/releases/download/10.15/MacOSX10.15.sdk.tar.xz
tar xJf MacOSX10.15.sdk.tar.xz --strip-components=1
wget https://github.com/phracker/MacOSX-SDKs/releases/download/11.3/MacOSX11.0.sdk.tar.xz
tar xJf MacOSX11.0.sdk.tar.xz --strip-components=1

View File

@ -1,4 +1,4 @@
include(${CMAKE_SOURCE_DIR}/cmake/autogenerated_versions.txt)
include(${PROJECT_SOURCE_DIR}/cmake/autogenerated_versions.txt)
set(VERSION_EXTRA "" CACHE STRING "")
set(VERSION_TWEAK "" CACHE STRING "")

View File

@ -88,7 +88,7 @@ add_contrib (thrift-cmake thrift)
# parquet/arrow/orc
add_contrib (arrow-cmake arrow) # requires: snappy, thrift, double-conversion
add_contrib (avro-cmake avro) # requires: snappy
add_contrib (protobuf-cmake protobuf)
add_contrib (google-protobuf-cmake google-protobuf)
add_contrib (openldap-cmake openldap)
add_contrib (grpc-cmake grpc)
add_contrib (msgpack-c-cmake msgpack-c)
@ -156,7 +156,7 @@ add_contrib (libgsasl-cmake libgsasl) # requires krb5
add_contrib (librdkafka-cmake librdkafka) # requires: libgsasl
add_contrib (nats-io-cmake nats-io)
add_contrib (isa-l-cmake isa-l)
add_contrib (libhdfs3-cmake libhdfs3) # requires: protobuf, krb5, isa-l
add_contrib (libhdfs3-cmake libhdfs3) # requires: google-protobuf, krb5, isa-l
add_contrib (hive-metastore-cmake hive-metastore) # requires: thrift/avro/arrow/libhdfs3
add_contrib (cppkafka-cmake cppkafka)
add_contrib (libpqxx-cmake libpqxx)

View File

@ -6,7 +6,7 @@ if (NOT ENABLE_AVRO)
return()
endif()
set(AVROCPP_ROOT_DIR "${CMAKE_SOURCE_DIR}/contrib/avro/lang/c++")
set(AVROCPP_ROOT_DIR "${PROJECT_SOURCE_DIR}/contrib/avro/lang/c++")
set(AVROCPP_INCLUDE_DIR "${AVROCPP_ROOT_DIR}/api")
set(AVROCPP_SOURCE_DIR "${AVROCPP_ROOT_DIR}/impl")

2
contrib/aws vendored

@ -1 +1 @@
Subproject commit ecccfc026a42b30023289410a67024d561f4bf3e
Subproject commit ca02358dcc7ce3ab733dd4cbcc32734eecfa4ee3

2
contrib/aws-c-auth vendored

@ -1 +1 @@
Subproject commit 30df6c407e2df43bd244e2c34c9b4a4b87372bfb
Subproject commit 97133a2b5dbca1ccdf88cd6f44f39d0531d27d12

@ -1 +1 @@
Subproject commit 324fd1d973ccb25c813aa747bf1759cfde5121c5
Subproject commit 45dcb2849c891dba2100b270b4676765c92949ff

@ -1 +1 @@
Subproject commit 39bfa94a14b7126bf0c1330286ef8db452d87e66
Subproject commit 2f9b60c42f90840ec11822acda3d8cdfa97a773d

2
contrib/aws-c-http vendored

@ -1 +1 @@
Subproject commit 2c5a2a7d5556600b9782ffa6c9d7e09964df1abc
Subproject commit dd34461987947672444d0bc872c5a733dfdb9711

2
contrib/aws-c-io vendored

@ -1 +1 @@
Subproject commit 5d32c453560d0823df521a686bf7fbacde7f9be3
Subproject commit d58ed4f272b1cb4f89ac9196526ceebe5f2b0d89

2
contrib/aws-c-mqtt vendored

@ -1 +1 @@
Subproject commit 882c689561a3db1466330ccfe3b63637e0a575d3
Subproject commit 33c3455cec82b16feb940e12006cefd7b3ef4194

2
contrib/aws-c-s3 vendored

@ -1 +1 @@
Subproject commit a41255ece72a7c887bba7f9d998ca3e14f4c8a1b
Subproject commit d7bfe602d6925948f1fff95784e3613cca6a3900

@ -1 +1 @@
Subproject commit 25bf5cf225f977c3accc6a05a0a7a181ef2a4a30
Subproject commit 208a701fa01e99c7c8cc3dcebc8317da71362972

@ -1 +1 @@
Subproject commit 48e7c0e01479232f225c8044d76c84e74192889d
Subproject commit ad53be196a25bbefa3700a01187fdce573a7d2d0

View File

@ -52,8 +52,8 @@ endif()
# Directories.
SET(AWS_SDK_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws")
SET(AWS_SDK_CORE_DIR "${AWS_SDK_DIR}/aws-cpp-sdk-core")
SET(AWS_SDK_S3_DIR "${AWS_SDK_DIR}/aws-cpp-sdk-s3")
SET(AWS_SDK_CORE_DIR "${AWS_SDK_DIR}/src/aws-cpp-sdk-core")
SET(AWS_SDK_S3_DIR "${AWS_SDK_DIR}/generated/src/aws-cpp-sdk-s3")
SET(AWS_AUTH_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws-c-auth")
SET(AWS_CAL_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws-c-cal")
@ -118,7 +118,7 @@ configure_file("${AWS_SDK_CORE_DIR}/include/aws/core/SDKConfig.h.in"
list(APPEND AWS_PUBLIC_COMPILE_DEFS "-DAWS_SDK_VERSION_MAJOR=1")
list(APPEND AWS_PUBLIC_COMPILE_DEFS "-DAWS_SDK_VERSION_MINOR=10")
list(APPEND AWS_PUBLIC_COMPILE_DEFS "-DAWS_SDK_VERSION_PATCH=36")
list(APPEND AWS_SOURCES ${AWS_SDK_CORE_SRC} ${AWS_SDK_CORE_NET_SRC} ${AWS_SDK_CORE_PLATFORM_SRC})
list(APPEND AWS_PUBLIC_INCLUDES

2
contrib/aws-crt-cpp vendored

@ -1 +1 @@
Subproject commit ec0bea288f451d884c0d80d534bc5c66241c39a4
Subproject commit 8a301b7e842f1daed478090c869207300972379f

2
contrib/aws-s2n-tls vendored

@ -1 +1 @@
Subproject commit 0f1ba9e5c4a67cb3898de0c0b4f911d4194dc8de
Subproject commit 71f4794b7580cf780eb4aca77d69eded5d3c7bb4

2
contrib/boost vendored

@ -1 +1 @@
Subproject commit 8fe7b3326ef482ee6ecdf5a4f698f2b8c2780f98
Subproject commit aec12eea7fc762721ae16943d1361340c66c9c17

2
contrib/c-ares vendored

@ -1 +1 @@
Subproject commit afee6748b0b99acf4509d42fa37ac8422262f91b
Subproject commit 6360e96b5cf8e5980c887ce58ef727e53d77243a

View File

@ -48,6 +48,7 @@ SET(SRCS
"${LIBRARY_DIR}/src/lib/ares_platform.c"
"${LIBRARY_DIR}/src/lib/ares_process.c"
"${LIBRARY_DIR}/src/lib/ares_query.c"
"${LIBRARY_DIR}/src/lib/ares_rand.c"
"${LIBRARY_DIR}/src/lib/ares_search.c"
"${LIBRARY_DIR}/src/lib/ares_send.c"
"${LIBRARY_DIR}/src/lib/ares_strcasecmp.c"

View File

@ -18,7 +18,7 @@ endif()
# Need to use C++17 since the compilation is not possible with C++20 currently.
set (CMAKE_CXX_STANDARD 17)
set(CASS_ROOT_DIR ${CMAKE_SOURCE_DIR}/contrib/cassandra)
set(CASS_ROOT_DIR ${PROJECT_SOURCE_DIR}/contrib/cassandra)
set(CASS_SRC_DIR "${CASS_ROOT_DIR}/src")
set(CASS_INCLUDE_DIR "${CASS_ROOT_DIR}/include")

View File

@ -26,7 +26,7 @@ endif ()
# StorageSystemTimeZones.generated.cpp is autogenerated each time during a build
# data in this file will be used to populate the system.time_zones table, this is specific to OS_LINUX
# as the library that's built using embedded tzdata is also specific to OS_LINUX
set(SYSTEM_STORAGE_TZ_FILE "${CMAKE_BINARY_DIR}/src/Storages/System/StorageSystemTimeZones.generated.cpp")
set(SYSTEM_STORAGE_TZ_FILE "${PROJECT_BINARY_DIR}/src/Storages/System/StorageSystemTimeZones.generated.cpp")
# remove existing copies so that it is generated fresh on each build.
file(REMOVE ${SYSTEM_STORAGE_TZ_FILE})

1
contrib/google-protobuf vendored Submodule

@ -0,0 +1 @@
Subproject commit c47efe2d8f6a60022b49ecd6cc23660687c8598f

View File

@ -5,7 +5,7 @@ if(NOT ENABLE_PROTOBUF)
return()
endif()
set(Protobuf_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/protobuf/src")
set(Protobuf_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/google-protobuf/src")
if(OS_FREEBSD AND SANITIZE STREQUAL "address")
# ../contrib/protobuf/src/google/protobuf/arena_impl.h:45:10: fatal error: 'sanitizer/asan_interface.h' file not found
# #include <sanitizer/asan_interface.h>
@ -17,8 +17,8 @@ if(OS_FREEBSD AND SANITIZE STREQUAL "address")
endif()
endif()
set(protobuf_source_dir "${ClickHouse_SOURCE_DIR}/contrib/protobuf")
set(protobuf_binary_dir "${ClickHouse_BINARY_DIR}/contrib/protobuf")
set(protobuf_source_dir "${ClickHouse_SOURCE_DIR}/contrib/google-protobuf")
set(protobuf_binary_dir "${ClickHouse_BINARY_DIR}/contrib/google-protobuf")
add_definitions(-DGOOGLE_PROTOBUF_CMAKE_BUILD)
@ -35,7 +35,6 @@ set(libprotobuf_lite_files
${protobuf_source_dir}/src/google/protobuf/arena.cc
${protobuf_source_dir}/src/google/protobuf/arenastring.cc
${protobuf_source_dir}/src/google/protobuf/extension_set.cc
${protobuf_source_dir}/src/google/protobuf/field_access_listener.cc
${protobuf_source_dir}/src/google/protobuf/generated_enum_util.cc
${protobuf_source_dir}/src/google/protobuf/generated_message_table_driven_lite.cc
${protobuf_source_dir}/src/google/protobuf/generated_message_util.cc
@ -86,6 +85,7 @@ set(libprotobuf_files
${protobuf_source_dir}/src/google/protobuf/empty.pb.cc
${protobuf_source_dir}/src/google/protobuf/extension_set_heavy.cc
${protobuf_source_dir}/src/google/protobuf/field_mask.pb.cc
${protobuf_source_dir}/src/google/protobuf/generated_message_bases.cc
${protobuf_source_dir}/src/google/protobuf/generated_message_reflection.cc
${protobuf_source_dir}/src/google/protobuf/generated_message_table_driven.cc
${protobuf_source_dir}/src/google/protobuf/io/gzip_stream.cc
@ -316,7 +316,7 @@ else ()
add_dependencies(protoc "${PROTOC_BUILD_DIR}/protoc")
endif ()
include("${ClickHouse_SOURCE_DIR}/contrib/protobuf-cmake/protobuf_generate.cmake")
include("${ClickHouse_SOURCE_DIR}/contrib/google-protobuf-cmake/protobuf_generate.cmake")
add_library(_protobuf INTERFACE)
target_link_libraries(_protobuf INTERFACE _libprotobuf)

2
contrib/libgsasl vendored

@ -1 +1 @@
Subproject commit f4e7bf0bb068030d57266f87ccac4c8c012fb5c4
Subproject commit 0fb79e7609ae5a5e015a41d24bcbadd48f8f5469

2
contrib/libpqxx vendored

@ -1 +1 @@
Subproject commit a4e834839270a8c1f7ff1db351ba85afced3f0e2
Subproject commit bdd6540fb95ff56c813691ceb5da5a3266cf235d

View File

@ -1,7 +1,7 @@
# This file is a modified version of contrib/libuv/CMakeLists.txt
set (SOURCE_DIR "${CMAKE_SOURCE_DIR}/contrib/libuv")
set (BINARY_DIR "${CMAKE_BINARY_DIR}/contrib/libuv")
set (SOURCE_DIR "${PROJECT_SOURCE_DIR}/contrib/libuv")
set (BINARY_DIR "${PROJECT_BINARY_DIR}/contrib/libuv")
set(uv_sources
src/fs-poll.c

2
contrib/libxml2 vendored

@ -1 +1 @@
Subproject commit f507d167f1755b7eaea09fb1a44d29aab828b6d1
Subproject commit 223cb03a5d27b1b2393b266a8657443d046139d6

View File

@ -15,7 +15,7 @@ endif()
# This is the LGPL libmariadb project.
set(CC_SOURCE_DIR ${CMAKE_SOURCE_DIR}/contrib/mariadb-connector-c)
set(CC_SOURCE_DIR ${PROJECT_SOURCE_DIR}/contrib/mariadb-connector-c)
set(CC_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
set(WITH_SSL ON)

1
contrib/protobuf vendored

@ -1 +0,0 @@
Subproject commit 6bb70196c5360268d9f021bb7936fb0b551724c2

View File

@ -25,6 +25,9 @@ message(STATUS "Intel QPL version: ${QPL_VERSION}")
# Generate 8 library targets: middle_layer_lib, isal, isal_asm, qplcore_px, qplcore_avx512, qplcore_sw_dispatcher, core_iaa, middle_layer_lib.
# Output ch_contrib::qpl by linking with 8 library targets.
# The qpl submodule comes with its own version of isal. It contains code which does not exist in upstream isal. It would be nice to link
# only upstream isal (ch_contrib::isal) but at this point we can't.
include("${QPL_PROJECT_DIR}/cmake/CompileOptions.cmake")
# check nasm compiler
@ -308,7 +311,7 @@ target_include_directories(middle_layer_lib
target_compile_definitions(middle_layer_lib PUBLIC -DQPL_LIB)
# [SUBDIR]c_api
file(GLOB_RECURSE QPL_C_API_SRC
file(GLOB_RECURSE QPL_C_API_SRC
${QPL_SRC_DIR}/c_api/*.c
${QPL_SRC_DIR}/c_api/*.cpp)

View File

@ -1,4 +1,4 @@
set (SOURCE_DIR "${CMAKE_SOURCE_DIR}/contrib/snappy")
set (SOURCE_DIR "${PROJECT_SOURCE_DIR}/contrib/snappy")
if (ARCH_S390X)
set (SNAPPY_IS_BIG_ENDIAN 1)

View File

@ -5,8 +5,8 @@ echo "Using sparse checkout for aws"
FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout
echo '/*' > $FILES_TO_CHECKOUT
echo '!/*/*' >> $FILES_TO_CHECKOUT
echo '/aws-cpp-sdk-core/*' >> $FILES_TO_CHECKOUT
echo '/aws-cpp-sdk-s3/*' >> $FILES_TO_CHECKOUT
echo '/src/aws-cpp-sdk-core/*' >> $FILES_TO_CHECKOUT
echo '/generated/src/aws-cpp-sdk-s3/*' >> $FILES_TO_CHECKOUT
git config core.sparsecheckout true
git checkout $1

2
contrib/vectorscan vendored

@ -1 +1 @@
Subproject commit 1f4d448314e581473103187765e4c949d01b4259
Subproject commit 38431d111781843741a781a57a6381a527d900a4

View File

@ -1,4 +1,4 @@
set (SOURCE_DIR ${CMAKE_SOURCE_DIR}/contrib/zlib-ng)
set (SOURCE_DIR ${PROJECT_SOURCE_DIR}/contrib/zlib-ng)
add_definitions(-DZLIB_COMPAT)
add_definitions(-DWITH_GZFILEOP)

View File

@ -46,10 +46,12 @@ ENV CXX=clang++-${LLVM_VERSION}
# Rust toolchain and libraries
ENV RUSTUP_HOME=/rust/rustup
ENV CARGO_HOME=/rust/cargo
ENV PATH="/rust/cargo/env:${PATH}"
ENV PATH="/rust/cargo/bin:${PATH}"
RUN curl https://sh.rustup.rs -sSf | bash -s -- -y && \
chmod 777 -R /rust && \
rustup toolchain install nightly && \
rustup default nightly && \
rustup component add rust-src && \
rustup target add aarch64-unknown-linux-gnu && \
rustup target add x86_64-apple-darwin && \
rustup target add x86_64-unknown-freebsd && \

View File

@ -11,9 +11,11 @@ ccache_status () {
[ -O /build ] || git config --global --add safe.directory /build
mkdir -p /build/cmake/toolchain/darwin-x86_64
tar xJf /MacOSX11.0.sdk.tar.xz -C /build/cmake/toolchain/darwin-x86_64 --strip-components=1
ln -sf darwin-x86_64 /build/cmake/toolchain/darwin-aarch64
if [ "$EXTRACT_TOOLCHAIN_DARWIN" = "1" ]; then
mkdir -p /build/cmake/toolchain/darwin-x86_64
tar xJf /MacOSX11.0.sdk.tar.xz -C /build/cmake/toolchain/darwin-x86_64 --strip-components=1
ln -sf darwin-x86_64 /build/cmake/toolchain/darwin-aarch64
fi
# Uncomment to debug ccache. Don't put ccache log in /output right away, or it
# will be confusingly packed into the "performance" package.

View File

@ -167,6 +167,7 @@ def parse_env_variables(
cmake_flags.append(
"-DCMAKE_TOOLCHAIN_FILE=/build/cmake/darwin/toolchain-x86_64.cmake"
)
result.append("EXTRACT_TOOLCHAIN_DARWIN=1")
elif is_cross_darwin_arm:
cc = compiler[: -len(DARWIN_ARM_SUFFIX)]
cmake_flags.append("-DCMAKE_AR:FILEPATH=/cctools/bin/aarch64-apple-darwin-ar")
@ -181,6 +182,7 @@ def parse_env_variables(
cmake_flags.append(
"-DCMAKE_TOOLCHAIN_FILE=/build/cmake/darwin/toolchain-aarch64.cmake"
)
result.append("EXTRACT_TOOLCHAIN_DARWIN=1")
elif is_cross_arm:
cc = compiler[: -len(ARM_SUFFIX)]
cmake_flags.append(

View File

@ -15,7 +15,7 @@ nproc=$(($(nproc) + 2)) # increase parallelism
read -ra CMAKE_FLAGS <<< "${CMAKE_FLAGS:-}"
mkdir -p "$BUILD_DIRECTORY" && cd "$BUILD_DIRECTORY"
cmake "$SOURCE_DIRECTORY" -DCMAKE_CXX_COMPILER="/usr/bin/clang++-${LLVM_VERSION}" -DCMAKE_C_COMPILER="/usr/bin/clang-${LLVM_VERSION}" -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DENABLE_EMBEDDED_COMPILER=0 "${CMAKE_FLAGS[@]}"
cmake "$SOURCE_DIRECTORY" -DCMAKE_CXX_COMPILER="/usr/bin/clang++-${LLVM_VERSION}" -DCMAKE_C_COMPILER="/usr/bin/clang-${LLVM_VERSION}" -DENABLE_WOBOQ_CODEBROWSER=ON "${CMAKE_FLAGS[@]}"
mkdir -p "$HTML_RESULT_DIRECTORY"
echo 'Filter out too noisy "Error: filename" lines and keep them in full codebrowser_generator.log'
/woboq_codebrowser/generator/codebrowser_generator -b "$BUILD_DIRECTORY" -a \

View File

@ -626,7 +626,9 @@ if args.report == "main":
message_array.append(str(faster_queries) + " faster")
if slower_queries:
if slower_queries > 3:
# This threshold should be synchronized with the value in https://github.com/ClickHouse/ClickHouse/blob/master/tests/ci/performance_comparison_check.py#L225
# False positives rate should be < 1%: https://shorturl.at/CDEK8
if slower_queries > 5:
status = "failure"
message_array.append(str(slower_queries) + " slower")

View File

@ -132,6 +132,9 @@ function run_tests()
ADDITIONAL_OPTIONS+=('--report-logs-stats')
clickhouse-test "00001_select_1" > /dev/null ||:
clickhouse-client -q "insert into system.zookeeper (name, path, value) values ('auxiliary_zookeeper2', '/test/chroot/', '')" ||:
set +e
clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \
--test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \

View File

@ -3,5 +3,5 @@
set -x
service zookeeper start && sleep 7 && /usr/share/zookeeper/bin/zkCli.sh -server localhost:2181 -create create /clickhouse_test '';
gdb -q -ex 'set print inferior-events off' -ex 'set confirm off' -ex 'set print thread-events off' -ex run -ex bt -ex quit --args ./unit_tests_dbms | tee test_output/test_result.txt
timeout 40m gdb -q -ex 'set print inferior-events off' -ex 'set confirm off' -ex 'set print thread-events off' -ex run -ex bt -ex quit --args ./unit_tests_dbms | tee test_output/test_result.txt
./process_unit_tests_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv

View File

@ -65,6 +65,9 @@ sudo cat /etc/clickhouse-server/config.d/storage_conf.xml \
> /etc/clickhouse-server/config.d/storage_conf.xml.tmp
sudo mv /etc/clickhouse-server/config.d/storage_conf.xml.tmp /etc/clickhouse-server/config.d/storage_conf.xml
# it contains some new settings, but we can safely remove it
rm /etc/clickhouse-server/config.d/merge_tree.xml
start
stop
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.initial.log
@ -94,6 +97,9 @@ sudo cat /etc/clickhouse-server/config.d/storage_conf.xml \
> /etc/clickhouse-server/config.d/storage_conf.xml.tmp
sudo mv /etc/clickhouse-server/config.d/storage_conf.xml.tmp /etc/clickhouse-server/config.d/storage_conf.xml
# it contains some new settings, but we can safely remove it
rm /etc/clickhouse-server/config.d/merge_tree.xml
start
clickhouse-client --query="SELECT 'Server version: ', version()"

View File

@ -1,6 +0,0 @@
# ARM (AArch64) build works on Amazon Graviton, Oracle Cloud, Huawei Cloud ARM machines.
# The support for AArch64 is pre-production ready.
wget 'https://builds.clickhouse.com/master/aarch64/clickhouse'
chmod a+x ./clickhouse
sudo ./clickhouse install

View File

@ -1,3 +0,0 @@
fetch 'https://builds.clickhouse.com/master/freebsd/clickhouse'
chmod a+x ./clickhouse
su -m root -c './clickhouse install'

View File

@ -1,3 +0,0 @@
wget 'https://builds.clickhouse.com/master/macos-aarch64/clickhouse'
chmod a+x ./clickhouse
./clickhouse

View File

@ -1,3 +0,0 @@
wget 'https://builds.clickhouse.com/master/macos/clickhouse'
chmod a+x ./clickhouse
./clickhouse

View File

@ -22,7 +22,7 @@ The minimum recommended Ubuntu version for development is 22.04 LTS.
### Install Prerequisites {#install-prerequisites}
``` bash
sudo apt-get install git cmake ccache python3 ninja-build nasm yasm gawk
sudo apt-get install git cmake ccache python3 ninja-build nasm yasm gawk lsb-release wget software-properties-common gnupg
```
### Install and Use the Clang compiler
@ -43,9 +43,14 @@ sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test
For other Linux distributions, check the availability of LLVM's [prebuilt packages](https://releases.llvm.org/download.html).
As of April 2023, any version of Clang >= 15 will work.
GCC as a compiler is not supported
GCC as a compiler is not supported.
To build with a specific Clang version:
:::tip
This is optional. If you are following along and have only just installed Clang, check
which version you have installed before setting these environment variables.
:::
``` bash
export CC=clang-16
export CXX=clang++-16
@ -109,18 +114,3 @@ mkdir build
cmake -S . -B build
cmake --build build
```
## You Don't Have to Build ClickHouse {#you-dont-have-to-build-clickhouse}
ClickHouse is available in pre-built binaries and packages. Binaries are portable and can be run on any Linux flavour.
The CI checks build the binaries on each commit to [ClickHouse](https://github.com/clickhouse/clickhouse/). To download them:
1. Open the [commits list](https://github.com/ClickHouse/ClickHouse/commits/master)
1. Choose a **Merge pull request** commit that includes the new feature, or was added after the new feature
1. Click the status symbol (yellow dot, red x, green check) to open the CI check list
1. Scroll through the list until you find **ClickHouse build check x/x artifact groups are OK**
1. Click **Details**
1. Find the type of package for your operating system that you need and download the files.
![build artifact check](images/find-build-artifact.png)

View File

@ -119,7 +119,7 @@ When working with the `MaterializedMySQL` database engine, [ReplacingMergeTree](
The data of TIME type in MySQL is converted to microseconds in ClickHouse.
Other types are not supported. If MySQL table contains a column of such type, ClickHouse throws exception "Unhandled data type" and stops replication.
Other types are not supported. If MySQL table contains a column of such type, ClickHouse throws an exception and stops replication.
## Specifics and Recommendations {#specifics-and-recommendations}

View File

@ -55,7 +55,7 @@ ATTACH TABLE postgres_database.new_table;
```
:::warning
Before version 22.1, adding a table to replication left an unremoved temporary replication slot (named `{db_name}_ch_replication_slot_tmp`). If attaching tables in ClickHouse version before 22.1, make sure to delete it manually (`SELECT pg_drop_replication_slot('{db_name}_ch_replication_slot_tmp')`). Otherwise disk usage will grow. This issue is fixed in 22.1.
Before version 22.1, adding a table to replication left a non-removed temporary replication slot (named `{db_name}_ch_replication_slot_tmp`). If attaching tables in ClickHouse version before 22.1, make sure to delete it manually (`SELECT pg_drop_replication_slot('{db_name}_ch_replication_slot_tmp')`). Otherwise disk usage will grow. This issue is fixed in 22.1.
:::
## Dynamically removing tables from replication {#dynamically-removing-table-from-replication}
@ -257,7 +257,7 @@ Please note that this should be used only if it is actually needed. If there is
1. [CREATE PUBLICATION](https://postgrespro.ru/docs/postgresql/14/sql-createpublication) -- create query privilege.
2. [CREATE_REPLICATION_SLOT](https://postgrespro.ru/docs/postgrespro/10/protocol-replication#PROTOCOL-REPLICATION-CREATE-SLOT) -- replication privelege.
2. [CREATE_REPLICATION_SLOT](https://postgrespro.ru/docs/postgrespro/10/protocol-replication#PROTOCOL-REPLICATION-CREATE-SLOT) -- replication privilege.
3. [pg_drop_replication_slot](https://postgrespro.ru/docs/postgrespro/9.5/functions-admin#functions-replication) -- replication privilege or superuser.

View File

@ -30,7 +30,7 @@ Allows to connect to [SQLite](https://www.sqlite.org/index.html) database and pe
## Specifics and Recommendations {#specifics-and-recommendations}
SQLite stores the entire database (definitions, tables, indices, and the data itself) as a single cross-platform file on a host machine. During writing SQLite locks the entire database file, therefore write operations are performed sequentially. Read operations can be multitasked.
SQLite stores the entire database (definitions, tables, indices, and the data itself) as a single cross-platform file on a host machine. During writing SQLite locks the entire database file, therefore write operations are performed sequentially. Read operations can be multi-tasked.
SQLite does not require service management (such as startup scripts) or access control based on `GRANT` and passwords. Access control is handled by means of file-system permissions given to the database file itself.
## Usage Example {#usage-example}

View File

@ -120,3 +120,93 @@ Values can be updated using the `ALTER TABLE` query. The primary key cannot be u
```sql
ALTER TABLE test UPDATE v1 = v1 * 10 + 2 WHERE key LIKE 'some%' AND v3 > 3.1;
```
### Joins
A special `direct` join with EmbeddedRocksDB tables is supported.
This direct join avoids forming a hash table in memory and accesses
the data directly from the EmbeddedRocksDB.
With large joins you may see much lower memory usage with direct joins
because the hash table is not created.
To enable direct joins:
```sql
SET join_algorithm = 'direct, hash'
```
:::tip
When the `join_algorithm` is set to `direct, hash`, direct joins will be used
when possible, and hash otherwise.
:::
#### Example
##### Create and populate an EmbeddedRocksDB table:
```sql
CREATE TABLE rdb
(
`key` UInt32,
`value` Array(UInt32),
`value2` String
)
ENGINE = EmbeddedRocksDB
PRIMARY KEY key
```
```sql
INSERT INTO rdb
SELECT
toUInt32(sipHash64(number) % 10) as key,
[key, key+1] as value,
('val2' || toString(key)) as value2
FROM numbers_mt(10);
```
##### Create and populate a table to join with table `rdb`:
```sql
CREATE TABLE t2
(
`k` UInt16
)
ENGINE = TinyLog
```
```sql
INSERT INTO t2 SELECT number AS k
FROM numbers_mt(10)
```
##### Set the join algorithm to `direct`:
```sql
SET join_algorithm = 'direct'
```
##### An INNER JOIN:
```sql
SELECT *
FROM
(
SELECT k AS key
FROM t2
) AS t2
INNER JOIN rdb ON rdb.key = t2.key
ORDER BY key ASC
```
```response
┌─key─┬─rdb.key─┬─value──┬─value2─┐
│ 0 │ 0 │ [0,1] │ val20 │
│ 2 │ 2 │ [2,3] │ val22 │
│ 3 │ 3 │ [3,4] │ val23 │
│ 6 │ 6 │ [6,7] │ val26 │
│ 7 │ 7 │ [7,8] │ val27 │
│ 8 │ 8 │ [8,9] │ val28 │
│ 9 │ 9 │ [9,10] │ val29 │
└─────┴─────────┴────────┴────────┘
```
### More information on Joins
- [`join_algorithm` setting](/docs/en/operations/settings/settings.md#settings-join_algorithm)
- [JOIN clause](/docs/en/sql-reference/statements/select/join.md)

View File

@ -156,7 +156,7 @@ Similar to GraphiteMergeTree, the HDFS engine supports extended configuration us
| rpc\_client\_connect\_timeout | 600 * 1000 |
| rpc\_client\_read\_timeout | 3600 * 1000 |
| rpc\_client\_write\_timeout | 3600 * 1000 |
| rpc\_client\_socekt\_linger\_timeout | -1 |
| rpc\_client\_socket\_linger\_timeout | -1 |
| rpc\_client\_connect\_retry | 10 |
| rpc\_client\_timeout | 3600 * 1000 |
| dfs\_default\_replica | 3 |
@ -176,7 +176,7 @@ Similar to GraphiteMergeTree, the HDFS engine supports extended configuration us
| output\_write\_timeout | 3600 * 1000 |
| output\_close\_timeout | 3600 * 1000 |
| output\_packetpool\_size | 1024 |
| output\_heeartbeat\_interval | 10 * 1000 |
| output\_heartbeat\_interval | 10 * 1000 |
| dfs\_client\_failover\_max\_attempts | 15 |
| dfs\_client\_read\_shortcircuit\_streams\_cache\_size | 256 |
| dfs\_client\_socketcache\_expiryMsec | 3000 |

View File

@ -6,7 +6,7 @@ sidebar_label: Hive
# Hive
The Hive engine allows you to perform `SELECT` quries on HDFS Hive table. Currently it supports input formats as below:
The Hive engine allows you to perform `SELECT` queries on HDFS Hive table. Currently it supports input formats as below:
- Text: only supports simple scalar column types except `binary`

View File

@ -10,7 +10,7 @@ This engine allows integrating ClickHouse with [NATS](https://nats.io/).
`NATS` lets you:
- Publish or subcribe to message subjects.
- Publish or subscribe to message subjects.
- Process new messages as they become available.
## Creating a Table {#table_engine-redisstreams-creating-a-table}
@ -46,7 +46,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
Required parameters:
- `nats_url` host:port (for example, `localhost:5672`).
- `nats_subjects` List of subject for NATS table to subscribe/publsh to. Supports wildcard subjects like `foo.*.bar` or `baz.>`
- `nats_subjects` List of subject for NATS table to subscribe/publish to. Supports wildcard subjects like `foo.*.bar` or `baz.>`
- `nats_format` Message format. Uses the same notation as the SQL `FORMAT` function, such as `JSONEachRow`. For more information, see the [Formats](../../../interfaces/formats.md) section.
Optional parameters:

View File

@ -13,8 +13,8 @@ The PostgreSQL engine allows to perform `SELECT` and `INSERT` queries on data th
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1] [TTL expr1],
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2] [TTL expr2],
name1 type1 [DEFAULT|MATERIALIZED|ALIAS expr1] [TTL expr1],
name2 type2 [DEFAULT|MATERIALIZED|ALIAS expr2] [TTL expr2],
...
) ENGINE = PostgreSQL('host:port', 'database', 'table', 'user', 'password'[, `schema`]);
```
@ -57,7 +57,7 @@ or via config (since version 21.11):
</named_collections>
```
Some parameters can be overriden by key value arguments:
Some parameters can be overridden by key value arguments:
``` sql
SELECT * FROM postgresql(postgres1, schema='schema1', table='table1');
```

View File

@ -42,7 +42,6 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
[rabbitmq_queue_consume = false,]
[rabbitmq_address = '',]
[rabbitmq_vhost = '/',]
[rabbitmq_queue_consume = false,]
[rabbitmq_username = '',]
[rabbitmq_password = '',]
[rabbitmq_commit_on_select = false,]

View File

@ -23,7 +23,7 @@ CREATE TABLE s3_engine_table (name String, value UInt32)
- `NOSIGN` - If this keyword is provided in place of credentials, all the requests will not be signed.
- `format` — The [format](../../../interfaces/formats.md#formats) of the file.
- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are used from the configuration file. For more information see [Using S3 for Data Storage](../mergetree-family/mergetree.md#table_engine-mergetree-s3).
- `compression` — Compression type. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. Parameter is optional. By default, it will autodetect compression by file extension.
- `compression` — Compression type. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. Parameter is optional. By default, it will auto-detect compression by file extension.
### PARTITION BY
@ -131,14 +131,17 @@ CREATE TABLE table_with_asterisk (name String, value UInt32)
The following settings can be set before query execution or placed into configuration file.
- `s3_max_single_part_upload_size` — The maximum size of object to upload using singlepart upload to S3. Default value is `64Mb`.
- `s3_min_upload_part_size` — The minimum size of part to upload during multipart upload to [S3 Multipart upload](https://docs.aws.amazon.com/AmazonS3/latest/dev/uploadobjusingmpu.html). Default value is `512Mb`.
- `s3_max_single_part_upload_size` — The maximum size of object to upload using singlepart upload to S3. Default value is `32Mb`.
- `s3_min_upload_part_size` — The minimum size of part to upload during multipart upload to [S3 Multipart upload](https://docs.aws.amazon.com/AmazonS3/latest/dev/uploadobjusingmpu.html). Default value is `16Mb`.
- `s3_max_redirects` — Max number of S3 redirects hops allowed. Default value is `10`.
- `s3_single_read_retries` — The maximum number of attempts during single read. Default value is `4`.
- `s3_max_put_rps` — Maximum PUT requests per second rate before throttling. Default value is `0` (unlimited).
- `s3_max_put_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_put_rps`.
- `s3_max_get_rps` — Maximum GET requests per second rate before throttling. Default value is `0` (unlimited).
- `s3_max_get_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_get_rps`.
- `s3_upload_part_size_multiply_factor` - Multiply `s3_min_upload_part_size` by this factor each time `s3_upload_part_size_multiply_parts_count_threshold` parts were uploaded from a single write to S3. Default value is `2`.
- `s3_upload_part_size_multiply_parts_count_threshold` - Each time this number of parts was uploaded to S3, `s3_min_upload_part_size` is multiplied by `s3_upload_part_size_multiply_factor`. Default value is `500`.
- `s3_max_inflight_parts_for_one_file` - Limits the number of put requests that can be run concurrently for one object. This number should be limited. The value `0` means unlimited. Default value is `20`. Each in-flight part has a buffer with size `s3_min_upload_part_size` for the first `s3_upload_part_size_multiply_factor` parts and more when the file is big enough, see `s3_upload_part_size_multiply_factor`. With default settings one uploaded file consumes not more than `320Mb` for a file which is less than `8G`. The consumption is greater for a larger file.
Security consideration: if a malicious user can specify arbitrary S3 URLs, `s3_max_redirects` must be set to zero to avoid [SSRF](https://en.wikipedia.org/wiki/Server-side_request_forgery) attacks; or alternatively, `remote_host_filter` must be specified in server configuration.

View File

@ -109,7 +109,7 @@ INSERT INTO test.visits (StartDate, CounterID, Sign, UserID)
VALUES (1667446031, 1, 6, 3)
```
The data are inserted in both the table and the materialized view `test.mv_visits`.
The data is inserted in both the table and the materialized view `test.mv_visits`.
To get the aggregated data, we need to execute a query such as `SELECT ... GROUP BY ...` from the materialized view `test.mv_visits`:

View File

@ -78,7 +78,7 @@ ENGINE = MergeTree
ORDER BY id;
```
With greater `GRANULARITY` indexes remember the data structure better. The `GRANULARITY` indicates how many granules will be used to construct the index. The more data is provided for the index, the more of it can be handled by one index and the more chances that with the right hyperparameters the index will remember the data structure better. But some indexes can't be built if they don't have enough data, so this granule will always participate in the query. For more information, see the description of indexes.
With greater `GRANULARITY` indexes remember the data structure better. The `GRANULARITY` indicates how many granules will be used to construct the index. The more data is provided for the index, the more of it can be handled by one index and the more chances that with the right hyper parameters the index will remember the data structure better. But some indexes can't be built if they don't have enough data, so this granule will always participate in the query. For more information, see the description of indexes.
As the indexes are built only during insertions into the table, `INSERT` and `OPTIMIZE` queries are slower than for an ordinary table. At this stage indexes remember all the information about the given data. ANNIndexes should be used if you have immutable or rarely changed data and many read requests.
@ -135,7 +135,7 @@ ORDER BY id;
Annoy supports `L2Distance` and `cosineDistance`.
In the `SELECT` in the settings (`ann_index_select_query_params`) you can specify the size of the internal buffer (more details in the description above or in the [original repository](https://github.com/spotify/annoy)). During the query it will inspect up to `search_k` nodes which defaults to `n_trees * n` if not provided. `search_k` gives you a run-time tradeoff between better accuracy and speed.
In the `SELECT` in the settings (`ann_index_select_query_params`) you can specify the size of the internal buffer (more details in the description above or in the [original repository](https://github.com/spotify/annoy)). During the query it will inspect up to `search_k` nodes which defaults to `n_trees * n` if not provided. `search_k` gives you a run-time trade-off between better accuracy and speed.
__Example__:
``` sql

View File

@ -165,7 +165,7 @@ Performance of such a query heavily depends on the table layout. Because of that
The key factors for a good performance:
- number of partitions involved in the query should be sufficiently large (more than `max_threads / 2`), otherwise query will underutilize the machine
- number of partitions involved in the query should be sufficiently large (more than `max_threads / 2`), otherwise query will under-utilize the machine
- partitions shouldn't be too small, so batch processing won't degenerate into row-by-row processing
- partitions should be comparable in size, so all threads will do roughly the same amount of work

View File

@ -15,6 +15,18 @@ tokenized cells of the string column. For example, the string cell "I will be a
" wi", "wil", "ill", "ll ", "l b", " be" etc. The more fine-granular the input strings are tokenized, the bigger but also the more
useful the resulting inverted index will be.
<div class='vimeo-container'>
<iframe src="//www.youtube.com/embed/O_MnyUkrIq8"
width="640"
height="360"
frameborder="0"
allow="autoplay;
fullscreen;
picture-in-picture"
allowfullscreen>
</iframe>
</div>
:::note
Inverted indexes are experimental and should not be used in production environments yet. They may change in the future in backward-incompatible
ways, for example with respect to their DDL/DQL syntax or performance/compression characteristics.

View File

@ -779,7 +779,7 @@ Disks, volumes and storage policies should be declared inside the `<storage_conf
:::tip
Disks can also be declared in the `SETTINGS` section of a query. This is useful
for adhoc analysis to temporarily attach a disk that is, for example, hosted at a URL.
for ad-hoc analysis to temporarily attach a disk that is, for example, hosted at a URL.
See [dynamic storage](#dynamic-storage) for more details.
:::
@ -856,7 +856,7 @@ Tags:
- `perform_ttl_move_on_insert` — Disables TTL move on data part INSERT. By default if we insert a data part that already expired by the TTL move rule it immediately goes to a volume/disk declared in move rule. This can significantly slow down inserts if the destination volume/disk is slow (e.g. S3).
- `load_balancing` - Policy for disk balancing, `round_robin` or `least_used`.
Cofiguration examples:
Configuration examples:
``` xml
<storage_configuration>
@ -1219,11 +1219,12 @@ Authentication parameters (the disk will try all available methods **and** Manag
* `account_name` and `account_key` - For authentication using Shared Key.
Limit parameters (mainly for internal usage):
* `max_single_part_upload_size` - Limits the size of a single block upload to Blob Storage.
* `s3_max_single_part_upload_size` - Limits the size of a single block upload to Blob Storage.
* `min_bytes_for_seek` - Limits the size of a seekable region.
* `max_single_read_retries` - Limits the number of attempts to read a chunk of data from Blob Storage.
* `max_single_download_retries` - Limits the number of attempts to download a readable buffer from Blob Storage.
* `thread_pool_size` - Limits the number of threads with which `IDiskRemote` is instantiated.
* `s3_max_inflight_parts_for_one_file` - Limits the number of put requests that can be run concurrently for one object.
Other parameters:
* `metadata_path` - Path on local FS to store metadata files for Blob Storage. Default value is `/var/lib/clickhouse/disks/<disk_name>/`.

View File

@ -258,4 +258,4 @@ Since [remote](../../../sql-reference/table-functions/remote.md) and [cluster](.
- [Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns) description
- [background_distributed_schedule_pool_size](../../../operations/settings/settings.md#background_distributed_schedule_pool_size) setting
- [shardNum()](../../../sql-reference/functions/other-functions.md#shard-num) and [shardCount()](../../../sql-reference/functions/other-functions.md#shard-count) functions
- [shardNum()](../../../sql-reference/functions/other-functions.md#shardnum) and [shardCount()](../../../sql-reference/functions/other-functions.md#shardcount) functions

View File

@ -65,7 +65,7 @@ if __name__ == "__main__":
main()
```
The following `my_executable_table` is built from the output of `my_script.py`, which will generate 10 random strings everytime you run a `SELECT` from `my_executable_table`:
The following `my_executable_table` is built from the output of `my_script.py`, which will generate 10 random strings every time you run a `SELECT` from `my_executable_table`:
```sql
CREATE TABLE my_executable_table (
@ -223,4 +223,4 @@ SETTINGS
pool_size = 4;
```
ClickHouse will maintain 4 processes on-demand when your client queries the `sentiment_pooled` table.
ClickHouse will maintain 4 processes on-demand when your client queries the `sentiment_pooled` table.

View File

@ -72,7 +72,7 @@ Additionally, number of keys will have a soft limit of 4 for the number of keys.
If multiple tables are created on the same ZooKeeper path, the values are persisted until there exists at least 1 table using it.
As a result, it is possible to use `ON CLUSTER` clause when creating the table and sharing the data from multiple ClickHouse instances.
Of course, it's possible to manually run `CREATE TABLE` with same path on nonrelated ClickHouse instances to have same data sharing effect.
Of course, it's possible to manually run `CREATE TABLE` with same path on unrelated ClickHouse instances to have same data sharing effect.
## Supported operations {#table_engine-KeeperMap-supported-operations}

View File

@ -87,7 +87,7 @@ ORDER BY (marketplace, review_date, product_category);
3. We are now ready to insert the data into ClickHouse. Before we do, check out the [list of files in the dataset](https://s3.amazonaws.com/amazon-reviews-pds/tsv/index.txt) and decide which ones you want to include.
4. We will insert all of the US reviews - which is about 151M rows. The following `INSERT` command uses the `s3Cluster` table function, which allows the processing of mulitple S3 files in parallel using all the nodes of your cluster. We also use a wildcard to insert any file that starts with the name `https://s3.amazonaws.com/amazon-reviews-pds/tsv/amazon_reviews_us_`:
4. We will insert all of the US reviews - which is about 151M rows. The following `INSERT` command uses the `s3Cluster` table function, which allows the processing of multiple S3 files in parallel using all the nodes of your cluster. We also use a wildcard to insert any file that starts with the name `https://s3.amazonaws.com/amazon-reviews-pds/tsv/amazon_reviews_us_`:
```sql
INSERT INTO amazon_reviews
@ -473,4 +473,4 @@ It runs quite a bit faster - which means the cache is helping us out here:
└────────────┴───────────────────────────────────────────────────────────────────────┴────────────────────┴───────┘
50 rows in set. Elapsed: 33.954 sec. Processed 150.96 million rows, 68.95 GB (4.45 million rows/s., 2.03 GB/s.)
```
```

View File

@ -317,7 +317,7 @@ To build a Superset dashboard using the OpenCelliD dataset you should:
Make sure that you set **SSL** on when connecting to ClickHouse Cloud or other ClickHouse systems that enforce the use of SSL.
:::
![Add ClickHouse as a Superset datasource](@site/docs/en/getting-started/example-datasets/images/superset-connect-a-database.png)
![Add ClickHouse as a Superset data source](@site/docs/en/getting-started/example-datasets/images/superset-connect-a-database.png)
### Add the table **cell_towers** as a Superset **dataset**
@ -364,5 +364,5 @@ The data is also available for interactive queries in the [Playground](https://p
This [example](https://play.clickhouse.com/play?user=play#U0VMRUNUIG1jYywgY291bnQoKSBGUk9NIGNlbGxfdG93ZXJzIEdST1VQIEJZIG1jYyBPUkRFUiBCWSBjb3VudCgpIERFU0M=) will populate the username and even the query for you.
Although you cannot create tables in the Playground, you can run all of the queries and even use Superset (adjust the hostname and port number).
Although you cannot create tables in the Playground, you can run all of the queries and even use Superset (adjust the host name and port number).
:::

View File

@ -36,8 +36,8 @@ The data is in CSV files but uses a semi-colon for the delimiter. The rows look
│ 7389 │ BMP180 │ 3735 │ 50.136 │ 11.062 │ 2019-06-01T00:00:06 │ 98905 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 12.1 │
│ 13199 │ BMP180 │ 6664 │ 52.514 │ 13.44 │ 2019-06-01T00:00:07 │ 101855.54 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 19.74 │
│ 12753 │ BMP180 │ 6440 │ 44.616 │ 2.032 │ 2019-06-01T00:00:07 │ 99475 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 17 │
│ 16956 │ BMP180 │ 8594 │ 52.052 │ 8.354 │ 2019-06-01T00:00:08 │ 101322 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 17.2 │
└───────────┴─────────────┴──────────┴────────┴───────┴─────────────────────┴──────────┴──────────┴───────────────────┴─────────────┘
│ 16956 │ BMP180 │ 8594 │ 52.052 │ 8.354 │ 2019-06-01T00:00:08 │ 101322 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 17.2 │
└───────────┴─────────────┴──────────┴────────┴───────┴─────────────────────┴──────────┴──────────┴───────────────────┴─────────────┘
```
2. We will use the following `MergeTree` table to store the data in ClickHouse:

View File

@ -806,7 +806,7 @@ FROM
31 rows in set. Elapsed: 0.043 sec. Processed 7.54 million rows, 40.53 MB (176.71 million rows/s., 950.40 MB/s.)
```
Maybe a little more near the end of the month, but overall we keep a good even distribution. Again this is unrealiable due to the filtering of the docs filter during data insertion.
Maybe a little more near the end of the month, but overall we keep a good even distribution. Again this is unreliable due to the filtering of the docs filter during data insertion.
## Authors with the most diverse impact
@ -940,7 +940,7 @@ LIMIT 10
10 rows in set. Elapsed: 0.106 sec. Processed 798.15 thousand rows, 13.97 MB (7.51 million rows/s., 131.41 MB/s.)
```
This makes sense because Alexey has been responsible for maintaining the Change log. But what if we use the basename of the file to identify his popular files - this allows for renames and should focus on code contributions.
This makes sense because Alexey has been responsible for maintaining the Change log. But what if we use the base name of the file to identify his popular files - this allows for renames and should focus on code contributions.
[play](https://play.clickhouse.com/play?user=play#U0VMRUNUCiAgICBiYXNlLAogICAgY291bnQoKSBBUyBjCkZST00gZ2l0X2NsaWNraG91c2UuZmlsZV9jaGFuZ2VzCldIRVJFIChhdXRob3IgPSAnQWxleGV5IE1pbG92aWRvdicpIEFORCAoZmlsZV9leHRlbnNpb24gSU4gKCdoJywgJ2NwcCcsICdzcWwnKSkKR1JPVVAgQlkgYmFzZW5hbWUocGF0aCkgQVMgYmFzZQpPUkRFUiBCWSBjIERFU0MKTElNSVQgMTA=)

View File

@ -75,7 +75,7 @@ SELECT
payment_type,
pickup_ntaname,
dropoff_ntaname
FROM s3(
FROM gcs(
'https://storage.googleapis.com/clickhouse-public-datasets/nyc-taxi/trips_{0..2}.gz',
'TabSeparatedWithNames'
);

View File

@ -9,7 +9,7 @@ The data in this dataset is derived and cleaned from the full OpenSky dataset to
Source: https://zenodo.org/record/5092942#.YRBCyTpRXYd
Martin Strohmeier, Xavier Olive, Jannis Lübbe, Matthias Schäfer, and Vincent Lenders
Martin Strohmeier, Xavier Olive, Jannis Luebbe, Matthias Schaefer, and Vincent Lenders
"Crowdsourced air traffic data from the OpenSky Network 2019–2020"
Earth System Science Data 13(2), 2021
https://doi.org/10.5194/essd-13-357-2021

View File

@ -0,0 +1,720 @@
---
slug: /en/getting-started/example-datasets/reddit-comments
sidebar_label: Reddit comments
---
# Reddit comments dataset
This dataset contains publicly available comments on Reddit from December, 2005 to March, 2023, and has over 14B rows of data. The raw data is in JSON format in compressed files and the rows look like the following:
```json
{"controversiality":0,"body":"A look at Vietnam and Mexico exposes the myth of market liberalisation.","subreddit_id":"t5_6","link_id":"t3_17863","stickied":false,"subreddit":"reddit.com","score":2,"ups":2,"author_flair_css_class":null,"created_utc":1134365188,"author_flair_text":null,"author":"frjo","id":"c13","edited":false,"parent_id":"t3_17863","gilded":0,"distinguished":null,"retrieved_on":1473738411}
{"created_utc":1134365725,"author_flair_css_class":null,"score":1,"ups":1,"subreddit":"reddit.com","stickied":false,"link_id":"t3_17866","subreddit_id":"t5_6","controversiality":0,"body":"The site states \"What can I use it for? Meeting notes, Reports, technical specs Sign-up sheets, proposals and much more...\", just like any other new breeed of sites that want us to store everything we have on the web. And they even guarantee multiple levels of security and encryption etc. But what prevents these web site operators fom accessing and/or stealing Meeting notes, Reports, technical specs Sign-up sheets, proposals and much more, for competitive or personal gains...? I am pretty sure that most of them are honest, but what's there to prevent me from setting up a good useful site and stealing all your data? Call me paranoid - I am.","retrieved_on":1473738411,"distinguished":null,"gilded":0,"id":"c14","edited":false,"parent_id":"t3_17866","author":"zse7zse","author_flair_text":null}
{"gilded":0,"distinguished":null,"retrieved_on":1473738411,"author":"[deleted]","author_flair_text":null,"edited":false,"id":"c15","parent_id":"t3_17869","subreddit":"reddit.com","score":0,"ups":0,"created_utc":1134366848,"author_flair_css_class":null,"body":"Jython related topics by Frank Wierzbicki","controversiality":0,"subreddit_id":"t5_6","stickied":false,"link_id":"t3_17869"}
{"gilded":0,"retrieved_on":1473738411,"distinguished":null,"author_flair_text":null,"author":"[deleted]","edited":false,"parent_id":"t3_17870","id":"c16","subreddit":"reddit.com","created_utc":1134367660,"author_flair_css_class":null,"score":1,"ups":1,"body":"[deleted]","controversiality":0,"stickied":false,"link_id":"t3_17870","subreddit_id":"t5_6"}
{"gilded":0,"retrieved_on":1473738411,"distinguished":null,"author_flair_text":null,"author":"rjoseph","edited":false,"id":"c17","parent_id":"t3_17817","subreddit":"reddit.com","author_flair_css_class":null,"created_utc":1134367754,"score":1,"ups":1,"body":"Saft is by far the best extension you could tak onto your Safari","controversiality":0,"link_id":"t3_17817","stickied":false,"subreddit_id":"t5_6"}
```
A shoutout to Percona for the [motivation behind ingesting this dataset](https://www.percona.com/blog/big-data-set-reddit-comments-analyzing-clickhouse/), which we have downloaded and stored in an S3 bucket.
:::note
The following commands were executed on a Production instance of ClickHouse Cloud with the minimum memory set to 720GB. To run this on your own cluster, replace `default` in the `s3Cluster` function call with the name of your cluster. If you do not have a cluster, then replace the `s3Cluster` function with the `s3` function.
:::
1. Let's create a table for the Reddit data:
```sql
CREATE TABLE reddit
(
subreddit LowCardinality(String),
subreddit_id LowCardinality(String),
subreddit_type Enum('public' = 1, 'restricted' = 2, 'user' = 3, 'archived' = 4, 'gold_restricted' = 5, 'private' = 6),
author LowCardinality(String),
body String CODEC(ZSTD(6)),
created_date Date DEFAULT toDate(created_utc),
created_utc DateTime,
retrieved_on DateTime,
id String,
parent_id String,
link_id String,
score Int32,
total_awards_received UInt16,
controversiality UInt8,
gilded UInt8,
collapsed_because_crowd_control UInt8,
collapsed_reason Enum('' = 0, 'comment score below threshold' = 1, 'may be sensitive content' = 2, 'potentially toxic' = 3, 'potentially toxic content' = 4),
distinguished Enum('' = 0, 'moderator' = 1, 'admin' = 2, 'special' = 3),
removal_reason Enum('' = 0, 'legal' = 1),
author_created_utc DateTime,
author_fullname LowCardinality(String),
author_patreon_flair UInt8,
author_premium UInt8,
can_gild UInt8,
can_mod_post UInt8,
collapsed UInt8,
is_submitter UInt8,
_edited String,
locked UInt8,
quarantined UInt8,
no_follow UInt8,
send_replies UInt8,
stickied UInt8,
author_flair_text LowCardinality(String)
)
ENGINE = MergeTree
ORDER BY (subreddit, created_date, author);
```
:::note
The names of the files in S3 start with `RC_YYYY-MM` where `YYYY-MM` goes from `2005-12` to `2023-02`. The compression changes a couple of times though, so the file extensions are not consistent. For example:
- the file names are initially `RC_2005-12.bz2` to `RC_2017-11.bz2`
- then they look like `RC_2017-12.xz` to `RC_2018-09.xz`
- and finally `RC_2018-10.zst` to `RC_2023-02.zst`
:::
2. We are going to start with one month of data, but if you want to simply insert every row - skip ahead to step 8 below. The following file has 86M records from December, 2017:
```sql
INSERT INTO reddit
SELECT *
FROM s3(
'https://clickhouse-public-datasets.s3.eu-central-1.amazonaws.com/reddit/original/RC_2017-12.xz',
'JSONEachRow'
);
```
3. It will take a while depending on your resources, but when it's done verify it worked:
```sql
SELECT formatReadableQuantity(count())
FROM reddit;
```
```response
┌─formatReadableQuantity(count())─┐
│ 85.97 million │
└─────────────────────────────────┘
```
4. Let's see how many unique subreddits there were in December of 2017:
```sql
SELECT uniqExact(subreddit)
FROM reddit;
```
```response
┌─uniqExact(subreddit)─┐
│ 91613 │
└──────────────────────┘
1 row in set. Elapsed: 1.572 sec. Processed 85.97 million rows, 367.43 MB (54.71 million rows/s., 233.80 MB/s.)
```
5. This query returns the top 20 subreddits (in terms of number of comments):
```sql
SELECT
subreddit,
count() AS c
FROM reddit
GROUP BY subreddit
ORDER BY c DESC
LIMIT 20;
```
```response
┌─subreddit───────┬───────c─┐
│ AskReddit │ 5245881 │
│ politics │ 1753120 │
│ nfl │ 1220266 │
│ nba │ 960388 │
│ The_Donald │ 931857 │
│ news │ 796617 │
│ worldnews │ 765709 │
│ CFB │ 710360 │
│ gaming │ 602761 │
│ movies │ 601966 │
│ soccer │ 590628 │
│ Bitcoin │ 583783 │
│ pics │ 563408 │
│ StarWars │ 562514 │
│ funny │ 547563 │
│ leagueoflegends │ 517213 │
│ teenagers │ 492020 │
│ DestinyTheGame │ 477377 │
│ todayilearned │ 472650 │
│ videos │ 450581 │
└─────────────────┴─────────┘
20 rows in set. Elapsed: 0.368 sec. Processed 85.97 million rows, 367.43 MB (233.34 million rows/s., 997.25 MB/s.)
```
6. Here are the top 10 authors in December of 2017, in terms of number of comments posted:
```sql
SELECT
author,
count() AS c
FROM reddit
GROUP BY author
ORDER BY c DESC
LIMIT 10;
```
```response
┌─author──────────┬───────c─┐
│ [deleted] │ 5913324 │
│ AutoModerator │ 784886 │
│ ImagesOfNetwork │ 83241 │
│ BitcoinAllBot │ 54484 │
│ imguralbumbot │ 45822 │
│ RPBot │ 29337 │
│ WikiTextBot │ 25982 │
│ Concise_AMA_Bot │ 19974 │
│ MTGCardFetcher │ 19103 │
│ TotesMessenger │ 19057 │
└─────────────────┴─────────┘
10 rows in set. Elapsed: 8.143 sec. Processed 85.97 million rows, 711.05 MB (10.56 million rows/s., 87.32 MB/s.)
```
7. We already inserted some data, but we will start over:
```sql
TRUNCATE TABLE reddit;
```
8. This is a fun dataset and it looks like we can find some great information, so let's go ahead and insert the entire dataset from 2005 to 2023. For practical reasons, it works well to insert the data by years starting with...
```sql
INSERT INTO reddit
SELECT *
FROM s3Cluster(
'default',
'https://clickhouse-public-datasets.s3.eu-central-1.amazonaws.com/reddit/original/RC_2005*',
'JSONEachRow'
)
SETTINGS zstd_window_log_max = 31;
```
...and ending with:
```sql
INSERT INTO reddit
SELECT *
FROM s3Cluster(
'default',
'https://clickhouse-public-datasets.s3.amazonaws.com/reddit/original/RC_2023*',
'JSONEachRow'
)
SETTINGS zstd_window_log_max = 31;
```
If you do not have a cluster, use `s3` instead of `s3Cluster`:
```sql
INSERT INTO reddit
SELECT *
FROM s3(
'https://clickhouse-public-datasets.s3.amazonaws.com/reddit/original/RC_2005*',
'JSONEachRow'
)
SETTINGS zstd_window_log_max = 31;
```
8. To verify it worked, here are the number of rows per year (as of February, 2023):
```sql
SELECT
toYear(created_utc) AS year,
formatReadableQuantity(count())
FROM reddit
GROUP BY year;
```
```response
┌─year─┬─formatReadableQuantity(count())─┐
│ 2005 │ 1.07 thousand │
│ 2006 │ 417.18 thousand │
│ 2007 │ 2.46 million │
│ 2008 │ 7.24 million │
│ 2009 │ 18.86 million │
│ 2010 │ 42.93 million │
│ 2011 │ 28.91 million │
│ 2012 │ 260.31 million │
│ 2013 │ 402.21 million │
│ 2014 │ 531.80 million │
│ 2015 │ 667.76 million │
│ 2016 │ 799.90 million │
│ 2017 │ 972.86 million │
│ 2018 │ 1.24 billion │
│ 2019 │ 1.66 billion │
│ 2020 │ 2.16 billion │
│ 2021 │ 2.59 billion │
│ 2022 │ 2.82 billion │
│ 2023 │ 474.86 million │
└──────┴─────────────────────────────────┘
```
9. Let's see how many rows were inserted and how much disk space the table is using:
```sql
SELECT
sum(rows) AS count,
formatReadableQuantity(count),
formatReadableSize(sum(bytes)) AS disk_size,
formatReadableSize(sum(data_uncompressed_bytes)) AS uncompressed_size
FROM system.parts
WHERE (table = 'reddit') AND active;
```
Notice that the compressed size on disk is about 1/3 of the uncompressed size:
```response
┌───────count─┬─formatReadableQuantity(sum(rows))─┬─disk_size─┬─uncompressed_size─┐
│ 14688534662 │ 14.69 billion │ 1.03 TiB │ 3.26 TiB │
└─────────────┴───────────────────────────────────┴───────────┴───────────────────┘
1 row in set. Elapsed: 0.005 sec.
```
9. The following query shows how many comments, authors and subreddits we have for each month:
```sql
SELECT
toStartOfMonth(created_utc) AS firstOfMonth,
count() AS c,
bar(c, 0, 50000000, 25) AS bar_count,
uniq(author) AS authors,
bar(authors, 0, 5000000, 25) AS bar_authors,
uniq(subreddit) AS subreddits,
bar(subreddits, 0, 100000, 25) AS bar_subreddits
FROM reddit
GROUP BY firstOfMonth
ORDER BY firstOfMonth ASC;
```
This is a substantial query that has to process all 14.69 billion rows, but we still get an impressive response time (about 48 seconds):
```response
┌─firstOfMonth─┬─────────c─┬─bar_count─────────────────┬──authors─┬─bar_authors───────────────┬─subreddits─┬─bar_subreddits────────────┐
│ 2005-12-01 │ 1075 │ │ 394 │ │ 1 │ │
│ 2006-01-01 │ 3666 │ │ 791 │ │ 2 │ │
│ 2006-02-01 │ 9095 │ │ 1464 │ │ 18 │ │
│ 2006-03-01 │ 13859 │ │ 1958 │ │ 15 │ │
│ 2006-04-01 │ 19090 │ │ 2334 │ │ 21 │ │
│ 2006-05-01 │ 26859 │ │ 2698 │ │ 21 │ │
│ 2006-06-01 │ 29163 │ │ 3043 │ │ 19 │ │
│ 2006-07-01 │ 37031 │ │ 3532 │ │ 22 │ │
│ 2006-08-01 │ 50559 │ │ 4750 │ │ 24 │ │
│ 2006-09-01 │ 50675 │ │ 4908 │ │ 21 │ │
│ 2006-10-01 │ 54148 │ │ 5654 │ │ 31 │ │
│ 2006-11-01 │ 62021 │ │ 6490 │ │ 23 │ │
│ 2006-12-01 │ 61018 │ │ 6707 │ │ 24 │ │
│ 2007-01-01 │ 81341 │ │ 7931 │ │ 23 │ │
│ 2007-02-01 │ 95634 │ │ 9020 │ │ 21 │ │
│ 2007-03-01 │ 112444 │ │ 10842 │ │ 23 │ │
│ 2007-04-01 │ 126773 │ │ 10701 │ │ 26 │ │
│ 2007-05-01 │ 170097 │ │ 11365 │ │ 25 │ │
│ 2007-06-01 │ 178800 │ │ 11267 │ │ 22 │ │
│ 2007-07-01 │ 203319 │ │ 12482 │ │ 25 │ │
│ 2007-08-01 │ 225111 │ │ 14124 │ │ 30 │ │
│ 2007-09-01 │ 259497 │ ▏ │ 15416 │ │ 33 │ │
│ 2007-10-01 │ 274170 │ ▏ │ 15302 │ │ 36 │ │
│ 2007-11-01 │ 372983 │ ▏ │ 15134 │ │ 43 │ │
│ 2007-12-01 │ 363390 │ ▏ │ 15915 │ │ 31 │ │
│ 2008-01-01 │ 452990 │ ▏ │ 18857 │ │ 126 │ │
│ 2008-02-01 │ 441768 │ ▏ │ 18266 │ │ 173 │ │
│ 2008-03-01 │ 463728 │ ▏ │ 18947 │ │ 292 │ │
│ 2008-04-01 │ 468317 │ ▏ │ 18590 │ │ 323 │ │
│ 2008-05-01 │ 536380 │ ▎ │ 20861 │ │ 375 │ │
│ 2008-06-01 │ 577684 │ ▎ │ 22557 │ │ 575 │ ▏ │
│ 2008-07-01 │ 592610 │ ▎ │ 23123 │ │ 657 │ ▏ │
│ 2008-08-01 │ 595959 │ ▎ │ 23729 │ │ 707 │ ▏ │
│ 2008-09-01 │ 680892 │ ▎ │ 26374 │ ▏ │ 801 │ ▏ │
│ 2008-10-01 │ 789874 │ ▍ │ 28970 │ ▏ │ 893 │ ▏ │
│ 2008-11-01 │ 792310 │ ▍ │ 30272 │ ▏ │ 1024 │ ▎ │
│ 2008-12-01 │ 850359 │ ▍ │ 34073 │ ▏ │ 1103 │ ▎ │
│ 2009-01-01 │ 1051649 │ ▌ │ 38978 │ ▏ │ 1316 │ ▎ │
│ 2009-02-01 │ 944711 │ ▍ │ 43390 │ ▏ │ 1132 │ ▎ │
│ 2009-03-01 │ 1048643 │ ▌ │ 46516 │ ▏ │ 1203 │ ▎ │
│ 2009-04-01 │ 1094599 │ ▌ │ 48284 │ ▏ │ 1334 │ ▎ │
│ 2009-05-01 │ 1201257 │ ▌ │ 52512 │ ▎ │ 1395 │ ▎ │
│ 2009-06-01 │ 1258750 │ ▋ │ 57728 │ ▎ │ 1473 │ ▎ │
│ 2009-07-01 │ 1470290 │ ▋ │ 60098 │ ▎ │ 1686 │ ▍ │
│ 2009-08-01 │ 1750688 │ ▉ │ 67347 │ ▎ │ 1777 │ ▍ │
│ 2009-09-01 │ 2032276 │ █ │ 78051 │ ▍ │ 1784 │ ▍ │
│ 2009-10-01 │ 2242017 │ █ │ 93409 │ ▍ │ 2071 │ ▌ │
│ 2009-11-01 │ 2207444 │ █ │ 95940 │ ▍ │ 2141 │ ▌ │
│ 2009-12-01 │ 2560510 │ █▎ │ 104239 │ ▌ │ 2141 │ ▌ │
│ 2010-01-01 │ 2884096 │ █▍ │ 114314 │ ▌ │ 2313 │ ▌ │
│ 2010-02-01 │ 2687779 │ █▎ │ 115683 │ ▌ │ 2522 │ ▋ │
│ 2010-03-01 │ 3228254 │ █▌ │ 125775 │ ▋ │ 2890 │ ▋ │
│ 2010-04-01 │ 3209898 │ █▌ │ 128936 │ ▋ │ 3170 │ ▊ │
│ 2010-05-01 │ 3267363 │ █▋ │ 131851 │ ▋ │ 3166 │ ▊ │
│ 2010-06-01 │ 3532867 │ █▊ │ 139522 │ ▋ │ 3301 │ ▊ │
│ 2010-07-01 │ 806612 │ ▍ │ 76486 │ ▍ │ 1955 │ ▍ │
│ 2010-08-01 │ 4247982 │ ██ │ 164071 │ ▊ │ 3653 │ ▉ │
│ 2010-09-01 │ 4704069 │ ██▎ │ 186613 │ ▉ │ 4009 │ █ │
│ 2010-10-01 │ 5032368 │ ██▌ │ 203800 │ █ │ 4154 │ █ │
│ 2010-11-01 │ 5689002 │ ██▊ │ 226134 │ █▏ │ 4383 │ █ │
│ 2010-12-01 │ 3642690 │ █▊ │ 196847 │ ▉ │ 3914 │ ▉ │
│ 2011-01-01 │ 3924540 │ █▉ │ 215057 │ █ │ 4240 │ █ │
│ 2011-02-01 │ 3859131 │ █▉ │ 223485 │ █ │ 4371 │ █ │
│ 2011-03-01 │ 2877996 │ █▍ │ 208607 │ █ │ 3870 │ ▉ │
│ 2011-04-01 │ 3859131 │ █▉ │ 248931 │ █▏ │ 4881 │ █▏ │
│ 2011-06-01 │ 3859131 │ █▉ │ 267197 │ █▎ │ 5255 │ █▎ │
│ 2011-08-01 │ 2943405 │ █▍ │ 259428 │ █▎ │ 5806 │ █▍ │
│ 2011-10-01 │ 3859131 │ █▉ │ 327342 │ █▋ │ 6958 │ █▋ │
│ 2011-12-01 │ 3728313 │ █▊ │ 354817 │ █▊ │ 7713 │ █▉ │
│ 2012-01-01 │ 16350205 │ ████████▏ │ 696110 │ ███▍ │ 14281 │ ███▌ │
│ 2012-02-01 │ 16015695 │ ████████ │ 722892 │ ███▌ │ 14949 │ ███▋ │
│ 2012-03-01 │ 17881943 │ ████████▉ │ 789664 │ ███▉ │ 15795 │ ███▉ │
│ 2012-04-01 │ 19044534 │ █████████▌ │ 842491 │ ████▏ │ 16440 │ ████ │
│ 2012-05-01 │ 20388260 │ ██████████▏ │ 886176 │ ████▍ │ 16974 │ ████▏ │
│ 2012-06-01 │ 21897913 │ ██████████▉ │ 946798 │ ████▋ │ 17952 │ ████▍ │
│ 2012-07-01 │ 24087517 │ ████████████ │ 1018636 │ █████ │ 19069 │ ████▊ │
│ 2012-08-01 │ 25703326 │ ████████████▊ │ 1094445 │ █████▍ │ 20553 │ █████▏ │
│ 2012-09-01 │ 23419524 │ ███████████▋ │ 1088491 │ █████▍ │ 20831 │ █████▏ │
│ 2012-10-01 │ 24788236 │ ████████████▍ │ 1131885 │ █████▋ │ 21868 │ █████▍ │
│ 2012-11-01 │ 24648302 │ ████████████▎ │ 1167608 │ █████▊ │ 21791 │ █████▍ │
│ 2012-12-01 │ 26080276 │ █████████████ │ 1218402 │ ██████ │ 22622 │ █████▋ │
│ 2013-01-01 │ 30365867 │ ███████████████▏ │ 1341703 │ ██████▋ │ 24696 │ ██████▏ │
│ 2013-02-01 │ 27213960 │ █████████████▌ │ 1304756 │ ██████▌ │ 24514 │ ██████▏ │
│ 2013-03-01 │ 30771274 │ ███████████████▍ │ 1391703 │ ██████▉ │ 25730 │ ██████▍ │
│ 2013-04-01 │ 33259557 │ ████████████████▋ │ 1485971 │ ███████▍ │ 27294 │ ██████▊ │
│ 2013-05-01 │ 33126225 │ ████████████████▌ │ 1506473 │ ███████▌ │ 27299 │ ██████▊ │
│ 2013-06-01 │ 32648247 │ ████████████████▎ │ 1506650 │ ███████▌ │ 27450 │ ██████▊ │
│ 2013-07-01 │ 34922133 │ █████████████████▍ │ 1561771 │ ███████▊ │ 28294 │ ███████ │
│ 2013-08-01 │ 34766579 │ █████████████████▍ │ 1589781 │ ███████▉ │ 28943 │ ███████▏ │
│ 2013-09-01 │ 31990369 │ ███████████████▉ │ 1570342 │ ███████▊ │ 29408 │ ███████▎ │
│ 2013-10-01 │ 35940040 │ █████████████████▉ │ 1683770 │ ████████▍ │ 30273 │ ███████▌ │
│ 2013-11-01 │ 37396497 │ ██████████████████▋ │ 1757467 │ ████████▊ │ 31173 │ ███████▊ │
│ 2013-12-01 │ 39810216 │ ███████████████████▉ │ 1846204 │ █████████▏ │ 32326 │ ████████ │
│ 2014-01-01 │ 42420655 │ █████████████████████▏ │ 1927229 │ █████████▋ │ 35603 │ ████████▉ │
│ 2014-02-01 │ 38703362 │ ███████████████████▎ │ 1874067 │ █████████▎ │ 37007 │ █████████▎ │
│ 2014-03-01 │ 42459956 │ █████████████████████▏ │ 1959888 │ █████████▊ │ 37948 │ █████████▍ │
│ 2014-04-01 │ 42440735 │ █████████████████████▏ │ 1951369 │ █████████▊ │ 38362 │ █████████▌ │
│ 2014-05-01 │ 42514094 │ █████████████████████▎ │ 1970197 │ █████████▊ │ 39078 │ █████████▊ │
│ 2014-06-01 │ 41990650 │ ████████████████████▉ │ 1943850 │ █████████▋ │ 38268 │ █████████▌ │
│ 2014-07-01 │ 46868899 │ ███████████████████████▍ │ 2059346 │ ██████████▎ │ 40634 │ ██████████▏ │
│ 2014-08-01 │ 46990813 │ ███████████████████████▍ │ 2117335 │ ██████████▌ │ 41764 │ ██████████▍ │
│ 2014-09-01 │ 44992201 │ ██████████████████████▍ │ 2124708 │ ██████████▌ │ 41890 │ ██████████▍ │
│ 2014-10-01 │ 47497520 │ ███████████████████████▋ │ 2206535 │ ███████████ │ 43109 │ ██████████▊ │
│ 2014-11-01 │ 46118074 │ ███████████████████████ │ 2239747 │ ███████████▏ │ 43718 │ ██████████▉ │
│ 2014-12-01 │ 48807699 │ ████████████████████████▍ │ 2372945 │ ███████████▊ │ 43823 │ ██████████▉ │
│ 2015-01-01 │ 53851542 │ █████████████████████████ │ 2499536 │ ████████████▍ │ 47172 │ ███████████▊ │
│ 2015-02-01 │ 48342747 │ ████████████████████████▏ │ 2448496 │ ████████████▏ │ 47229 │ ███████████▊ │
│ 2015-03-01 │ 54564441 │ █████████████████████████ │ 2550534 │ ████████████▊ │ 48156 │ ████████████ │
│ 2015-04-01 │ 55005780 │ █████████████████████████ │ 2609443 │ █████████████ │ 49865 │ ████████████▍ │
│ 2015-05-01 │ 54504410 │ █████████████████████████ │ 2585535 │ ████████████▉ │ 50137 │ ████████████▌ │
│ 2015-06-01 │ 54258492 │ █████████████████████████ │ 2595129 │ ████████████▉ │ 49598 │ ████████████▍ │
│ 2015-07-01 │ 58451788 │ █████████████████████████ │ 2720026 │ █████████████▌ │ 55022 │ █████████████▊ │
│ 2015-08-01 │ 58075327 │ █████████████████████████ │ 2743994 │ █████████████▋ │ 55302 │ █████████████▊ │
│ 2015-09-01 │ 55574825 │ █████████████████████████ │ 2672793 │ █████████████▎ │ 53960 │ █████████████▍ │
│ 2015-10-01 │ 59494045 │ █████████████████████████ │ 2816426 │ ██████████████ │ 70210 │ █████████████████▌ │
│ 2015-11-01 │ 57117500 │ █████████████████████████ │ 2847146 │ ██████████████▏ │ 71363 │ █████████████████▊ │
│ 2015-12-01 │ 58523312 │ █████████████████████████ │ 2854840 │ ██████████████▎ │ 94559 │ ███████████████████████▋ │
│ 2016-01-01 │ 61991732 │ █████████████████████████ │ 2920366 │ ██████████████▌ │ 108438 │ █████████████████████████ │
│ 2016-02-01 │ 59189875 │ █████████████████████████ │ 2854683 │ ██████████████▎ │ 109916 │ █████████████████████████ │
│ 2016-03-01 │ 63918864 │ █████████████████████████ │ 2969542 │ ██████████████▊ │ 84787 │ █████████████████████▏ │
│ 2016-04-01 │ 64271256 │ █████████████████████████ │ 2999086 │ ██████████████▉ │ 61647 │ ███████████████▍ │
│ 2016-05-01 │ 65212004 │ █████████████████████████ │ 3034674 │ ███████████████▏ │ 67465 │ ████████████████▊ │
│ 2016-06-01 │ 65867743 │ █████████████████████████ │ 3057604 │ ███████████████▎ │ 75170 │ ██████████████████▊ │
│ 2016-07-01 │ 66974735 │ █████████████████████████ │ 3199374 │ ███████████████▉ │ 77732 │ ███████████████████▍ │
│ 2016-08-01 │ 69654819 │ █████████████████████████ │ 3239957 │ ████████████████▏ │ 63080 │ ███████████████▊ │
│ 2016-09-01 │ 67024973 │ █████████████████████████ │ 3190864 │ ███████████████▉ │ 62324 │ ███████████████▌ │
│ 2016-10-01 │ 71826553 │ █████████████████████████ │ 3284340 │ ████████████████▍ │ 62549 │ ███████████████▋ │
│ 2016-11-01 │ 71022319 │ █████████████████████████ │ 3300822 │ ████████████████▌ │ 69718 │ █████████████████▍ │
│ 2016-12-01 │ 72942967 │ █████████████████████████ │ 3430324 │ █████████████████▏ │ 71705 │ █████████████████▉ │
│ 2017-01-01 │ 78946585 │ █████████████████████████ │ 3572093 │ █████████████████▊ │ 78198 │ ███████████████████▌ │
│ 2017-02-01 │ 70609487 │ █████████████████████████ │ 3421115 │ █████████████████ │ 69823 │ █████████████████▍ │
│ 2017-03-01 │ 79723106 │ █████████████████████████ │ 3638122 │ ██████████████████▏ │ 73865 │ ██████████████████▍ │
│ 2017-04-01 │ 77478009 │ █████████████████████████ │ 3620591 │ ██████████████████ │ 74387 │ ██████████████████▌ │
│ 2017-05-01 │ 79810360 │ █████████████████████████ │ 3650820 │ ██████████████████▎ │ 74356 │ ██████████████████▌ │
│ 2017-06-01 │ 79901711 │ █████████████████████████ │ 3737614 │ ██████████████████▋ │ 72114 │ ██████████████████ │
│ 2017-07-01 │ 81798725 │ █████████████████████████ │ 3872330 │ ███████████████████▎ │ 76052 │ ███████████████████ │
│ 2017-08-01 │ 84658503 │ █████████████████████████ │ 3960093 │ ███████████████████▊ │ 77798 │ ███████████████████▍ │
│ 2017-09-01 │ 83165192 │ █████████████████████████ │ 3880501 │ ███████████████████▍ │ 78402 │ ███████████████████▌ │
│ 2017-10-01 │ 85828912 │ █████████████████████████ │ 3980335 │ ███████████████████▉ │ 80685 │ ████████████████████▏ │
│ 2017-11-01 │ 84965681 │ █████████████████████████ │ 4026749 │ ████████████████████▏ │ 82659 │ ████████████████████▋ │
│ 2017-12-01 │ 85973810 │ █████████████████████████ │ 4196354 │ ████████████████████▉ │ 91984 │ ██████████████████████▉ │
│ 2018-01-01 │ 91558594 │ █████████████████████████ │ 4364443 │ █████████████████████▊ │ 102577 │ █████████████████████████ │
│ 2018-02-01 │ 86467179 │ █████████████████████████ │ 4277899 │ █████████████████████▍ │ 104610 │ █████████████████████████ │
│ 2018-03-01 │ 96490262 │ █████████████████████████ │ 4422470 │ ██████████████████████ │ 112559 │ █████████████████████████ │
│ 2018-04-01 │ 98101232 │ █████████████████████████ │ 4572434 │ ██████████████████████▊ │ 105284 │ █████████████████████████ │
│ 2018-05-01 │ 100109100 │ █████████████████████████ │ 4698908 │ ███████████████████████▍ │ 103910 │ █████████████████████████ │
│ 2018-06-01 │ 100009462 │ █████████████████████████ │ 4697426 │ ███████████████████████▍ │ 101107 │ █████████████████████████ │
│ 2018-07-01 │ 108151359 │ █████████████████████████ │ 5099492 │ █████████████████████████ │ 106184 │ █████████████████████████ │
│ 2018-08-01 │ 107330940 │ █████████████████████████ │ 5084082 │ █████████████████████████ │ 109985 │ █████████████████████████ │
│ 2018-09-01 │ 104473929 │ █████████████████████████ │ 5011953 │ █████████████████████████ │ 109710 │ █████████████████████████ │
│ 2018-10-01 │ 112346556 │ █████████████████████████ │ 5320405 │ █████████████████████████ │ 112533 │ █████████████████████████ │
│ 2018-11-01 │ 112573001 │ █████████████████████████ │ 5353282 │ █████████████████████████ │ 112211 │ █████████████████████████ │
│ 2018-12-01 │ 121953600 │ █████████████████████████ │ 5611543 │ █████████████████████████ │ 118291 │ █████████████████████████ │
│ 2019-01-01 │ 129386587 │ █████████████████████████ │ 6016687 │ █████████████████████████ │ 125725 │ █████████████████████████ │
│ 2019-02-01 │ 120645639 │ █████████████████████████ │ 5974488 │ █████████████████████████ │ 125420 │ █████████████████████████ │
│ 2019-03-01 │ 137650471 │ █████████████████████████ │ 6410197 │ █████████████████████████ │ 135924 │ █████████████████████████ │
│ 2019-04-01 │ 138473643 │ █████████████████████████ │ 6416384 │ █████████████████████████ │ 139844 │ █████████████████████████ │
│ 2019-05-01 │ 142463421 │ █████████████████████████ │ 6574836 │ █████████████████████████ │ 142012 │ █████████████████████████ │
│ 2019-06-01 │ 134172939 │ █████████████████████████ │ 6601267 │ █████████████████████████ │ 140997 │ █████████████████████████ │
│ 2019-07-01 │ 145965083 │ █████████████████████████ │ 6901822 │ █████████████████████████ │ 147802 │ █████████████████████████ │
│ 2019-08-01 │ 146854393 │ █████████████████████████ │ 6993882 │ █████████████████████████ │ 151888 │ █████████████████████████ │
│ 2019-09-01 │ 137540219 │ █████████████████████████ │ 7001362 │ █████████████████████████ │ 148839 │ █████████████████████████ │
│ 2019-10-01 │ 145909884 │ █████████████████████████ │ 7160126 │ █████████████████████████ │ 152075 │ █████████████████████████ │
│ 2019-11-01 │ 138512489 │ █████████████████████████ │ 7098723 │ █████████████████████████ │ 164597 │ █████████████████████████ │
│ 2019-12-01 │ 146012313 │ █████████████████████████ │ 7438261 │ █████████████████████████ │ 166966 │ █████████████████████████ │
│ 2020-01-01 │ 153498208 │ █████████████████████████ │ 7703548 │ █████████████████████████ │ 174390 │ █████████████████████████ │
│ 2020-02-01 │ 148386817 │ █████████████████████████ │ 7582031 │ █████████████████████████ │ 170257 │ █████████████████████████ │
│ 2020-03-01 │ 166266315 │ █████████████████████████ │ 8339049 │ █████████████████████████ │ 192460 │ █████████████████████████ │
│ 2020-04-01 │ 178511581 │ █████████████████████████ │ 8991649 │ █████████████████████████ │ 202334 │ █████████████████████████ │
│ 2020-05-01 │ 189993779 │ █████████████████████████ │ 9331358 │ █████████████████████████ │ 217357 │ █████████████████████████ │
│ 2020-06-01 │ 187914434 │ █████████████████████████ │ 9085003 │ █████████████████████████ │ 223362 │ █████████████████████████ │
│ 2020-07-01 │ 194244994 │ █████████████████████████ │ 9321706 │ █████████████████████████ │ 228222 │ █████████████████████████ │
│ 2020-08-01 │ 196099301 │ █████████████████████████ │ 9368408 │ █████████████████████████ │ 230251 │ █████████████████████████ │
│ 2020-09-01 │ 182549761 │ █████████████████████████ │ 9271571 │ █████████████████████████ │ 227889 │ █████████████████████████ │
│ 2020-10-01 │ 186583890 │ █████████████████████████ │ 9396112 │ █████████████████████████ │ 233715 │ █████████████████████████ │
│ 2020-11-01 │ 186083723 │ █████████████████████████ │ 9623053 │ █████████████████████████ │ 234963 │ █████████████████████████ │
│ 2020-12-01 │ 191317162 │ █████████████████████████ │ 9898168 │ █████████████████████████ │ 249115 │ █████████████████████████ │
│ 2021-01-01 │ 210496207 │ █████████████████████████ │ 10503943 │ █████████████████████████ │ 259805 │ █████████████████████████ │
│ 2021-02-01 │ 193510365 │ █████████████████████████ │ 10215033 │ █████████████████████████ │ 253656 │ █████████████████████████ │
│ 2021-03-01 │ 207454415 │ █████████████████████████ │ 10365629 │ █████████████████████████ │ 267263 │ █████████████████████████ │
│ 2021-04-01 │ 204573086 │ █████████████████████████ │ 10391984 │ █████████████████████████ │ 270543 │ █████████████████████████ │
│ 2021-05-01 │ 217655366 │ █████████████████████████ │ 10648130 │ █████████████████████████ │ 288555 │ █████████████████████████ │
│ 2021-06-01 │ 208027069 │ █████████████████████████ │ 10397311 │ █████████████████████████ │ 291520 │ █████████████████████████ │
│ 2021-07-01 │ 210955954 │ █████████████████████████ │ 10063967 │ █████████████████████████ │ 252061 │ █████████████████████████ │
│ 2021-08-01 │ 225681244 │ █████████████████████████ │ 10383556 │ █████████████████████████ │ 254569 │ █████████████████████████ │
│ 2021-09-01 │ 220086513 │ █████████████████████████ │ 10298344 │ █████████████████████████ │ 256826 │ █████████████████████████ │
│ 2021-10-01 │ 227527379 │ █████████████████████████ │ 10729882 │ █████████████████████████ │ 283328 │ █████████████████████████ │
│ 2021-11-01 │ 228289963 │ █████████████████████████ │ 10995197 │ █████████████████████████ │ 302386 │ █████████████████████████ │
│ 2021-12-01 │ 235807471 │ █████████████████████████ │ 11312798 │ █████████████████████████ │ 313876 │ █████████████████████████ │
│ 2022-01-01 │ 256766679 │ █████████████████████████ │ 12074520 │ █████████████████████████ │ 340407 │ █████████████████████████ │
│ 2022-02-01 │ 219927645 │ █████████████████████████ │ 10846045 │ █████████████████████████ │ 293236 │ █████████████████████████ │
│ 2022-03-01 │ 236554668 │ █████████████████████████ │ 11330285 │ █████████████████████████ │ 302387 │ █████████████████████████ │
│ 2022-04-01 │ 231188077 │ █████████████████████████ │ 11697995 │ █████████████████████████ │ 316303 │ █████████████████████████ │
│ 2022-05-01 │ 230492108 │ █████████████████████████ │ 11448584 │ █████████████████████████ │ 323725 │ █████████████████████████ │
│ 2022-06-01 │ 218842949 │ █████████████████████████ │ 11400399 │ █████████████████████████ │ 324846 │ █████████████████████████ │
│ 2022-07-01 │ 242504279 │ █████████████████████████ │ 12049204 │ █████████████████████████ │ 335621 │ █████████████████████████ │
│ 2022-08-01 │ 247215325 │ █████████████████████████ │ 12189276 │ █████████████████████████ │ 337873 │ █████████████████████████ │
│ 2022-09-01 │ 234131223 │ █████████████████████████ │ 11674079 │ █████████████████████████ │ 326325 │ █████████████████████████ │
│ 2022-10-01 │ 237365072 │ █████████████████████████ │ 11804508 │ █████████████████████████ │ 336063 │ █████████████████████████ │
│ 2022-11-01 │ 229478878 │ █████████████████████████ │ 11543020 │ █████████████████████████ │ 323122 │ █████████████████████████ │
│ 2022-12-01 │ 238862690 │ █████████████████████████ │ 11967451 │ █████████████████████████ │ 331668 │ █████████████████████████ │
│ 2023-01-01 │ 253577512 │ █████████████████████████ │ 12264087 │ █████████████████████████ │ 332711 │ █████████████████████████ │
│ 2023-02-01 │ 221285501 │ █████████████████████████ │ 11537091 │ █████████████████████████ │ 317879 │ █████████████████████████ │
└──────────────┴───────────┴───────────────────────────┴──────────┴───────────────────────────┴────────────┴───────────────────────────┘
203 rows in set. Elapsed: 48.492 sec. Processed 14.69 billion rows, 213.35 GB (302.91 million rows/s., 4.40 GB/s.)
```
10. Here are the top 10 subreddits of 2022:
```sql
SELECT
subreddit,
count() AS count
FROM reddit
WHERE toYear(created_utc) = 2022
GROUP BY subreddit
ORDER BY count DESC
LIMIT 10;
```
```response
┌─subreddit──────┬────count─┐
│ AskReddit │ 72312060 │
│ AmItheAsshole │ 25323210 │
│ teenagers │ 22355960 │
│ worldnews │ 17797707 │
│ FreeKarma4U │ 15652274 │
│ FreeKarma4You │ 14929055 │
│ wallstreetbets │ 14235271 │
│ politics │ 12511136 │
│ memes │ 11610792 │
│ nba │ 11586571 │
└────────────────┴──────────┘
10 rows in set. Elapsed: 5.956 sec. Processed 14.69 billion rows, 126.19 GB (2.47 billion rows/s., 21.19 GB/s.)
```
11. Let's see which subreddits had the biggest increase in comments from 2018 to 2019:
```sql
SELECT
subreddit,
newcount - oldcount AS diff
FROM
(
SELECT
subreddit,
count(*) AS newcount
FROM reddit
WHERE toYear(created_utc) = 2019
GROUP BY subreddit
)
ALL INNER JOIN
(
SELECT
subreddit,
count(*) AS oldcount
FROM reddit
WHERE toYear(created_utc) = 2018
GROUP BY subreddit
) USING (subreddit)
ORDER BY diff DESC
LIMIT 50
SETTINGS joined_subquery_requires_alias = 0;
```
It looks like memes and teenagers were busy on Reddit in 2019:
```response
┌─subreddit────────────┬─────diff─┐
│ AskReddit │ 18765909 │
│ memes │ 16496996 │
│ teenagers │ 13071715 │
│ AmItheAsshole │ 12312663 │
│ dankmemes │ 12016716 │
│ unpopularopinion │ 6809935 │
│ PewdiepieSubmissions │ 6330844 │
│ Market76 │ 5213690 │
│ relationship_advice │ 4060717 │
│ Minecraft │ 3328659 │
│ freefolk │ 3227970 │
│ classicwow │ 3063133 │
│ Animemes │ 2866876 │
│ gonewild │ 2457680 │
│ PublicFreakout │ 2452288 │
│ gameofthrones │ 2411661 │
│ RoastMe │ 2378781 │
│ ShitPostCrusaders │ 2345414 │
│ AnthemTheGame │ 1813152 │
│ nfl │ 1804407 │
│ Showerthoughts │ 1797968 │
│ Cringetopia │ 1764034 │
│ pokemon │ 1763269 │
│ entitledparents │ 1744852 │
│ HistoryMemes │ 1721645 │
│ MortalKombat │ 1718184 │
│ trashy │ 1684357 │
│ ChapoTrapHouse │ 1675363 │
│ Brawlstars │ 1663763 │
│ iamatotalpieceofshit │ 1647381 │
│ ukpolitics │ 1599204 │
│ cursedcomments │ 1590781 │
│ Pikabu │ 1578597 │
│ wallstreetbets │ 1535225 │
│ AskOuija │ 1533214 │
│ interestingasfuck │ 1528910 │
│ aww │ 1439008 │
│ wholesomememes │ 1436566 │
│ SquaredCircle │ 1432172 │
│ insanepeoplefacebook │ 1290686 │
│ borderlands3 │ 1274462 │
│ FreeKarma4U │ 1217769 │
│ YangForPresidentHQ │ 1186918 │
│ FortniteCompetitive │ 1184508 │
│ AskMen │ 1180820 │
│ EpicSeven │ 1172061 │
│ MurderedByWords │ 1112476 │
│ politics │ 1084087 │
│ barstoolsports │ 1068020 │
│ BattlefieldV │ 1053878 │
└──────────────────────┴──────────┘
50 rows in set. Elapsed: 10.680 sec. Processed 29.38 billion rows, 198.67 GB (2.75 billion rows/s., 18.60 GB/s.)
```
12. One more query: let's compare ClickHouse mentions to other technologies like Snowflake and Postgres. This query is a big one because it has to search all 14.69 billion comments three times for a substring, but the performance is actually quite impressive. (Unfortunately ClickHouse users are not very active on Reddit yet):
```sql
SELECT
toStartOfQuarter(created_utc) AS quarter,
sum(if(positionCaseInsensitive(body, 'clickhouse') > 0, 1, 0)) AS clickhouse,
sum(if(positionCaseInsensitive(body, 'snowflake') > 0, 1, 0)) AS snowflake,
sum(if(positionCaseInsensitive(body, 'postgres') > 0, 1, 0)) AS postgres
FROM reddit
GROUP BY quarter
ORDER BY quarter ASC;
```
```response
┌────quarter─┬─clickhouse─┬─snowflake─┬─postgres─┐
│ 2005-10-01 │ 0 │ 0 │ 0 │
│ 2006-01-01 │ 0 │ 2 │ 23 │
│ 2006-04-01 │ 0 │ 2 │ 24 │
│ 2006-07-01 │ 0 │ 4 │ 13 │
│ 2006-10-01 │ 0 │ 23 │ 73 │
│ 2007-01-01 │ 0 │ 14 │ 91 │
│ 2007-04-01 │ 0 │ 10 │ 59 │
│ 2007-07-01 │ 0 │ 39 │ 116 │
│ 2007-10-01 │ 0 │ 45 │ 125 │
│ 2008-01-01 │ 0 │ 53 │ 234 │
│ 2008-04-01 │ 0 │ 79 │ 303 │
│ 2008-07-01 │ 0 │ 102 │ 174 │
│ 2008-10-01 │ 0 │ 156 │ 323 │
│ 2009-01-01 │ 0 │ 206 │ 208 │
│ 2009-04-01 │ 0 │ 178 │ 417 │
│ 2009-07-01 │ 0 │ 300 │ 295 │
│ 2009-10-01 │ 0 │ 633 │ 589 │
│ 2010-01-01 │ 0 │ 555 │ 501 │
│ 2010-04-01 │ 0 │ 587 │ 469 │
│ 2010-07-01 │ 0 │ 601 │ 696 │
│ 2010-10-01 │ 0 │ 1246 │ 505 │
│ 2011-01-01 │ 0 │ 758 │ 247 │
│ 2011-04-01 │ 0 │ 537 │ 113 │
│ 2011-07-01 │ 0 │ 173 │ 64 │
│ 2011-10-01 │ 0 │ 649 │ 96 │
│ 2012-01-01 │ 0 │ 4621 │ 662 │
│ 2012-04-01 │ 0 │ 5737 │ 785 │
│ 2012-07-01 │ 0 │ 6097 │ 1127 │
│ 2012-10-01 │ 0 │ 7986 │ 600 │
│ 2013-01-01 │ 0 │ 9704 │ 839 │
│ 2013-04-01 │ 0 │ 8161 │ 853 │
│ 2013-07-01 │ 0 │ 9704 │ 1028 │
│ 2013-10-01 │ 0 │ 12879 │ 1404 │
│ 2014-01-01 │ 0 │ 12317 │ 1548 │
│ 2014-04-01 │ 0 │ 13181 │ 1577 │
│ 2014-07-01 │ 0 │ 15640 │ 1710 │
│ 2014-10-01 │ 0 │ 19479 │ 1959 │
│ 2015-01-01 │ 0 │ 20411 │ 2104 │
│ 2015-04-01 │ 1 │ 20309 │ 9112 │
│ 2015-07-01 │ 0 │ 20325 │ 4771 │
│ 2015-10-01 │ 0 │ 25087 │ 3030 │
│ 2016-01-01 │ 0 │ 23462 │ 3126 │
│ 2016-04-01 │ 3 │ 25496 │ 2757 │
│ 2016-07-01 │ 4 │ 28233 │ 2928 │
│ 2016-10-01 │ 2 │ 45445 │ 2449 │
│ 2017-01-01 │ 9 │ 76019 │ 2808 │
│ 2017-04-01 │ 9 │ 67919 │ 2803 │
│ 2017-07-01 │ 13 │ 68974 │ 2771 │
│ 2017-10-01 │ 12 │ 69730 │ 2906 │
│ 2018-01-01 │ 17 │ 67476 │ 3152 │
│ 2018-04-01 │ 3 │ 67139 │ 3986 │
│ 2018-07-01 │ 14 │ 67979 │ 3609 │
│ 2018-10-01 │ 28 │ 74147 │ 3850 │
│ 2019-01-01 │ 14 │ 80250 │ 4305 │
│ 2019-04-01 │ 30 │ 70307 │ 3872 │
│ 2019-07-01 │ 33 │ 77149 │ 4164 │
│ 2019-10-01 │ 22 │ 113011 │ 4369 │
│ 2020-01-01 │ 34 │ 238273 │ 5133 │
│ 2020-04-01 │ 52 │ 454467 │ 6100 │
│ 2020-07-01 │ 37 │ 406623 │ 5507 │
│ 2020-10-01 │ 49 │ 212143 │ 5385 │
│ 2021-01-01 │ 56 │ 151262 │ 5749 │
│ 2021-04-01 │ 71 │ 119928 │ 6039 │
│ 2021-07-01 │ 53 │ 110342 │ 5765 │
│ 2021-10-01 │ 92 │ 121144 │ 6401 │
│ 2022-01-01 │ 93 │ 107512 │ 6772 │
│ 2022-04-01 │ 120 │ 91560 │ 6687 │
│ 2022-07-01 │ 183 │ 99764 │ 7377 │
│ 2022-10-01 │ 123 │ 99447 │ 7052 │
│ 2023-01-01 │ 126 │ 58733 │ 4891 │
└────────────┴────────────┴───────────┴──────────┘
70 rows in set. Elapsed: 325.835 sec. Processed 14.69 billion rows, 2.57 TB (45.08 million rows/s., 7.87 GB/s.)
```

View File

@ -22,7 +22,7 @@ The steps below will easily work on a local install of ClickHouse too. The only
## Step-by-step instructions
1. Let's see what the data looks like. The `s3cluster` table function returns a table, so we can `DESCRIBE` the reult:
1. Let's see what the data looks like. The `s3cluster` table function returns a table, so we can `DESCRIBE` the result:
```sql
DESCRIBE s3Cluster(
@ -322,7 +322,7 @@ ORDER BY month ASC;
A spike of uploaders [around covid is noticeable](https://www.theverge.com/2020/3/27/21197642/youtube-with-me-style-videos-views-coronavirus-cook-workout-study-home-beauty).
### More subtitiles over time and when
### More subtitles over time and when
With advances in speech recognition, it's easier than ever to create subtitles for video with YouTube adding auto-captioning in late 2009 - was the jump then?
@ -484,4 +484,4 @@ ARRAY JOIN
│ 20th │ 16 │
│ 10th │ 6 │
└────────────┴─────────┘
```
```

View File

@ -28,23 +28,25 @@ The quickest and easiest way to get up and running with ClickHouse is to create
For production installs of a specific release version see the [installation options](#available-installation-options) down below.
:::
On Linux and macOS:
On Linux, macOS and FreeBSD:
1. If you are just getting started and want to see what ClickHouse can do, the simplest way to download ClickHouse locally is to run the following command. It downloads a single binary for your operating system that can be used to run the ClickHouse server, clickhouse-client, clickhouse-local,
ClickHouse Keeper, and other tools:
1. If you are just getting started and want to see what ClickHouse can do, the simplest way to download ClickHouse locally is to run the
following command. It downloads a single binary for your operating system that can be used to run the ClickHouse server,
clickhouse-client, clickhouse-local, ClickHouse Keeper, and other tools:
```bash
curl https://clickhouse.com/ | sh
```
1. Run the following command to start the ClickHouse server:
```bash
./clickhouse server
```
The first time you run this script, the necessary files and folders are created in the current directory, then the server starts.
The first time you run this script, the necessary files and folders are created in the current directory, then the server starts.
1. Open a new terminal and use the **clickhouse-client** to connect to your service:
1. Open a new terminal and use the **./clickhouse client** to connect to your service:
```bash
./clickhouse client
@ -330,7 +332,9 @@ For production environments, its recommended to use the latest `stable`-versi
To run ClickHouse inside Docker follow the guide on [Docker Hub](https://hub.docker.com/r/clickhouse/clickhouse-server/). Those images use official `deb` packages inside.
### From Sources {#from-sources}
## Non-Production Deployments (Advanced)
### Compile From Source {#from-sources}
To manually compile ClickHouse, follow the instructions for [Linux](/docs/en/development/build.md) or [macOS](/docs/en/development/build-osx.md).
@ -346,8 +350,33 @@ Youll need to create data and metadata folders manually and `chown` them for
On Gentoo, you can just use `emerge clickhouse` to install ClickHouse from sources.
### From CI checks pre-built binaries
ClickHouse binaries are built for each [commit](/docs/en/development/build.md#you-dont-have-to-build-clickhouse).
### Install a CI-generated Binary
ClickHouse's continuous integration (CI) infrastructure produces specialized builds for each commit in the [ClickHouse
repository](https://github.com/clickhouse/clickhouse/), e.g. [sanitized](https://github.com/google/sanitizers) builds, unoptimized (Debug)
builds, cross-compiled builds etc. While such builds are normally only useful during development, they can in certain situations also be
interesting for users.
:::note
Since ClickHouse's CI is evolving over time, the exact steps to download CI-generated builds may vary.
Also, CI may delete too old build artifacts, making them unavailable for download.
:::
For example, to download an aarch64 binary for ClickHouse v23.4, follow these steps:
- Find the GitHub pull request for release v23.4: [Release pull request for branch 23.4](https://github.com/ClickHouse/ClickHouse/pull/49238)
- Click "Commits", then click a commit similar to "Update autogenerated version to 23.4.2.1 and contributors" for the particular version you would like to install.
- Click the green check / yellow dot / red cross to open the list of CI checks.
- Click "Details" next to "ClickHouse Build Check" in the list, it will open a page similar to [this page](https://s3.amazonaws.com/clickhouse-test-reports/46793/b460eb70bf29b19eadd19a1f959b15d186705394/clickhouse_build_check/report.html)
- Find the rows with compiler = "clang-*-aarch64" - there are multiple rows.
- Download the artifacts for these builds.
To download binaries for very old x86-64 systems without [SSE3](https://en.wikipedia.org/wiki/SSE3) support or old ARM systems without
[ARMv8.1-A](https://en.wikipedia.org/wiki/AArch64#ARMv8.1-A) support, open a [pull
request](https://github.com/ClickHouse/ClickHouse/commits/master) and find CI check "BuilderBinAmd64Compat", respectively
"BuilderBinAarch64V80Compat". Then click "Details", open the "Build" fold, scroll to the end, find message "Notice: Build URLs
https://s3.amazonaws.com/clickhouse/builds/PRs/.../.../binary_aarch64_v80compat/clickhouse". You can then click the link to download the
build.
## Launch {#launch}

View File

@ -177,11 +177,11 @@ You can pass parameters to `clickhouse-client` (all parameters have a default va
- `--user, -u` The username. Default value: default.
- `--password` The password. Default value: empty string.
- `--ask-password` - Prompt the user to enter a password.
- `--query, -q` The query to process when using non-interactive mode. You must specify either `query` or `queries-file` option.
- `--queries-file` file path with queries to execute. You must specify either `query` or `queries-file` option.
- `--database, -d` Select the current default database. Default value: the current database from the server settings (default by default).
- `--query, -q` The query to process when using non-interactive mode. Cannot be used simultaneously with `--queries-file`.
- `--queries-file` file path with queries to execute. Cannot be used simultaneously with `--query`.
- `--multiquery, -n` If specified, multiple queries separated by semicolons can be listed after the `--query` option. For convenience, it is also possible to omit `--query` and pass the queries directly after `--multiquery`.
- `--multiline, -m` If specified, allow multiline queries (do not send the query on Enter).
- `--multiquery, -n` If specified, allow processing multiple queries separated by semicolons.
- `--database, -d` Select the current default database. Default value: the current database from the server settings (default by default).
- `--format, -f` Use the specified default format to output the result.
- `--vertical, -E` If specified, use the [Vertical format](../interfaces/formats.md#vertical) by default to output the result. This is the same as `format=Vertical`. In this format, each value is printed on a separate line, which is helpful when displaying wide tables.
- `--time, -t` If specified, print the query execution time to stderr in non-interactive mode.

View File

@ -467,6 +467,7 @@ The CSV format supports the output of totals and extremes the same way as `TabSe
- [output_format_csv_crlf_end_of_line](/docs/en/operations/settings/settings-formats.md/#output_format_csv_crlf_end_of_line) - if it is set to true, end of line in CSV output format will be `\r\n` instead of `\n`. Default value - `false`.
- [input_format_csv_skip_first_lines](/docs/en/operations/settings/settings-formats.md/#input_format_csv_skip_first_lines) - skip the specified number of lines at the beginning of data. Default value - `0`.
- [input_format_csv_detect_header](/docs/en/operations/settings/settings-formats.md/#input_format_csv_detect_header) - automatically detect header with names and types in CSV format. Default value - `true`.
- [input_format_csv_trim_whitespaces](/docs/en/operations/settings/settings-formats.md/#input_format_csv_trim_whitespaces) - trim spaces and tabs in non-quoted CSV strings. Default value - `true`.
## CSVWithNames {#csvwithnames}

View File

@ -2,34 +2,115 @@
slug: /en/operations/named-collections
sidebar_position: 69
sidebar_label: "Named collections"
title: "Named collections"
---
# Storing details for connecting to external sources in configuration files
Named collections provide a way to store collections of key-value pairs to be
used to configure integrations with external sources. You can use named collections with
dictionaries, tables, table functions, and object storage.
Details for connecting to external sources (dictionaries, tables, table functions) can be saved
in configuration files and thus simplify the creation of objects and hide credentials
from users with only SQL access.
Named collections can be configured with DDL or in configuration files and are applied
when ClickHouse starts. They simplify the creation of objects and the hiding of credentials
from users without administrative access.
Parameters can be set in XML `<format>CSV</format>` and overridden in SQL `, format = 'TSV'`.
The parameters in SQL can be overridden using format `key` = `value`: `compression_method = 'gzip'`.
The keys in a named collection must match the parameter names of the corresponding
function, table engine, database, etc. In the examples below the parameter list is
linked to for each type.
Named collections are stored in the `config.xml` file of the ClickHouse server in the `<named_collections>` section and are applied when ClickHouse starts.
Parameters set in a named collection can be overridden in SQL, this is shown in the examples
below.
Example of configuration:
```xml
$ cat /etc/clickhouse-server/config.d/named_collections.xml
## Storing named collections in the system database
### DDL example
```sql
CREATE NAMED COLLECTION name AS
key_1 = 'value',
key_2 = 'value2',
url = 'https://connection.url/'
```
### Permissions to create named collections with DDL
To manage named collections with DDL a user must have the `named_collection_control` privilege. This can be assigned by adding a file to `/etc/clickhouse-server/users.d/`. The example gives the user `default` both the `access_management` and `named_collection_control` privileges:
```xml title='/etc/clickhouse-server/users.d/user_default.xml'
<clickhouse>
<users>
<default>
<password_sha256_hex replace="true">65e84be33532fb784c48129675f9eff3a682b27168c0ea744b2cf58ee02337c5</password_sha256_hex>
<access_management>1</access_management>
<!-- highlight-start -->
<named_collection_control>1</named_collection_control>
<!-- highlight-end -->
</default>
</users>
</clickhouse>
```
:::tip
In the above example the `password_sha256_hex` value is the hexadecimal representation of the SHA256 hash of the password. This configuration for the user `default` has the attribute `replace=true` because the default configuration has a plain text `password` set, and it is not possible to have both plain text and sha256 hex passwords set for a user.
:::
## Storing named collections in configuration files
### XML example
```xml title='/etc/clickhouse-server/config.d/named_collections.xml'
<clickhouse>
<named_collections>
...
<name>
<key_1>value</key_1>
<key_2>value_2</key_2>
<url>https://connection.url/</url>
</name>
</named_collections>
</clickhouse>
```
## Named collections for accessing S3.
## Modifying named collections
Named collections that are created with DDL queries can be altered or dropped with DDL. Named collections created with XML files can be managed by editing or deleting the corresponding XML.
### Alter a DDL named collection
Change or add the keys `key1` and `key3` of the collection `collection2`:
```sql
ALTER NAMED COLLECTION collection2 SET key1=4, key3='value3'
```
Remove the key `key2` from `collection2`:
```sql
ALTER NAMED COLLECTION collection2 DELETE key2
```
Change or add the key `key1` and delete the key `key3` of the collection `collection2`:
```sql
ALTER NAMED COLLECTION collection2 SET key1=4, DELETE key3
```
### Drop the DDL named collection `collection2`:
```sql
DROP NAMED COLLECTION collection2
```
## Named collections for accessing S3
For the description of parameters, see [s3 Table Function](../sql-reference/table-functions/s3.md).
Example of configuration:
### DDL example
```sql
CREATE NAMED COLLECTION s3_mydata AS
access_key_id = 'AKIAIOSFODNN7EXAMPLE',
secret_access_key = 'wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY',
format = 'CSV',
url = 'https://s3.us-east-1.amazonaws.com/yourbucket/mydata/'
```
### XML example
```xml
<clickhouse>
<named_collections>
@ -43,23 +124,23 @@ Example of configuration:
</clickhouse>
```
### Example of using named collections with the s3 function
### s3() function and S3 Table named collection examples
Both of the following examples use the same named collection `s3_mydata`:
#### s3() function
```sql
INSERT INTO FUNCTION s3(s3_mydata, filename = 'test_file.tsv.gz',
format = 'TSV', structure = 'number UInt64', compression_method = 'gzip')
SELECT * FROM numbers(10000);
SELECT count()
FROM s3(s3_mydata, filename = 'test_file.tsv.gz')
┌─count()─┐
│ 10000 │
└─────────┘
1 rows in set. Elapsed: 0.279 sec. Processed 10.00 thousand rows, 90.00 KB (35.78 thousand rows/s., 322.02 KB/s.)
```
### Example of using named collections with an S3 table
:::tip
The first argument to the `s3()` function above is the name of the collection, `s3_mydata`. Without named collections, the access key ID, secret, format, and URL would all be passed in every call to the `s3()` function.
:::
#### S3 table
```sql
CREATE TABLE s3_engine_table (number Int64)
@ -78,7 +159,22 @@ SELECT * FROM s3_engine_table LIMIT 3;
For the description of parameters, see [mysql](../sql-reference/table-functions/mysql.md).
Example of configuration:
### DDL example
```sql
CREATE NAMED COLLECTION mymysql AS
user = 'myuser',
password = 'mypass',
host = '127.0.0.1',
port = 3306,
database = 'test',
connection_pool_size = 8,
on_duplicate_clause = 1,
replace_query = 1
```
### XML example
```xml
<clickhouse>
<named_collections>
@ -96,7 +192,11 @@ Example of configuration:
</clickhouse>
```
### Example of using named collections with the mysql function
### mysql() function, MySQL table, MySQL database, and Dictionary named collection examples
The four following examples use the same named collection `mymysql`:
#### mysql() function
```sql
SELECT count() FROM mysql(mymysql, table = 'test');
@ -105,8 +205,11 @@ SELECT count() FROM mysql(mymysql, table = 'test');
│ 3 │
└─────────┘
```
:::note
The named collection does not specify the `table` parameter, so it is specified in the function call as `table = 'test'`.
:::
### Example of using named collections with an MySQL table
#### MySQL table
```sql
CREATE TABLE mytable(A Int64) ENGINE = MySQL(mymysql, table = 'test', connection_pool_size=3, replace_query=0);
@ -117,7 +220,11 @@ SELECT count() FROM mytable;
└─────────┘
```
### Example of using named collections with database with engine MySQL
:::note
The DDL overrides the named collection setting for connection_pool_size.
:::
#### MySQL database
```sql
CREATE DATABASE mydatabase ENGINE = MySQL(mymysql);
@ -130,7 +237,7 @@ SHOW TABLES FROM mydatabase;
└────────┘
```
### Example of using named collections with a dictionary with source MySQL
#### MySQL Dictionary
```sql
CREATE DICTIONARY dict (A Int64, B String)
@ -150,6 +257,17 @@ SELECT dictGet('dict', 'B', 2);
For the description of parameters, see [postgresql](../sql-reference/table-functions/postgresql.md).
```sql
CREATE NAMED COLLECTION mypg AS
user = 'pguser',
password = 'jw8s0F4',
host = '127.0.0.1',
port = 5432,
database = 'test',
schema = 'test_schema',
connection_pool_size = 8
```
Example of configuration:
```xml
<clickhouse>
@ -229,12 +347,22 @@ SELECT dictGet('dict', 'b', 2);
└─────────────────────────┘
```
## Named collections for accessing remote ClickHouse database
## Named collections for accessing a remote ClickHouse database
For the description of parameters, see [remote](../sql-reference/table-functions/remote.md/#parameters).
Example of configuration:
```sql
CREATE NAMED COLLECTION remote1 AS
host = 'remote_host',
port = 9000,
database = 'system',
user = 'foo',
password = 'secret',
secure = 1
```
```xml
<clickhouse>
<named_collections>
@ -286,3 +414,4 @@ SELECT dictGet('dict', 'b', 1);
│ a │
└─────────────────────────┘
```

File diff suppressed because it is too large Load Diff

View File

@ -577,7 +577,7 @@ Default value: 20
**Usage**
The value of the `number_of_free_entries_in_pool_to_execute_mutation` setting should be less than the value of the [background_pool_size](/docs/en/operations/server-configuration-parameters/settings#background_pool_size) * [background_pool_size](/docs/en/operations/server-configuration-parameters/settings#background_merges_mutations_concurrency_ratio). Otherwise, ClickHouse throws an exception.
The value of the `number_of_free_entries_in_pool_to_execute_mutation` setting should be less than the value of the [background_pool_size](/docs/en/operations/server-configuration-parameters/settings.md/#background_pool_size) * [background_merges_mutations_concurrency_ratio](/docs/en/operations/server-configuration-parameters/settings.md/#background_merges_mutations_concurrency_ratio). Otherwise, ClickHouse throws an exception.
## max_part_loading_threads {#max-part-loading-threads}
@ -840,4 +840,4 @@ Possible values:
- `Always` or `Never`.
Default value: `Never`
Default value: `Never`

View File

@ -882,6 +882,38 @@ My NULL
My NULL
```
### input_format_csv_trim_whitespaces {#input_format_csv_trim_whitespaces}
Trims spaces and tabs in non-quoted CSV strings.
Default value: `true`.
**Examples**
Query
```bash
echo ' string ' | ./clickhouse local -q "select * from table FORMAT CSV" --input-format="CSV" --input_format_csv_trim_whitespaces=true
```
Result
```text
"string"
```
Query
```bash
echo ' string ' | ./clickhouse local -q "select * from table FORMAT CSV" --input-format="CSV" --input_format_csv_trim_whitespaces=false
```
Result
```text
" string "
```
## Values format settings {#values-format-settings}
### input_format_values_interpret_expressions {#input_format_values_interpret_expressions}
@ -1182,7 +1214,7 @@ Possible values:
- `bin` - as 16-bytes binary.
- `str` - as a string of 36 bytes.
- `ext` - as extention with ExtType = 2.
- `ext` - as extension with ExtType = 2.
Default value: `ext`.

View File

@ -452,6 +452,8 @@ Possible values:
The first phase of a grace join reads the right table and splits it into N buckets depending on the hash value of key columns (initially, N is `grace_hash_join_initial_buckets`). This is done in a way to ensure that each bucket can be processed independently. Rows from the first bucket are added to an in-memory hash table while the others are saved to disk. If the hash table grows beyond the memory limit (e.g., as set by [`max_bytes_in_join`](/docs/en/operations/settings/query-complexity.md/#settings-max_bytes_in_join)), the number of buckets is increased and the bucket assigned to each row is recomputed. Any rows which don't belong to the current bucket are flushed and reassigned.
Supports `INNER/LEFT/RIGHT/FULL ALL/ANY JOIN`.
- hash
[Hash join algorithm](https://en.wikipedia.org/wiki/Hash_join) is used. The most generic implementation that supports all combinations of kind and strictness and multiple join keys that are combined with `OR` in the `JOIN ON` section.
@ -644,6 +646,48 @@ Used for the same purpose as `max_block_size`, but it sets the recommended block
However, the block size cannot be more than `max_block_size` rows.
By default: 1,000,000. It only works when reading from MergeTree engines.
## max_concurrent_queries_for_user {#max-concurrent-queries-for-user}
The maximum number of simultaneously processed queries related to MergeTree table per user.
Possible values:
- Positive integer.
- 0 — No limit.
Default value: `0`.
**Example**
``` xml
<max_concurrent_queries_for_user>5</max_concurrent_queries_for_user>
```
## max_concurrent_queries_for_all_users {#max-concurrent-queries-for-all-users}
Throw exception if the value of this setting is less than or equal to the current number of simultaneously processed queries.
Example: `max_concurrent_queries_for_all_users` can be set to 99 for all users and database administrator can set it to 100 for itself to run queries for investigation even when the server is overloaded.
Modifying the setting for one query or user does not affect other queries.
Possible values:
- Positive integer.
- 0 — No limit.
Default value: `0`.
**Example**
``` xml
<max_concurrent_queries_for_all_users>99</max_concurrent_queries_for_all_users>
```
**See Also**
- [max_concurrent_queries](/docs/en/operations/server-configuration-parameters/settings.md/#max_concurrent_queries)
## merge_tree_min_rows_for_concurrent_read {#setting-merge-tree-min-rows-for-concurrent-read}
If the number of rows to be read from a file of a [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table exceeds `merge_tree_min_rows_for_concurrent_read` then ClickHouse tries to perform a concurrent reading from this file on several threads.
@ -1048,6 +1092,12 @@ Timeouts in seconds on the socket used for communicating with the client.
Default value: 10, 300, 300.
## handshake_timeout_ms {#handshake-timeout-ms}
Timeout in milliseconds for receiving Hello packet from replicas during handshake.
Default value: 10000.
## cancel_http_readonly_queries_on_client_close {#cancel-http-readonly-queries-on-client-close}
Cancels HTTP read-only queries (e.g. SELECT) when a client closes the connection without waiting for the response.
@ -1105,7 +1155,7 @@ Default value: `0`.
Could be used for throttling speed when replicating the data to add or replace new nodes.
:::note
60000000 bytes/s approximatly corresponds to 457 Mbps (60000000 / 1024 / 1024 * 8).
60000000 bytes/s approximately corresponds to 457 Mbps (60000000 / 1024 / 1024 * 8).
:::
## max_replicated_sends_network_bandwidth_for_server {#max_replicated_sends_network_bandwidth_for_server}
@ -1126,7 +1176,7 @@ Default value: `0`.
Could be used for throttling speed when replicating the data to add or replace new nodes.
:::note
60000000 bytes/s approximatly corresponds to 457 Mbps (60000000 / 1024 / 1024 * 8).
60000000 bytes/s approximately corresponds to 457 Mbps (60000000 / 1024 / 1024 * 8).
:::
## connect_timeout_with_failover_ms {#connect-timeout-with-failover-ms}
@ -1185,6 +1235,36 @@ Disable limit on kafka_num_consumers that depends on the number of available CPU
Default value: false.
## postgresql_connection_pool_size {#postgresql-connection-pool-size}
Connection pool size for PostgreSQL table engine and database engine.
Default value: 16
## postgresql_connection_pool_wait_timeout {#postgresql-connection-pool-wait-timeout}
Connection pool push/pop timeout on empty pool for PostgreSQL table engine and database engine. By default it will block on empty pool.
Default value: 5000
## postgresql_connection_pool_auto_close_connection {#postgresql-connection-pool-auto-close-connection}
Close connection before returning connection to the pool.
Default value: true.
## odbc_bridge_connection_pool_size {#odbc-bridge-connection-pool-size}
Connection pool size for each connection settings string in ODBC bridge.
Default value: 16
## odbc_bridge_use_connection_pooling {#odbc-bridge-use-connection-pooling}
Use connection pooling in ODBC bridge. If set to false, a new connection is created every time.
Default value: true
## use_uncompressed_cache {#setting-use_uncompressed_cache}
Whether to use a cache of uncompressed blocks. Accepts 0 or 1. By default, 0 (disabled).
@ -1377,6 +1457,12 @@ Possible values:
Default value: `default`.
## allow_experimental_parallel_reading_from_replicas
If true, ClickHouse will send a SELECT query to all replicas of a table (up to `max_parallel_replicas`). It will work for any kind of MergeTree table.
Default value: `false`.
## compile_expressions {#compile-expressions}
Enables or disables compilation of frequently used simple functions and operators to native code with LLVM at runtime.
@ -1708,7 +1794,7 @@ Default value: `100000`.
### async_insert_max_query_number {#async-insert-max-query-number}
The maximum number of insert queries per block before being inserted. This setting takes effect only if [async_insert_deduplicate](#settings-async-insert-deduplicate) is enabled.
The maximum number of insert queries per block before being inserted. This setting takes effect only if [async_insert_deduplicate](#async-insert-deduplicate) is enabled.
Possible values:
@ -1739,7 +1825,7 @@ Possible values:
Default value: `0`.
### async_insert_deduplicate {#settings-async-insert-deduplicate}
### async_insert_deduplicate {#async-insert-deduplicate}
Enables or disables insert deduplication of `ASYNC INSERT` (for Replicated\* tables).
@ -1992,7 +2078,7 @@ FORMAT PrettyCompactMonoBlock
## distributed_push_down_limit {#distributed-push-down-limit}
Enables or disables [LIMIT](#limit) applying on each shard separatelly.
Enables or disables [LIMIT](#limit) applying on each shard separately.
This will allow to avoid:
- Sending extra rows over network;
@ -2393,7 +2479,7 @@ Default value: 0.
## allow_introspection_functions {#settings-allow_introspection_functions}
Enables or disables [introspections functions](../../sql-reference/functions/introspection.md) for query profiling.
Enables or disables [introspection functions](../../sql-reference/functions/introspection.md) for query profiling.
Possible values:
@ -3213,17 +3299,6 @@ Possible values:
Default value: `0`.
## allow_experimental_geo_types {#allow-experimental-geo-types}
Allows working with experimental [geo data types](../../sql-reference/data-types/geo.md).
Possible values:
- 0 — Working with geo data types is disabled.
- 1 — Working with geo data types is enabled.
Default value: `0`.
## database_atomic_wait_for_drop_and_detach_synchronously {#database_atomic_wait_for_drop_and_detach_synchronously}
Adds a modifier `SYNC` to all `DROP` and `DETACH` queries.
@ -3465,7 +3540,7 @@ Default value: `0`.
## database_replicated_initial_query_timeout_sec {#database_replicated_initial_query_timeout_sec}
Sets how long initial DDL query should wait for Replicated database to precess previous DDL queue entries in seconds.
Sets how long initial DDL query should wait for Replicated database to process previous DDL queue entries in seconds.
Possible values:
@ -3566,7 +3641,7 @@ SETTINGS index_granularity = 8192 │
## external_table_functions_use_nulls {#external-table-functions-use-nulls}
Defines how [mysql](../../sql-reference/table-functions/mysql.md), [postgresql](../../sql-reference/table-functions/postgresql.md) and [odbc](../../sql-reference/table-functions/odbc.md)] table functions use Nullable columns.
Defines how [mysql](../../sql-reference/table-functions/mysql.md), [postgresql](../../sql-reference/table-functions/postgresql.md) and [odbc](../../sql-reference/table-functions/odbc.md) table functions use Nullable columns.
Possible values:
@ -3791,8 +3866,8 @@ Result:
## enable_extended_results_for_datetime_functions {#enable-extended-results-for-datetime-functions}
Enables or disables returning results of type:
- `Date32` with extended range (compared to type `Date`) for functions [toStartOfYear](../../sql-reference/functions/date-time-functions.md/#tostartofyear), [toStartOfISOYear](../../sql-reference/functions/date-time-functions.md/#tostartofisoyear), [toStartOfQuarter](../../sql-reference/functions/date-time-functions.md/#tostartofquarter), [toStartOfMonth](../../sql-reference/functions/date-time-functions.md/#tostartofmonth), [toStartOfWeek](../../sql-reference/functions/date-time-functions.md/#tostartofweek), [toMonday](../../sql-reference/functions/date-time-functions.md/#tomonday) and [toLastDayOfMonth](../../sql-reference/functions/date-time-functions.md/#tolastdayofmonth).
- `DateTime64` with extended range (compared to type `DateTime`) for functions [toStartOfDay](../../sql-reference/functions/date-time-functions.md/#tostartofday), [toStartOfHour](../../sql-reference/functions/date-time-functions.md/#tostartofhour), [toStartOfMinute](../../sql-reference/functions/date-time-functions.md/#tostartofminute), [toStartOfFiveMinutes](../../sql-reference/functions/date-time-functions.md/#tostartoffiveminutes), [toStartOfTenMinutes](../../sql-reference/functions/date-time-functions.md/#tostartoftenminutes), [toStartOfFifteenMinutes](../../sql-reference/functions/date-time-functions.md/#tostartoffifteenminutes) and [timeSlot](../../sql-reference/functions/date-time-functions.md/#timeslot).
- `Date32` with extended range (compared to type `Date`) for functions [toStartOfYear](../../sql-reference/functions/date-time-functions.md#tostartofyear), [toStartOfISOYear](../../sql-reference/functions/date-time-functions.md#tostartofisoyear), [toStartOfQuarter](../../sql-reference/functions/date-time-functions.md#tostartofquarter), [toStartOfMonth](../../sql-reference/functions/date-time-functions.md#tostartofmonth), [toLastDayOfMonth](../../sql-reference/functions/date-time-functions.md#tolastdayofmonth), [toStartOfWeek](../../sql-reference/functions/date-time-functions.md#tostartofweek), [toLastDayOfWeek](../../sql-reference/functions/date-time-functions.md#tolastdayofweek) and [toMonday](../../sql-reference/functions/date-time-functions.md#tomonday).
- `DateTime64` with extended range (compared to type `DateTime`) for functions [toStartOfDay](../../sql-reference/functions/date-time-functions.md#tostartofday), [toStartOfHour](../../sql-reference/functions/date-time-functions.md#tostartofhour), [toStartOfMinute](../../sql-reference/functions/date-time-functions.md#tostartofminute), [toStartOfFiveMinutes](../../sql-reference/functions/date-time-functions.md#tostartoffiveminutes), [toStartOfTenMinutes](../../sql-reference/functions/date-time-functions.md#tostartoftenminutes), [toStartOfFifteenMinutes](../../sql-reference/functions/date-time-functions.md#tostartoffifteenminutes) and [timeSlot](../../sql-reference/functions/date-time-functions.md#timeslot).
Possible values:
@ -4154,6 +4229,12 @@ Default value: `2000`
If it's enabled, in hedged requests we can start new connection until receiving first data packet even if we have already made some progress
(but progress hasn't been updated for `receive_data_timeout` timeout), otherwise we disable changing replica after the first time we made progress.
## parallel_view_processing
Enables pushing to attached views concurrently instead of sequentially.
Default value: `false`.
## partial_result_on_first_cancel {#partial_result_on_first_cancel}
When set to `true` and the user wants to interrupt a query (for example using `Ctrl+C` on the client), then the query continues execution only on data that was already read from the table. Afterwards, it will return a partial result of the query for the part of the table that was read. To fully stop the execution of a query without a partial result, the user should send 2 cancel requests.
@ -4223,3 +4304,12 @@ Possible values:
- false — Disallow.
Default value: `false`.
## zstd_window_log_max
Allows you to select the max window log of ZSTD (it will not be used for MergeTree family)
Type: Int64
Default: 0

View File

@ -0,0 +1,27 @@
---
slug: /en/operations/system-tables/build_options
---
# build_options
Contains information about the ClickHouse server's build options.
Columns:
- `name` (String) — Name of the build option, e.g. `USE_ODBC`
- `value` (String) — Value of the build option, e.g. `1`
**Example**
``` sql
SELECT * FROM system.build_options LIMIT 5
```
``` text
┌─name─────────────┬─value─┐
│ USE_BROTLI │ 1 │
│ USE_BZIP2 │ 1 │
│ USE_CAPNP │ 1 │
│ USE_CASSANDRA │ 1 │
│ USE_DATASKETCHES │ 1 │
└──────────────────┴───────┘
```

View File

@ -28,7 +28,7 @@ The `system.columns` table contains the following columns (the column type is sh
- `is_in_sampling_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether the column is in the sampling key expression.
- `compression_codec` ([String](../../sql-reference/data-types/string.md)) — Compression codec name.
- `character_octet_length` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum length in bytes for binary data, character data, or text data and images. In ClickHouse, it makes sense only for the `FixedString` data type. Otherwise, the `NULL` value is returned.
- `numeric_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Accuracy of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse it is bitness for integer types and decimal precision for `Decimal` types. Otherwise, the `NULL` value is returned.
- `numeric_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Accuracy of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse it is bit width for integer types and decimal precision for `Decimal` types. Otherwise, the `NULL` value is returned.
- `numeric_precision_radix` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The base of the number system in which the accuracy of approximate numeric data, exact numeric data, integer data, or monetary data is expressed. In ClickHouse it's 2 for integer types and 10 for `Decimal` types. Otherwise, the `NULL` value is returned.
- `numeric_scale` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The scale of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse, it makes sense only for `Decimal` types. Otherwise, the `NULL` value is returned.
- `datetime_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Decimal precision of `DateTime64` data type. For other data types, the `NULL` value is returned.

Some files were not shown because too many files have changed in this diff Show More