mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-26 09:32:01 +00:00
Merge remote-tracking branch 'upstream/master' into HEAD
This commit is contained in:
commit
c980771c22
5
.github/workflows/pull_request.yml
vendored
5
.github/workflows/pull_request.yml
vendored
@ -46,7 +46,12 @@ jobs:
|
||||
- name: Python unit tests
|
||||
run: |
|
||||
cd "$GITHUB_WORKSPACE/tests/ci"
|
||||
echo "Testing the main ci directory"
|
||||
python3 -m unittest discover -s . -p '*_test.py'
|
||||
for dir in *_lambda/; do
|
||||
echo "Testing $dir"
|
||||
python3 -m unittest discover -s "$dir" -p '*_test.py'
|
||||
done
|
||||
DockerHubPushAarch64:
|
||||
needs: CheckLabels
|
||||
runs-on: [self-hosted, style-checker-aarch64]
|
||||
|
15
.gitmodules
vendored
15
.gitmodules
vendored
@ -35,10 +35,9 @@
|
||||
[submodule "contrib/unixodbc"]
|
||||
path = contrib/unixodbc
|
||||
url = https://github.com/ClickHouse/UnixODBC
|
||||
[submodule "contrib/protobuf"]
|
||||
path = contrib/protobuf
|
||||
url = https://github.com/ClickHouse/protobuf
|
||||
branch = v3.13.0.1
|
||||
[submodule "contrib/google-protobuf"]
|
||||
path = contrib/google-protobuf
|
||||
url = https://github.com/ClickHouse/google-protobuf.git
|
||||
[submodule "contrib/boost"]
|
||||
path = contrib/boost
|
||||
url = https://github.com/ClickHouse/boost
|
||||
@ -267,10 +266,7 @@
|
||||
url = https://github.com/ClickHouse/nats.c
|
||||
[submodule "contrib/vectorscan"]
|
||||
path = contrib/vectorscan
|
||||
url = https://github.com/ClickHouse/vectorscan.git
|
||||
[submodule "contrib/c-ares"]
|
||||
path = contrib/c-ares
|
||||
url = https://github.com/ClickHouse/c-ares
|
||||
url = https://github.com/VectorCamp/vectorscan.git
|
||||
[submodule "contrib/llvm-project"]
|
||||
path = contrib/llvm-project
|
||||
url = https://github.com/ClickHouse/llvm-project
|
||||
@ -344,3 +340,6 @@
|
||||
[submodule "contrib/isa-l"]
|
||||
path = contrib/isa-l
|
||||
url = https://github.com/ClickHouse/isa-l.git
|
||||
[submodule "contrib/c-ares"]
|
||||
path = contrib/c-ares
|
||||
url = https://github.com/c-ares/c-ares.git
|
||||
|
@ -102,6 +102,17 @@ if (ENABLE_FUZZING)
|
||||
set (ENABLE_PROTOBUF 1)
|
||||
endif()
|
||||
|
||||
option (ENABLE_WOBOQ_CODEBROWSER "Build for woboq codebrowser" OFF)
|
||||
|
||||
if (ENABLE_WOBOQ_CODEBROWSER)
|
||||
set (ENABLE_EMBEDDED_COMPILER 0)
|
||||
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-poison-system-directories")
|
||||
# woboq codebrowser uses clang tooling, which may add the default system
|
||||
# clang include directories on its own; clang would then warn about those
|
||||
# self-added includes.
|
||||
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-poison-system-directories")
|
||||
endif()
|
||||
|
||||
# Global libraries
|
||||
# See:
|
||||
# - default_libs.cmake
|
||||
@ -259,8 +270,8 @@ endif ()
|
||||
option (ENABLE_BUILD_PATH_MAPPING "Enable remapping of file source paths in debug info, predefined preprocessor macros, and __builtin_FILE(). It's used to generate reproducible builds. See https://reproducible-builds.org/docs/build-path" ${ENABLE_BUILD_PATH_MAPPING_DEFAULT})
|
||||
|
||||
if (ENABLE_BUILD_PATH_MAPPING)
|
||||
set (COMPILER_FLAGS "${COMPILER_FLAGS} -ffile-prefix-map=${CMAKE_SOURCE_DIR}=.")
|
||||
set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -ffile-prefix-map=${CMAKE_SOURCE_DIR}=.")
|
||||
set (COMPILER_FLAGS "${COMPILER_FLAGS} -ffile-prefix-map=${PROJECT_SOURCE_DIR}=.")
|
||||
set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -ffile-prefix-map=${PROJECT_SOURCE_DIR}=.")
|
||||
endif ()
|
||||
|
||||
option (ENABLE_BUILD_PROFILING "Enable profiling of build time" OFF)
|
||||
@ -557,7 +568,7 @@ if (NATIVE_BUILD_TARGETS
|
||||
)
|
||||
message (STATUS "Building native targets...")
|
||||
|
||||
set (NATIVE_BUILD_DIR "${CMAKE_BINARY_DIR}/native")
|
||||
set (NATIVE_BUILD_DIR "${PROJECT_BINARY_DIR}/native")
|
||||
|
||||
execute_process(
|
||||
COMMAND ${CMAKE_COMMAND} -E make_directory "${NATIVE_BUILD_DIR}"
|
||||
@ -571,7 +582,7 @@ if (NATIVE_BUILD_TARGETS
|
||||
# Avoid overriding .cargo/config.toml with native toolchain.
|
||||
"-DENABLE_RUST=OFF"
|
||||
"-DENABLE_CLICKHOUSE_SELF_EXTRACTING=${ENABLE_CLICKHOUSE_SELF_EXTRACTING}"
|
||||
${CMAKE_SOURCE_DIR}
|
||||
${PROJECT_SOURCE_DIR}
|
||||
WORKING_DIRECTORY "${NATIVE_BUILD_DIR}"
|
||||
COMMAND_ECHO STDOUT)
|
||||
|
||||
|
@ -22,12 +22,10 @@ curl https://clickhouse.com/ | sh
|
||||
|
||||
## Upcoming Events
|
||||
|
||||
* [**v23.5 Release Webinar**](https://clickhouse.com/company/events/v23-5-release-webinar?utm_source=github&utm_medium=social&utm_campaign=release-webinar-2023-05) - May 31 - 23.5 is rapidly approaching. Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release.
|
||||
* [**ClickHouse Meetup in Berlin**](https://www.meetup.com/clickhouse-berlin-user-group/events/292892466) - May 16
|
||||
* [**ClickHouse Meetup in Barcelona**](https://www.meetup.com/clickhouse-barcelona-user-group/events/292892669) - May 25
|
||||
* [**ClickHouse Meetup in London**](https://www.meetup.com/clickhouse-london-user-group/events/292892824) - May 25
|
||||
* [**v23.5 Release Webinar**](https://clickhouse.com/company/events/v23-5-release-webinar?utm_source=github&utm_medium=social&utm_campaign=release-webinar-2023-05) - Jun 8 - 23.5 is rapidly approaching. Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release.
|
||||
* [**ClickHouse Meetup in Bangalore**](https://www.meetup.com/clickhouse-bangalore-user-group/events/293740066/) - Jun 7
|
||||
* [**ClickHouse Meetup in San Francisco**](https://www.meetup.com/clickhouse-silicon-valley-meetup-group/events/293426725/) - Jun 7
|
||||
* [**ClickHouse Meetup in Stockholm**](https://www.meetup.com/clickhouse-berlin-user-group/events/292892466) - Jun 13
|
||||
|
||||
|
||||
Also, keep an eye out for upcoming meetups in Amsterdam, Boston, NYC, Beijing, and Toronto. Somewhere else you want us to be? Please feel free to reach out to tyler <at> clickhouse <dot> com.
|
||||
|
||||
|
@ -3,6 +3,7 @@
|
||||
#include <cassert>
|
||||
#include <stdexcept> // for std::logic_error
|
||||
#include <string>
|
||||
#include <type_traits>
|
||||
#include <vector>
|
||||
#include <functional>
|
||||
#include <iosfwd>
|
||||
@ -326,5 +327,16 @@ namespace ZeroTraits
|
||||
inline void set(StringRef & x) { x.size = 0; }
|
||||
}
|
||||
|
||||
namespace PackedZeroTraits
|
||||
{
|
||||
template <typename Second, template <typename, typename> class PackedPairNoInit>
|
||||
inline bool check(const PackedPairNoInit<StringRef, Second> p)
|
||||
{ return 0 == p.key.size; }
|
||||
|
||||
template <typename Second, template <typename, typename> class PackedPairNoInit>
|
||||
inline void set(PackedPairNoInit<StringRef, Second> & p)
|
||||
{ p.key.size = 0; }
|
||||
}
|
||||
|
||||
|
||||
std::ostream & operator<<(std::ostream & os, const StringRef & str);
|
||||
|
@ -28,14 +28,28 @@ uint64_t getMemoryAmountOrZero()
|
||||
|
||||
#if defined(OS_LINUX)
|
||||
// Try to look up the cgroup limit
|
||||
std::ifstream cgroup_limit("/sys/fs/cgroup/memory/memory.limit_in_bytes");
|
||||
if (cgroup_limit.is_open())
|
||||
|
||||
// CGroups v2
|
||||
std::ifstream cgroupv2_limit("/sys/fs/cgroup/memory.max");
|
||||
if (cgroupv2_limit.is_open())
|
||||
{
|
||||
uint64_t memory_limit = 0; // in case of read error
|
||||
cgroup_limit >> memory_limit;
|
||||
uint64_t memory_limit = 0;
|
||||
cgroupv2_limit >> memory_limit;
|
||||
if (memory_limit > 0 && memory_limit < memory_amount)
|
||||
memory_amount = memory_limit;
|
||||
}
|
||||
else
|
||||
{
|
||||
// CGroups v1
|
||||
std::ifstream cgroup_limit("/sys/fs/cgroup/memory/memory.limit_in_bytes");
|
||||
if (cgroup_limit.is_open())
|
||||
{
|
||||
uint64_t memory_limit = 0; // in case of read error
|
||||
cgroup_limit >> memory_limit;
|
||||
if (memory_limit > 0 && memory_limit < memory_amount)
|
||||
memory_amount = memory_limit;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
return memory_amount;
|
||||
|
@ -274,7 +274,9 @@ void SocketImpl::shutdown()
|
||||
|
||||
int SocketImpl::sendBytes(const void* buffer, int length, int flags)
|
||||
{
|
||||
if (_isBrokenTimeout)
|
||||
bool blocking = _blocking && (flags & MSG_DONTWAIT) == 0;
|
||||
|
||||
if (_isBrokenTimeout && blocking)
|
||||
{
|
||||
if (_sndTimeout.totalMicroseconds() != 0)
|
||||
{
|
||||
@ -289,11 +291,13 @@ int SocketImpl::sendBytes(const void* buffer, int length, int flags)
|
||||
if (_sockfd == POCO_INVALID_SOCKET) throw InvalidSocketException();
|
||||
rc = ::send(_sockfd, reinterpret_cast<const char*>(buffer), length, flags);
|
||||
}
|
||||
while (_blocking && rc < 0 && lastError() == POCO_EINTR);
|
||||
while (blocking && rc < 0 && lastError() == POCO_EINTR);
|
||||
if (rc < 0)
|
||||
{
|
||||
int err = lastError();
|
||||
if (err == POCO_EAGAIN || err == POCO_ETIMEDOUT)
|
||||
if ((err == POCO_EAGAIN || err == POCO_EWOULDBLOCK) && !blocking)
|
||||
;
|
||||
else if (err == POCO_EAGAIN || err == POCO_ETIMEDOUT)
|
||||
throw TimeoutException();
|
||||
else
|
||||
error(err);
|
||||
|
@ -183,6 +183,16 @@ namespace Net
|
||||
/// Returns true iff a reused session was negotiated during
|
||||
/// the handshake.
|
||||
|
||||
virtual void setBlocking(bool flag);
|
||||
/// Sets the socket in blocking mode if flag is true,
|
||||
/// disables blocking mode if flag is false.
|
||||
|
||||
virtual bool getBlocking() const;
|
||||
/// Returns the blocking mode of the socket.
|
||||
/// This method will only work if the blocking modes of
|
||||
/// the socket are changed via the setBlocking method!
|
||||
|
||||
|
||||
protected:
|
||||
void acceptSSL();
|
||||
/// Assume per-object mutex is locked.
|
||||
|
@ -201,6 +201,16 @@ namespace Net
|
||||
/// Returns true iff a reused session was negotiated during
|
||||
/// the handshake.
|
||||
|
||||
virtual void setBlocking(bool flag);
|
||||
/// Sets the socket in blocking mode if flag is true,
|
||||
/// disables blocking mode if flag is false.
|
||||
|
||||
virtual bool getBlocking() const;
|
||||
/// Returns the blocking mode of the socket.
|
||||
/// This method will only work if the blocking modes of
|
||||
/// the socket are changed via the setBlocking method!
|
||||
|
||||
|
||||
protected:
|
||||
void acceptSSL();
|
||||
/// Performs a SSL server-side handshake.
|
||||
|
@ -629,5 +629,15 @@ bool SecureSocketImpl::sessionWasReused()
|
||||
return false;
|
||||
}
|
||||
|
||||
void SecureSocketImpl::setBlocking(bool flag)
|
||||
{
|
||||
_pSocket->setBlocking(flag);
|
||||
}
|
||||
|
||||
bool SecureSocketImpl::getBlocking() const
|
||||
{
|
||||
return _pSocket->getBlocking();
|
||||
}
|
||||
|
||||
|
||||
} } // namespace Poco::Net
|
||||
|
@ -237,5 +237,15 @@ int SecureStreamSocketImpl::completeHandshake()
|
||||
return _impl.completeHandshake();
|
||||
}
|
||||
|
||||
bool SecureStreamSocketImpl::getBlocking() const
|
||||
{
|
||||
return _impl.getBlocking();
|
||||
}
|
||||
|
||||
void SecureStreamSocketImpl::setBlocking(bool flag)
|
||||
{
|
||||
_impl.setBlocking(flag);
|
||||
}
|
||||
|
||||
|
||||
} } // namespace Poco::Net
|
||||
|
@ -5,11 +5,11 @@ if (NOT TARGET check)
|
||||
if (CMAKE_CONFIGURATION_TYPES)
|
||||
add_custom_target (check COMMAND ${CMAKE_CTEST_COMMAND}
|
||||
--force-new-ctest-process --output-on-failure --build-config "$<CONFIGURATION>"
|
||||
WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
|
||||
WORKING_DIRECTORY ${PROJECT_BINARY_DIR})
|
||||
else ()
|
||||
add_custom_target (check COMMAND ${CMAKE_CTEST_COMMAND}
|
||||
--force-new-ctest-process --output-on-failure
|
||||
WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
|
||||
WORKING_DIRECTORY ${PROJECT_BINARY_DIR})
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
|
@ -5,14 +5,14 @@ if (Git_FOUND)
|
||||
# Commit hash + whether the building workspace was dirty or not
|
||||
execute_process(COMMAND
|
||||
"${GIT_EXECUTABLE}" rev-parse HEAD
|
||||
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
|
||||
OUTPUT_VARIABLE GIT_HASH
|
||||
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
|
||||
# Branch name
|
||||
execute_process(COMMAND
|
||||
"${GIT_EXECUTABLE}" rev-parse --abbrev-ref HEAD
|
||||
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
|
||||
OUTPUT_VARIABLE GIT_BRANCH
|
||||
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
|
||||
@ -20,14 +20,14 @@ if (Git_FOUND)
|
||||
SET(ENV{TZ} "UTC")
|
||||
execute_process(COMMAND
|
||||
"${GIT_EXECUTABLE}" log -1 --format=%ad --date=iso-local
|
||||
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
|
||||
OUTPUT_VARIABLE GIT_DATE
|
||||
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
|
||||
# Subject of the commit
|
||||
execute_process(COMMAND
|
||||
"${GIT_EXECUTABLE}" log -1 --format=%s
|
||||
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
|
||||
OUTPUT_VARIABLE GIT_COMMIT_SUBJECT
|
||||
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
|
||||
@ -35,7 +35,7 @@ if (Git_FOUND)
|
||||
|
||||
execute_process(
|
||||
COMMAND ${GIT_EXECUTABLE} status
|
||||
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
else()
|
||||
message(STATUS "Git could not be found.")
|
||||
endif()
|
||||
|
@ -7,6 +7,6 @@ message (STATUS "compiler CXX = ${CMAKE_CXX_COMPILER} ${FULL_CXX_FLAGS}")
|
||||
message (STATUS "LINKER_FLAGS = ${FULL_EXE_LINKER_FLAGS}")
|
||||
|
||||
# Reproducible builds
|
||||
string (REPLACE "${CMAKE_SOURCE_DIR}" "." FULL_C_FLAGS_NORMALIZED "${FULL_C_FLAGS}")
|
||||
string (REPLACE "${CMAKE_SOURCE_DIR}" "." FULL_CXX_FLAGS_NORMALIZED "${FULL_CXX_FLAGS}")
|
||||
string (REPLACE "${CMAKE_SOURCE_DIR}" "." FULL_EXE_LINKER_FLAGS_NORMALIZED "${FULL_EXE_LINKER_FLAGS}")
|
||||
string (REPLACE "${PROJECT_SOURCE_DIR}" "." FULL_C_FLAGS_NORMALIZED "${FULL_C_FLAGS}")
|
||||
string (REPLACE "${PROJECT_SOURCE_DIR}" "." FULL_CXX_FLAGS_NORMALIZED "${FULL_CXX_FLAGS}")
|
||||
string (REPLACE "${PROJECT_SOURCE_DIR}" "." FULL_EXE_LINKER_FLAGS_NORMALIZED "${FULL_EXE_LINKER_FLAGS}")
|
||||
|
@ -8,6 +8,9 @@ option (SANITIZE "Enable one of the code sanitizers" "")
|
||||
|
||||
set (SAN_FLAGS "${SAN_FLAGS} -g -fno-omit-frame-pointer -DSANITIZER")
|
||||
|
||||
# It's possible to pass an ignore list to sanitizers (-fsanitize-ignorelist). Intentionally not doing this because
|
||||
# 1. out-of-source suppressions are awkward 2. it seems ignore lists don't work after the Clang v16 upgrade (#49829)
|
||||
|
||||
if (SANITIZE)
|
||||
if (SANITIZE STREQUAL "address")
|
||||
set (ASAN_FLAGS "-fsanitize=address -fsanitize-address-use-after-scope")
|
||||
@ -29,14 +32,14 @@ if (SANITIZE)
|
||||
|
||||
# Linking can fail due to relocation overflows (see #49145), caused by too big object files / libraries.
|
||||
# Work around this with position-independent builds (-fPIC and -fpie), this is slightly slower than non-PIC/PIE but that's okay.
|
||||
set (MSAN_FLAGS "-fsanitize=memory -fsanitize-memory-use-after-dtor -fsanitize-memory-track-origins -fno-optimize-sibling-calls -fPIC -fpie -fsanitize-blacklist=${CMAKE_SOURCE_DIR}/tests/msan_suppressions.txt")
|
||||
set (MSAN_FLAGS "-fsanitize=memory -fsanitize-memory-use-after-dtor -fsanitize-memory-track-origins -fno-optimize-sibling-calls -fPIC -fpie")
|
||||
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} ${MSAN_FLAGS}")
|
||||
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} ${MSAN_FLAGS}")
|
||||
|
||||
elseif (SANITIZE STREQUAL "thread")
|
||||
set (TSAN_FLAGS "-fsanitize=thread")
|
||||
if (COMPILER_CLANG)
|
||||
set (TSAN_FLAGS "${TSAN_FLAGS} -fsanitize-blacklist=${CMAKE_SOURCE_DIR}/tests/tsan_suppressions.txt")
|
||||
set (TSAN_FLAGS "${TSAN_FLAGS} -fsanitize-blacklist=${PROJECT_SOURCE_DIR}/tests/tsan_suppressions.txt")
|
||||
endif()
|
||||
|
||||
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} ${TSAN_FLAGS}")
|
||||
@ -54,7 +57,7 @@ if (SANITIZE)
|
||||
set(UBSAN_FLAGS "${UBSAN_FLAGS} -fno-sanitize=unsigned-integer-overflow")
|
||||
endif()
|
||||
if (COMPILER_CLANG)
|
||||
set (UBSAN_FLAGS "${UBSAN_FLAGS} -fsanitize-blacklist=${CMAKE_SOURCE_DIR}/tests/ubsan_suppressions.txt")
|
||||
set (UBSAN_FLAGS "${UBSAN_FLAGS} -fsanitize-blacklist=${PROJECT_SOURCE_DIR}/tests/ubsan_suppressions.txt")
|
||||
endif()
|
||||
|
||||
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} ${UBSAN_FLAGS}")
|
||||
|
@ -1,2 +1,2 @@
|
||||
wget https://github.com/phracker/MacOSX-SDKs/releases/download/10.15/MacOSX10.15.sdk.tar.xz
|
||||
tar xJf MacOSX10.15.sdk.tar.xz --strip-components=1
|
||||
wget https://github.com/phracker/MacOSX-SDKs/releases/download/11.3/MacOSX11.0.sdk.tar.xz
|
||||
tar xJf MacOSX11.0.sdk.tar.xz --strip-components=1
|
||||
|
@ -1,4 +1,4 @@
|
||||
include(${CMAKE_SOURCE_DIR}/cmake/autogenerated_versions.txt)
|
||||
include(${PROJECT_SOURCE_DIR}/cmake/autogenerated_versions.txt)
|
||||
|
||||
set(VERSION_EXTRA "" CACHE STRING "")
|
||||
set(VERSION_TWEAK "" CACHE STRING "")
|
||||
|
4
contrib/CMakeLists.txt
vendored
4
contrib/CMakeLists.txt
vendored
@ -88,7 +88,7 @@ add_contrib (thrift-cmake thrift)
|
||||
# parquet/arrow/orc
|
||||
add_contrib (arrow-cmake arrow) # requires: snappy, thrift, double-conversion
|
||||
add_contrib (avro-cmake avro) # requires: snappy
|
||||
add_contrib (protobuf-cmake protobuf)
|
||||
add_contrib (google-protobuf-cmake google-protobuf)
|
||||
add_contrib (openldap-cmake openldap)
|
||||
add_contrib (grpc-cmake grpc)
|
||||
add_contrib (msgpack-c-cmake msgpack-c)
|
||||
@ -156,7 +156,7 @@ add_contrib (libgsasl-cmake libgsasl) # requires krb5
|
||||
add_contrib (librdkafka-cmake librdkafka) # requires: libgsasl
|
||||
add_contrib (nats-io-cmake nats-io)
|
||||
add_contrib (isa-l-cmake isa-l)
|
||||
add_contrib (libhdfs3-cmake libhdfs3) # requires: protobuf, krb5, isa-l
|
||||
add_contrib (libhdfs3-cmake libhdfs3) # requires: google-protobuf, krb5, isa-l
|
||||
add_contrib (hive-metastore-cmake hive-metastore) # requires: thrift/avro/arrow/libhdfs3
|
||||
add_contrib (cppkafka-cmake cppkafka)
|
||||
add_contrib (libpqxx-cmake libpqxx)
|
||||
|
@ -6,7 +6,7 @@ if (NOT ENABLE_AVRO)
|
||||
return()
|
||||
endif()
|
||||
|
||||
set(AVROCPP_ROOT_DIR "${CMAKE_SOURCE_DIR}/contrib/avro/lang/c++")
|
||||
set(AVROCPP_ROOT_DIR "${PROJECT_SOURCE_DIR}/contrib/avro/lang/c++")
|
||||
set(AVROCPP_INCLUDE_DIR "${AVROCPP_ROOT_DIR}/api")
|
||||
set(AVROCPP_SOURCE_DIR "${AVROCPP_ROOT_DIR}/impl")
|
||||
|
||||
|
2
contrib/aws
vendored
2
contrib/aws
vendored
@ -1 +1 @@
|
||||
Subproject commit ecccfc026a42b30023289410a67024d561f4bf3e
|
||||
Subproject commit ca02358dcc7ce3ab733dd4cbcc32734eecfa4ee3
|
2
contrib/aws-c-auth
vendored
2
contrib/aws-c-auth
vendored
@ -1 +1 @@
|
||||
Subproject commit 30df6c407e2df43bd244e2c34c9b4a4b87372bfb
|
||||
Subproject commit 97133a2b5dbca1ccdf88cd6f44f39d0531d27d12
|
2
contrib/aws-c-common
vendored
2
contrib/aws-c-common
vendored
@ -1 +1 @@
|
||||
Subproject commit 324fd1d973ccb25c813aa747bf1759cfde5121c5
|
||||
Subproject commit 45dcb2849c891dba2100b270b4676765c92949ff
|
2
contrib/aws-c-event-stream
vendored
2
contrib/aws-c-event-stream
vendored
@ -1 +1 @@
|
||||
Subproject commit 39bfa94a14b7126bf0c1330286ef8db452d87e66
|
||||
Subproject commit 2f9b60c42f90840ec11822acda3d8cdfa97a773d
|
2
contrib/aws-c-http
vendored
2
contrib/aws-c-http
vendored
@ -1 +1 @@
|
||||
Subproject commit 2c5a2a7d5556600b9782ffa6c9d7e09964df1abc
|
||||
Subproject commit dd34461987947672444d0bc872c5a733dfdb9711
|
2
contrib/aws-c-io
vendored
2
contrib/aws-c-io
vendored
@ -1 +1 @@
|
||||
Subproject commit 5d32c453560d0823df521a686bf7fbacde7f9be3
|
||||
Subproject commit d58ed4f272b1cb4f89ac9196526ceebe5f2b0d89
|
2
contrib/aws-c-mqtt
vendored
2
contrib/aws-c-mqtt
vendored
@ -1 +1 @@
|
||||
Subproject commit 882c689561a3db1466330ccfe3b63637e0a575d3
|
||||
Subproject commit 33c3455cec82b16feb940e12006cefd7b3ef4194
|
2
contrib/aws-c-s3
vendored
2
contrib/aws-c-s3
vendored
@ -1 +1 @@
|
||||
Subproject commit a41255ece72a7c887bba7f9d998ca3e14f4c8a1b
|
||||
Subproject commit d7bfe602d6925948f1fff95784e3613cca6a3900
|
2
contrib/aws-c-sdkutils
vendored
2
contrib/aws-c-sdkutils
vendored
@ -1 +1 @@
|
||||
Subproject commit 25bf5cf225f977c3accc6a05a0a7a181ef2a4a30
|
||||
Subproject commit 208a701fa01e99c7c8cc3dcebc8317da71362972
|
2
contrib/aws-checksums
vendored
2
contrib/aws-checksums
vendored
@ -1 +1 @@
|
||||
Subproject commit 48e7c0e01479232f225c8044d76c84e74192889d
|
||||
Subproject commit ad53be196a25bbefa3700a01187fdce573a7d2d0
|
@ -52,8 +52,8 @@ endif()
|
||||
|
||||
# Directories.
|
||||
SET(AWS_SDK_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws")
|
||||
SET(AWS_SDK_CORE_DIR "${AWS_SDK_DIR}/aws-cpp-sdk-core")
|
||||
SET(AWS_SDK_S3_DIR "${AWS_SDK_DIR}/aws-cpp-sdk-s3")
|
||||
SET(AWS_SDK_CORE_DIR "${AWS_SDK_DIR}/src/aws-cpp-sdk-core")
|
||||
SET(AWS_SDK_S3_DIR "${AWS_SDK_DIR}/generated/src/aws-cpp-sdk-s3")
|
||||
|
||||
SET(AWS_AUTH_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws-c-auth")
|
||||
SET(AWS_CAL_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws-c-cal")
|
||||
@ -118,7 +118,7 @@ configure_file("${AWS_SDK_CORE_DIR}/include/aws/core/SDKConfig.h.in"
|
||||
list(APPEND AWS_PUBLIC_COMPILE_DEFS "-DAWS_SDK_VERSION_MAJOR=1")
|
||||
list(APPEND AWS_PUBLIC_COMPILE_DEFS "-DAWS_SDK_VERSION_MINOR=10")
|
||||
list(APPEND AWS_PUBLIC_COMPILE_DEFS "-DAWS_SDK_VERSION_PATCH=36")
|
||||
|
||||
|
||||
list(APPEND AWS_SOURCES ${AWS_SDK_CORE_SRC} ${AWS_SDK_CORE_NET_SRC} ${AWS_SDK_CORE_PLATFORM_SRC})
|
||||
|
||||
list(APPEND AWS_PUBLIC_INCLUDES
|
||||
|
2
contrib/aws-crt-cpp
vendored
2
contrib/aws-crt-cpp
vendored
@ -1 +1 @@
|
||||
Subproject commit ec0bea288f451d884c0d80d534bc5c66241c39a4
|
||||
Subproject commit 8a301b7e842f1daed478090c869207300972379f
|
2
contrib/aws-s2n-tls
vendored
2
contrib/aws-s2n-tls
vendored
@ -1 +1 @@
|
||||
Subproject commit 0f1ba9e5c4a67cb3898de0c0b4f911d4194dc8de
|
||||
Subproject commit 71f4794b7580cf780eb4aca77d69eded5d3c7bb4
|
2
contrib/boost
vendored
2
contrib/boost
vendored
@ -1 +1 @@
|
||||
Subproject commit 8fe7b3326ef482ee6ecdf5a4f698f2b8c2780f98
|
||||
Subproject commit aec12eea7fc762721ae16943d1361340c66c9c17
|
2
contrib/c-ares
vendored
2
contrib/c-ares
vendored
@ -1 +1 @@
|
||||
Subproject commit afee6748b0b99acf4509d42fa37ac8422262f91b
|
||||
Subproject commit 6360e96b5cf8e5980c887ce58ef727e53d77243a
|
@ -48,6 +48,7 @@ SET(SRCS
|
||||
"${LIBRARY_DIR}/src/lib/ares_platform.c"
|
||||
"${LIBRARY_DIR}/src/lib/ares_process.c"
|
||||
"${LIBRARY_DIR}/src/lib/ares_query.c"
|
||||
"${LIBRARY_DIR}/src/lib/ares_rand.c"
|
||||
"${LIBRARY_DIR}/src/lib/ares_search.c"
|
||||
"${LIBRARY_DIR}/src/lib/ares_send.c"
|
||||
"${LIBRARY_DIR}/src/lib/ares_strcasecmp.c"
|
||||
|
@ -18,7 +18,7 @@ endif()
|
||||
# Need to use C++17 since the compilation is not possible with C++20 currently.
|
||||
set (CMAKE_CXX_STANDARD 17)
|
||||
|
||||
set(CASS_ROOT_DIR ${CMAKE_SOURCE_DIR}/contrib/cassandra)
|
||||
set(CASS_ROOT_DIR ${PROJECT_SOURCE_DIR}/contrib/cassandra)
|
||||
set(CASS_SRC_DIR "${CASS_ROOT_DIR}/src")
|
||||
set(CASS_INCLUDE_DIR "${CASS_ROOT_DIR}/include")
|
||||
|
||||
|
@ -26,7 +26,7 @@ endif ()
|
||||
# StorageSystemTimeZones.generated.cpp is autogenerated each time during a build
|
||||
# data in this file will be used to populate the system.time_zones table, this is specific to OS_LINUX
|
||||
# as the library that's built using embedded tzdata is also specific to OS_LINUX
|
||||
set(SYSTEM_STORAGE_TZ_FILE "${CMAKE_BINARY_DIR}/src/Storages/System/StorageSystemTimeZones.generated.cpp")
|
||||
set(SYSTEM_STORAGE_TZ_FILE "${PROJECT_BINARY_DIR}/src/Storages/System/StorageSystemTimeZones.generated.cpp")
|
||||
# remove existing copies so that it's generated fresh on each build.
|
||||
file(REMOVE ${SYSTEM_STORAGE_TZ_FILE})
|
||||
|
||||
|
1
contrib/google-protobuf
vendored
Submodule
1
contrib/google-protobuf
vendored
Submodule
@ -0,0 +1 @@
|
||||
Subproject commit c47efe2d8f6a60022b49ecd6cc23660687c8598f
|
@ -5,7 +5,7 @@ if(NOT ENABLE_PROTOBUF)
|
||||
return()
|
||||
endif()
|
||||
|
||||
set(Protobuf_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/protobuf/src")
|
||||
set(Protobuf_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/google-protobuf/src")
|
||||
if(OS_FREEBSD AND SANITIZE STREQUAL "address")
|
||||
# ../contrib/protobuf/src/google/protobuf/arena_impl.h:45:10: fatal error: 'sanitizer/asan_interface.h' file not found
|
||||
# #include <sanitizer/asan_interface.h>
|
||||
@ -17,8 +17,8 @@ if(OS_FREEBSD AND SANITIZE STREQUAL "address")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
set(protobuf_source_dir "${ClickHouse_SOURCE_DIR}/contrib/protobuf")
|
||||
set(protobuf_binary_dir "${ClickHouse_BINARY_DIR}/contrib/protobuf")
|
||||
set(protobuf_source_dir "${ClickHouse_SOURCE_DIR}/contrib/google-protobuf")
|
||||
set(protobuf_binary_dir "${ClickHouse_BINARY_DIR}/contrib/google-protobuf")
|
||||
|
||||
|
||||
add_definitions(-DGOOGLE_PROTOBUF_CMAKE_BUILD)
|
||||
@ -35,7 +35,6 @@ set(libprotobuf_lite_files
|
||||
${protobuf_source_dir}/src/google/protobuf/arena.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/arenastring.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/extension_set.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/field_access_listener.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/generated_enum_util.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/generated_message_table_driven_lite.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/generated_message_util.cc
|
||||
@ -86,6 +85,7 @@ set(libprotobuf_files
|
||||
${protobuf_source_dir}/src/google/protobuf/empty.pb.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/extension_set_heavy.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/field_mask.pb.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/generated_message_bases.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/generated_message_reflection.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/generated_message_table_driven.cc
|
||||
${protobuf_source_dir}/src/google/protobuf/io/gzip_stream.cc
|
||||
@ -316,7 +316,7 @@ else ()
|
||||
add_dependencies(protoc "${PROTOC_BUILD_DIR}/protoc")
|
||||
endif ()
|
||||
|
||||
include("${ClickHouse_SOURCE_DIR}/contrib/protobuf-cmake/protobuf_generate.cmake")
|
||||
include("${ClickHouse_SOURCE_DIR}/contrib/google-protobuf-cmake/protobuf_generate.cmake")
|
||||
|
||||
add_library(_protobuf INTERFACE)
|
||||
target_link_libraries(_protobuf INTERFACE _libprotobuf)
|
2
contrib/libgsasl
vendored
2
contrib/libgsasl
vendored
@ -1 +1 @@
|
||||
Subproject commit f4e7bf0bb068030d57266f87ccac4c8c012fb5c4
|
||||
Subproject commit 0fb79e7609ae5a5e015a41d24bcbadd48f8f5469
|
2
contrib/libpqxx
vendored
2
contrib/libpqxx
vendored
@ -1 +1 @@
|
||||
Subproject commit a4e834839270a8c1f7ff1db351ba85afced3f0e2
|
||||
Subproject commit bdd6540fb95ff56c813691ceb5da5a3266cf235d
|
@ -1,7 +1,7 @@
|
||||
# This file is a modified version of contrib/libuv/CMakeLists.txt
|
||||
|
||||
set (SOURCE_DIR "${CMAKE_SOURCE_DIR}/contrib/libuv")
|
||||
set (BINARY_DIR "${CMAKE_BINARY_DIR}/contrib/libuv")
|
||||
set (SOURCE_DIR "${PROJECT_SOURCE_DIR}/contrib/libuv")
|
||||
set (BINARY_DIR "${PROJECT_BINARY_DIR}/contrib/libuv")
|
||||
|
||||
set(uv_sources
|
||||
src/fs-poll.c
|
||||
|
2
contrib/libxml2
vendored
2
contrib/libxml2
vendored
@ -1 +1 @@
|
||||
Subproject commit f507d167f1755b7eaea09fb1a44d29aab828b6d1
|
||||
Subproject commit 223cb03a5d27b1b2393b266a8657443d046139d6
|
@ -15,7 +15,7 @@ endif()
|
||||
|
||||
# This is the LGPL libmariadb project.
|
||||
|
||||
set(CC_SOURCE_DIR ${CMAKE_SOURCE_DIR}/contrib/mariadb-connector-c)
|
||||
set(CC_SOURCE_DIR ${PROJECT_SOURCE_DIR}/contrib/mariadb-connector-c)
|
||||
set(CC_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
|
||||
|
||||
set(WITH_SSL ON)
|
||||
|
1
contrib/protobuf
vendored
1
contrib/protobuf
vendored
@ -1 +0,0 @@
|
||||
Subproject commit 6bb70196c5360268d9f021bb7936fb0b551724c2
|
@ -25,6 +25,9 @@ message(STATUS "Intel QPL version: ${QPL_VERSION}")
|
||||
# Generate 8 library targets: middle_layer_lib, isal, isal_asm, qplcore_px, qplcore_avx512, qplcore_sw_dispatcher, core_iaa, middle_layer_lib.
|
||||
# Output ch_contrib::qpl by linking with 8 library targets.
|
||||
|
||||
# The qpl submodule comes with its own version of isal. It contains code which does not exist in upstream isal. It would be nice to link
|
||||
# only upstream isal (ch_contrib::isal) but at this point we can't.
|
||||
|
||||
include("${QPL_PROJECT_DIR}/cmake/CompileOptions.cmake")
|
||||
|
||||
# check nasm compiler
|
||||
@ -308,7 +311,7 @@ target_include_directories(middle_layer_lib
|
||||
target_compile_definitions(middle_layer_lib PUBLIC -DQPL_LIB)
|
||||
|
||||
# [SUBDIR]c_api
|
||||
file(GLOB_RECURSE QPL_C_API_SRC
|
||||
file(GLOB_RECURSE QPL_C_API_SRC
|
||||
${QPL_SRC_DIR}/c_api/*.c
|
||||
${QPL_SRC_DIR}/c_api/*.cpp)
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
set (SOURCE_DIR "${CMAKE_SOURCE_DIR}/contrib/snappy")
|
||||
set (SOURCE_DIR "${PROJECT_SOURCE_DIR}/contrib/snappy")
|
||||
|
||||
if (ARCH_S390X)
|
||||
set (SNAPPY_IS_BIG_ENDIAN 1)
|
||||
|
@ -5,8 +5,8 @@ echo "Using sparse checkout for aws"
|
||||
FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout
|
||||
echo '/*' > $FILES_TO_CHECKOUT
|
||||
echo '!/*/*' >> $FILES_TO_CHECKOUT
|
||||
echo '/aws-cpp-sdk-core/*' >> $FILES_TO_CHECKOUT
|
||||
echo '/aws-cpp-sdk-s3/*' >> $FILES_TO_CHECKOUT
|
||||
echo '/src/aws-cpp-sdk-core/*' >> $FILES_TO_CHECKOUT
|
||||
echo '/generated/src/aws-cpp-sdk-s3/*' >> $FILES_TO_CHECKOUT
|
||||
|
||||
git config core.sparsecheckout true
|
||||
git checkout $1
|
||||
|
2
contrib/vectorscan
vendored
2
contrib/vectorscan
vendored
@ -1 +1 @@
|
||||
Subproject commit 1f4d448314e581473103187765e4c949d01b4259
|
||||
Subproject commit 38431d111781843741a781a57a6381a527d900a4
|
@ -1,4 +1,4 @@
|
||||
set (SOURCE_DIR ${CMAKE_SOURCE_DIR}/contrib/zlib-ng)
|
||||
set (SOURCE_DIR ${PROJECT_SOURCE_DIR}/contrib/zlib-ng)
|
||||
|
||||
add_definitions(-DZLIB_COMPAT)
|
||||
add_definitions(-DWITH_GZFILEOP)
|
||||
|
@ -46,10 +46,12 @@ ENV CXX=clang++-${LLVM_VERSION}
|
||||
# Rust toolchain and libraries
|
||||
ENV RUSTUP_HOME=/rust/rustup
|
||||
ENV CARGO_HOME=/rust/cargo
|
||||
ENV PATH="/rust/cargo/env:${PATH}"
|
||||
ENV PATH="/rust/cargo/bin:${PATH}"
|
||||
RUN curl https://sh.rustup.rs -sSf | bash -s -- -y && \
|
||||
chmod 777 -R /rust && \
|
||||
rustup toolchain install nightly && \
|
||||
rustup default nightly && \
|
||||
rustup component add rust-src && \
|
||||
rustup target add aarch64-unknown-linux-gnu && \
|
||||
rustup target add x86_64-apple-darwin && \
|
||||
rustup target add x86_64-unknown-freebsd && \
|
||||
|
@ -11,9 +11,11 @@ ccache_status () {
|
||||
|
||||
[ -O /build ] || git config --global --add safe.directory /build
|
||||
|
||||
mkdir -p /build/cmake/toolchain/darwin-x86_64
|
||||
tar xJf /MacOSX11.0.sdk.tar.xz -C /build/cmake/toolchain/darwin-x86_64 --strip-components=1
|
||||
ln -sf darwin-x86_64 /build/cmake/toolchain/darwin-aarch64
|
||||
if [ "$EXTRACT_TOOLCHAIN_DARWIN" = "1" ]; then
|
||||
mkdir -p /build/cmake/toolchain/darwin-x86_64
|
||||
tar xJf /MacOSX11.0.sdk.tar.xz -C /build/cmake/toolchain/darwin-x86_64 --strip-components=1
|
||||
ln -sf darwin-x86_64 /build/cmake/toolchain/darwin-aarch64
|
||||
fi
|
||||
|
||||
# Uncomment to debug ccache. Don't put ccache log in /output right away, or it
|
||||
# will be confusingly packed into the "performance" package.
|
||||
|
@ -167,6 +167,7 @@ def parse_env_variables(
|
||||
cmake_flags.append(
|
||||
"-DCMAKE_TOOLCHAIN_FILE=/build/cmake/darwin/toolchain-x86_64.cmake"
|
||||
)
|
||||
result.append("EXTRACT_TOOLCHAIN_DARWIN=1")
|
||||
elif is_cross_darwin_arm:
|
||||
cc = compiler[: -len(DARWIN_ARM_SUFFIX)]
|
||||
cmake_flags.append("-DCMAKE_AR:FILEPATH=/cctools/bin/aarch64-apple-darwin-ar")
|
||||
@ -181,6 +182,7 @@ def parse_env_variables(
|
||||
cmake_flags.append(
|
||||
"-DCMAKE_TOOLCHAIN_FILE=/build/cmake/darwin/toolchain-aarch64.cmake"
|
||||
)
|
||||
result.append("EXTRACT_TOOLCHAIN_DARWIN=1")
|
||||
elif is_cross_arm:
|
||||
cc = compiler[: -len(ARM_SUFFIX)]
|
||||
cmake_flags.append(
|
||||
|
@ -15,7 +15,7 @@ nproc=$(($(nproc) + 2)) # increase parallelism
|
||||
read -ra CMAKE_FLAGS <<< "${CMAKE_FLAGS:-}"
|
||||
|
||||
mkdir -p "$BUILD_DIRECTORY" && cd "$BUILD_DIRECTORY"
|
||||
cmake "$SOURCE_DIRECTORY" -DCMAKE_CXX_COMPILER="/usr/bin/clang++-${LLVM_VERSION}" -DCMAKE_C_COMPILER="/usr/bin/clang-${LLVM_VERSION}" -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DENABLE_EMBEDDED_COMPILER=0 "${CMAKE_FLAGS[@]}"
|
||||
cmake "$SOURCE_DIRECTORY" -DCMAKE_CXX_COMPILER="/usr/bin/clang++-${LLVM_VERSION}" -DCMAKE_C_COMPILER="/usr/bin/clang-${LLVM_VERSION}" -DENABLE_WOBOQ_CODEBROWSER=ON "${CMAKE_FLAGS[@]}"
|
||||
mkdir -p "$HTML_RESULT_DIRECTORY"
|
||||
echo 'Filter out too noisy "Error: filename" lines and keep them in full codebrowser_generator.log'
|
||||
/woboq_codebrowser/generator/codebrowser_generator -b "$BUILD_DIRECTORY" -a \
|
||||
|
@ -626,7 +626,9 @@ if args.report == "main":
|
||||
message_array.append(str(faster_queries) + " faster")
|
||||
|
||||
if slower_queries:
|
||||
if slower_queries > 3:
|
||||
# This threshold should be synchronized with the value in https://github.com/ClickHouse/ClickHouse/blob/master/tests/ci/performance_comparison_check.py#L225
|
||||
# False positives rate should be < 1%: https://shorturl.at/CDEK8
|
||||
if slower_queries > 5:
|
||||
status = "failure"
|
||||
message_array.append(str(slower_queries) + " slower")
|
||||
|
||||
|
@ -132,6 +132,9 @@ function run_tests()
|
||||
|
||||
ADDITIONAL_OPTIONS+=('--report-logs-stats')
|
||||
|
||||
clickhouse-test "00001_select_1" > /dev/null ||:
|
||||
clickhouse-client -q "insert into system.zookeeper (name, path, value) values ('auxiliary_zookeeper2', '/test/chroot/', '')" ||:
|
||||
|
||||
set +e
|
||||
clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \
|
||||
--test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \
|
||||
|
@ -3,5 +3,5 @@
|
||||
set -x
|
||||
|
||||
service zookeeper start && sleep 7 && /usr/share/zookeeper/bin/zkCli.sh -server localhost:2181 -create create /clickhouse_test '';
|
||||
gdb -q -ex 'set print inferior-events off' -ex 'set confirm off' -ex 'set print thread-events off' -ex run -ex bt -ex quit --args ./unit_tests_dbms | tee test_output/test_result.txt
|
||||
timeout 40m gdb -q -ex 'set print inferior-events off' -ex 'set confirm off' -ex 'set print thread-events off' -ex run -ex bt -ex quit --args ./unit_tests_dbms | tee test_output/test_result.txt
|
||||
./process_unit_tests_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv
|
||||
|
@ -65,6 +65,9 @@ sudo cat /etc/clickhouse-server/config.d/storage_conf.xml \
|
||||
> /etc/clickhouse-server/config.d/storage_conf.xml.tmp
|
||||
sudo mv /etc/clickhouse-server/config.d/storage_conf.xml.tmp /etc/clickhouse-server/config.d/storage_conf.xml
|
||||
|
||||
# it contains some new settings, but we can safely remove it
|
||||
rm /etc/clickhouse-server/config.d/merge_tree.xml
|
||||
|
||||
start
|
||||
stop
|
||||
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.initial.log
|
||||
@ -94,6 +97,9 @@ sudo cat /etc/clickhouse-server/config.d/storage_conf.xml \
|
||||
> /etc/clickhouse-server/config.d/storage_conf.xml.tmp
|
||||
sudo mv /etc/clickhouse-server/config.d/storage_conf.xml.tmp /etc/clickhouse-server/config.d/storage_conf.xml
|
||||
|
||||
# it contains some new settings, but we can safely remove it
|
||||
rm /etc/clickhouse-server/config.d/merge_tree.xml
|
||||
|
||||
start
|
||||
|
||||
clickhouse-client --query="SELECT 'Server version: ', version()"
|
||||
|
@ -1,6 +0,0 @@
|
||||
# ARM (AArch64) build works on Amazon Graviton, Oracle Cloud, Huawei Cloud ARM machines.
|
||||
# The support for AArch64 is pre-production ready.
|
||||
|
||||
wget 'https://builds.clickhouse.com/master/aarch64/clickhouse'
|
||||
chmod a+x ./clickhouse
|
||||
sudo ./clickhouse install
|
@ -1,3 +0,0 @@
|
||||
fetch 'https://builds.clickhouse.com/master/freebsd/clickhouse'
|
||||
chmod a+x ./clickhouse
|
||||
su -m root -c './clickhouse install'
|
@ -1,3 +0,0 @@
|
||||
wget 'https://builds.clickhouse.com/master/macos-aarch64/clickhouse'
|
||||
chmod a+x ./clickhouse
|
||||
./clickhouse
|
@ -1,3 +0,0 @@
|
||||
wget 'https://builds.clickhouse.com/master/macos/clickhouse'
|
||||
chmod a+x ./clickhouse
|
||||
./clickhouse
|
@ -22,7 +22,7 @@ The minimum recommended Ubuntu version for development is 22.04 LTS.
|
||||
### Install Prerequisites {#install-prerequisites}
|
||||
|
||||
``` bash
|
||||
sudo apt-get install git cmake ccache python3 ninja-build nasm yasm gawk
|
||||
sudo apt-get install git cmake ccache python3 ninja-build nasm yasm gawk lsb-release wget software-properties-common gnupg
|
||||
```
|
||||
|
||||
### Install and Use the Clang compiler
|
||||
@ -43,9 +43,14 @@ sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test
|
||||
For other Linux distributions, check the availability of LLVM's [prebuilt packages](https://releases.llvm.org/download.html).
|
||||
|
||||
As of April 2023, any version of Clang >= 15 will work.
|
||||
GCC as a compiler is not supported
|
||||
GCC as a compiler is not supported.
|
||||
To build with a specific Clang version:
|
||||
|
||||
:::tip
|
||||
This is optional, if you are following along and just now installed Clang then check
|
||||
to see what version you have installed before setting this environment variable.
|
||||
:::
|
||||
|
||||
``` bash
|
||||
export CC=clang-16
|
||||
export CXX=clang++-16
|
||||
@ -109,18 +114,3 @@ mkdir build
|
||||
cmake -S . -B build
|
||||
cmake --build build
|
||||
```
|
||||
|
||||
## You Don’t Have to Build ClickHouse {#you-dont-have-to-build-clickhouse}
|
||||
|
||||
ClickHouse is available in pre-built binaries and packages. Binaries are portable and can be run on any Linux flavour.
|
||||
|
||||
The CI checks build the binaries on each commit to [ClickHouse](https://github.com/clickhouse/clickhouse/). To download them:
|
||||
|
||||
1. Open the [commits list](https://github.com/ClickHouse/ClickHouse/commits/master)
|
||||
1. Choose a **Merge pull request** commit that includes the new feature, or was added after the new feature
|
||||
1. Click the status symbol (yellow dot, red x, green check) to open the CI check list
|
||||
1. Scroll through the list until you find **ClickHouse build check x/x artifact groups are OK**
|
||||
1. Click **Details**
|
||||
1. Find the type of package for your operating system that you need and download the files.
|
||||
|
||||
![build artifact check](images/find-build-artifact.png)
|
||||
|
@ -119,7 +119,7 @@ When working with the `MaterializedMySQL` database engine, [ReplacingMergeTree](
|
||||
|
||||
The data of TIME type in MySQL is converted to microseconds in ClickHouse.
|
||||
|
||||
Other types are not supported. If MySQL table contains a column of such type, ClickHouse throws exception "Unhandled data type" and stops replication.
|
||||
Other types are not supported. If MySQL table contains a column of such type, ClickHouse throws an exception and stops replication.
|
||||
|
||||
## Specifics and Recommendations {#specifics-and-recommendations}
|
||||
|
||||
|
@ -55,7 +55,7 @@ ATTACH TABLE postgres_database.new_table;
|
||||
```
|
||||
|
||||
:::warning
|
||||
Before version 22.1, adding a table to replication left an unremoved temporary replication slot (named `{db_name}_ch_replication_slot_tmp`). If attaching tables in ClickHouse version before 22.1, make sure to delete it manually (`SELECT pg_drop_replication_slot('{db_name}_ch_replication_slot_tmp')`). Otherwise disk usage will grow. This issue is fixed in 22.1.
|
||||
Before version 22.1, adding a table to replication left a non-removed temporary replication slot (named `{db_name}_ch_replication_slot_tmp`). If attaching tables in ClickHouse version before 22.1, make sure to delete it manually (`SELECT pg_drop_replication_slot('{db_name}_ch_replication_slot_tmp')`). Otherwise disk usage will grow. This issue is fixed in 22.1.
|
||||
:::
|
||||
|
||||
## Dynamically removing tables from replication {#dynamically-removing-table-from-replication}
|
||||
@ -257,7 +257,7 @@ Please note that this should be used only if it is actually needed. If there is
|
||||
|
||||
1. [CREATE PUBLICATION](https://postgrespro.ru/docs/postgresql/14/sql-createpublication) -- create query privilege.
|
||||
|
||||
2. [CREATE_REPLICATION_SLOT](https://postgrespro.ru/docs/postgrespro/10/protocol-replication#PROTOCOL-REPLICATION-CREATE-SLOT) -- replication privelege.
|
||||
2. [CREATE_REPLICATION_SLOT](https://postgrespro.ru/docs/postgrespro/10/protocol-replication#PROTOCOL-REPLICATION-CREATE-SLOT) -- replication privilege.
|
||||
|
||||
3. [pg_drop_replication_slot](https://postgrespro.ru/docs/postgrespro/9.5/functions-admin#functions-replication) -- replication privilege or superuser.
|
||||
|
||||
|
@ -30,7 +30,7 @@ Allows to connect to [SQLite](https://www.sqlite.org/index.html) database and pe
|
||||
|
||||
## Specifics and Recommendations {#specifics-and-recommendations}
|
||||
|
||||
SQLite stores the entire database (definitions, tables, indices, and the data itself) as a single cross-platform file on a host machine. During writing SQLite locks the entire database file, therefore write operations are performed sequentially. Read operations can be multitasked.
|
||||
SQLite stores the entire database (definitions, tables, indices, and the data itself) as a single cross-platform file on a host machine. During writing SQLite locks the entire database file, therefore write operations are performed sequentially. Read operations can be multi-tasked.
|
||||
SQLite does not require service management (such as startup scripts) or access control based on `GRANT` and passwords. Access control is handled by means of file-system permissions given to the database file itself.
|
||||
|
||||
## Usage Example {#usage-example}
|
||||
|
@ -120,3 +120,93 @@ Values can be updated using the `ALTER TABLE` query. The primary key cannot be u
|
||||
```sql
|
||||
ALTER TABLE test UPDATE v1 = v1 * 10 + 2 WHERE key LIKE 'some%' AND v3 > 3.1;
|
||||
```
|
||||
|
||||
### Joins
|
||||
|
||||
A special `direct` join with EmbeddedRocksDB tables is supported.
|
||||
This direct join avoids forming a hash table in memory and accesses
|
||||
the data directly from the EmbeddedRocksDB.
|
||||
|
||||
With large joins you may see much lower memory usage with direct joins
|
||||
because the hash table is not created.
|
||||
|
||||
To enable direct joins:
|
||||
```sql
|
||||
SET join_algorithm = 'direct, hash'
|
||||
```
|
||||
|
||||
:::tip
|
||||
When the `join_algorithm` is set to `direct, hash`, direct joins will be used
|
||||
when possible, and hash otherwise.
|
||||
:::
|
||||
|
||||
#### Example
|
||||
|
||||
##### Create and populate an EmbeddedRocksDB table:
|
||||
```sql
|
||||
CREATE TABLE rdb
|
||||
(
|
||||
`key` UInt32,
|
||||
`value` Array(UInt32),
|
||||
`value2` String
|
||||
)
|
||||
ENGINE = EmbeddedRocksDB
|
||||
PRIMARY KEY key
|
||||
```
|
||||
|
||||
```sql
|
||||
INSERT INTO rdb
|
||||
SELECT
|
||||
toUInt32(sipHash64(number) % 10) as key,
|
||||
[key, key+1] as value,
|
||||
('val2' || toString(key)) as value2
|
||||
FROM numbers_mt(10);
|
||||
```
|
||||
|
||||
##### Create and populate a table to join with table `rdb`:
|
||||
|
||||
```sql
|
||||
CREATE TABLE t2
|
||||
(
|
||||
`k` UInt16
|
||||
)
|
||||
ENGINE = TinyLog
|
||||
```
|
||||
|
||||
```sql
|
||||
INSERT INTO t2 SELECT number AS k
|
||||
FROM numbers_mt(10)
|
||||
```
|
||||
|
||||
##### Set the join algorithm to `direct`:
|
||||
|
||||
```sql
|
||||
SET join_algorithm = 'direct'
|
||||
```
|
||||
|
||||
##### An INNER JOIN:
|
||||
```sql
|
||||
SELECT *
|
||||
FROM
|
||||
(
|
||||
SELECT k AS key
|
||||
FROM t2
|
||||
) AS t2
|
||||
INNER JOIN rdb ON rdb.key = t2.key
|
||||
ORDER BY key ASC
|
||||
```
|
||||
```response
|
||||
┌─key─┬─rdb.key─┬─value──┬─value2─┐
|
||||
│ 0 │ 0 │ [0,1] │ val20 │
|
||||
│ 2 │ 2 │ [2,3] │ val22 │
|
||||
│ 3 │ 3 │ [3,4] │ val23 │
|
||||
│ 6 │ 6 │ [6,7] │ val26 │
|
||||
│ 7 │ 7 │ [7,8] │ val27 │
|
||||
│ 8 │ 8 │ [8,9] │ val28 │
|
||||
│ 9 │ 9 │ [9,10] │ val29 │
|
||||
└─────┴─────────┴────────┴────────┘
|
||||
```
|
||||
|
||||
### More information on Joins
|
||||
- [`join_algorithm` setting](/docs/en/operations/settings/settings.md#settings-join_algorithm)
|
||||
- [JOIN clause](/docs/en/sql-reference/statements/select/join.md)
|
||||
|
@ -156,7 +156,7 @@ Similar to GraphiteMergeTree, the HDFS engine supports extended configuration us
|
||||
| rpc\_client\_connect\_timeout | 600 * 1000 |
|
||||
| rpc\_client\_read\_timeout | 3600 * 1000 |
|
||||
| rpc\_client\_write\_timeout | 3600 * 1000 |
|
||||
| rpc\_client\_socekt\_linger\_timeout | -1 |
|
||||
| rpc\_client\_socket\_linger\_timeout | -1 |
|
||||
| rpc\_client\_connect\_retry | 10 |
|
||||
| rpc\_client\_timeout | 3600 * 1000 |
|
||||
| dfs\_default\_replica | 3 |
|
||||
@ -176,7 +176,7 @@ Similar to GraphiteMergeTree, the HDFS engine supports extended configuration us
|
||||
| output\_write\_timeout | 3600 * 1000 |
|
||||
| output\_close\_timeout | 3600 * 1000 |
|
||||
| output\_packetpool\_size | 1024 |
|
||||
| output\_heeartbeat\_interval | 10 * 1000 |
|
||||
| output\_heartbeat\_interval | 10 * 1000 |
|
||||
| dfs\_client\_failover\_max\_attempts | 15 |
|
||||
| dfs\_client\_read\_shortcircuit\_streams\_cache\_size | 256 |
|
||||
| dfs\_client\_socketcache\_expiryMsec | 3000 |
|
||||
|
@ -6,7 +6,7 @@ sidebar_label: Hive
|
||||
|
||||
# Hive
|
||||
|
||||
The Hive engine allows you to perform `SELECT` quries on HDFS Hive table. Currently it supports input formats as below:
|
||||
The Hive engine allows you to perform `SELECT` queries on HDFS Hive table. Currently it supports input formats as below:
|
||||
|
||||
- Text: only supports simple scalar column types except `binary`
|
||||
|
||||
|
@ -10,7 +10,7 @@ This engine allows integrating ClickHouse with [NATS](https://nats.io/).
|
||||
|
||||
`NATS` lets you:
|
||||
|
||||
- Publish or subcribe to message subjects.
|
||||
- Publish or subscribe to message subjects.
|
||||
- Process new messages as they become available.
|
||||
|
||||
## Creating a Table {#table_engine-redisstreams-creating-a-table}
|
||||
@ -46,7 +46,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
|
||||
Required parameters:
|
||||
|
||||
- `nats_url` – host:port (for example, `localhost:5672`).
|
||||
- `nats_subjects` – List of subject for NATS table to subscribe/publsh to. Supports wildcard subjects like `foo.*.bar` or `baz.>`
|
||||
- `nats_subjects` – List of subjects for the NATS table to subscribe/publish to. Supports wildcard subjects like `foo.*.bar` or `baz.>`
|
||||
- `nats_format` – Message format. Uses the same notation as the SQL `FORMAT` function, such as `JSONEachRow`. For more information, see the [Formats](../../../interfaces/formats.md) section.
|
||||
|
||||
Optional parameters:
|
||||
|
@ -13,8 +13,8 @@ The PostgreSQL engine allows to perform `SELECT` and `INSERT` queries on data th
|
||||
``` sql
|
||||
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
|
||||
(
|
||||
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1] [TTL expr1],
|
||||
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2] [TTL expr2],
|
||||
name1 type1 [DEFAULT|MATERIALIZED|ALIAS expr1] [TTL expr1],
|
||||
name2 type2 [DEFAULT|MATERIALIZED|ALIAS expr2] [TTL expr2],
|
||||
...
|
||||
) ENGINE = PostgreSQL('host:port', 'database', 'table', 'user', 'password'[, `schema`]);
|
||||
```
|
||||
@ -57,7 +57,7 @@ or via config (since version 21.11):
|
||||
</named_collections>
|
||||
```
|
||||
|
||||
Some parameters can be overriden by key value arguments:
|
||||
Some parameters can be overridden by key value arguments:
|
||||
``` sql
|
||||
SELECT * FROM postgresql(postgres1, schema='schema1', table='table1');
|
||||
```
|
||||
|
@ -42,7 +42,6 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
|
||||
[rabbitmq_queue_consume = false,]
|
||||
[rabbitmq_address = '',]
|
||||
[rabbitmq_vhost = '/',]
|
||||
[rabbitmq_queue_consume = false,]
|
||||
[rabbitmq_username = '',]
|
||||
[rabbitmq_password = '',]
|
||||
[rabbitmq_commit_on_select = false,]
|
||||
|
@ -23,7 +23,7 @@ CREATE TABLE s3_engine_table (name String, value UInt32)
|
||||
- `NOSIGN` - If this keyword is provided in place of credentials, all the requests will not be signed.
|
||||
- `format` — The [format](../../../interfaces/formats.md#formats) of the file.
|
||||
- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are used from the configuration file. For more information see [Using S3 for Data Storage](../mergetree-family/mergetree.md#table_engine-mergetree-s3).
|
||||
- `compression` — Compression type. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. Parameter is optional. By default, it will autodetect compression by file extension.
|
||||
- `compression` — Compression type. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. Parameter is optional. By default, it will auto-detect compression by file extension.
|
||||
|
||||
### PARTITION BY
|
||||
|
||||
@ -131,14 +131,17 @@ CREATE TABLE table_with_asterisk (name String, value UInt32)
|
||||
|
||||
The following settings can be set before query execution or placed into configuration file.
|
||||
|
||||
- `s3_max_single_part_upload_size` — The maximum size of object to upload using singlepart upload to S3. Default value is `64Mb`.
|
||||
- `s3_min_upload_part_size` — The minimum size of part to upload during multipart upload to [S3 Multipart upload](https://docs.aws.amazon.com/AmazonS3/latest/dev/uploadobjusingmpu.html). Default value is `512Mb`.
|
||||
- `s3_max_single_part_upload_size` — The maximum size of object to upload using singlepart upload to S3. Default value is `32Mb`.
|
||||
- `s3_min_upload_part_size` — The minimum size of part to upload during multipart upload to [S3 Multipart upload](https://docs.aws.amazon.com/AmazonS3/latest/dev/uploadobjusingmpu.html). Default value is `16Mb`.
|
||||
- `s3_max_redirects` — Max number of S3 redirects hops allowed. Default value is `10`.
|
||||
- `s3_single_read_retries` — The maximum number of attempts during single read. Default value is `4`.
|
||||
- `s3_max_put_rps` — Maximum PUT requests per second rate before throttling. Default value is `0` (unlimited).
|
||||
- `s3_max_put_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_put_rps`.
|
||||
- `s3_max_get_rps` — Maximum GET requests per second rate before throttling. Default value is `0` (unlimited).
|
||||
- `s3_max_get_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_get_rps`.
|
||||
- `s3_upload_part_size_multiply_factor` - Multiply `s3_min_upload_part_size` by this factor each time `s3_upload_part_size_multiply_parts_count_threshold` parts were uploaded from a single write to S3. Default value is `2`.
|
||||
- `s3_upload_part_size_multiply_parts_count_threshold` - Each time this number of parts was uploaded to S3, `s3_min_upload_part_size` is multiplied by `s3_upload_part_size_multiply_factor`. Default value is `500`.
|
||||
- `s3_max_inflight_parts_for_one_file` - Limits the number of put requests that can be run concurrently for one object. Its number should be limited. The value `0` means unlimited. Default value is `20`. Each in-flight part has a buffer with size `s3_min_upload_part_size` for the first `s3_upload_part_size_multiply_factor` parts and more when file is big enough, see `upload_part_size_multiply_factor`. With default settings one uploaded file consumes not more than `320Mb` for a file which is less than `8G`. The consumption is greater for a larger file.
|
||||
|
||||
Security consideration: if malicious user can specify arbitrary S3 URLs, `s3_max_redirects` must be set to zero to avoid [SSRF](https://en.wikipedia.org/wiki/Server-side_request_forgery) attacks; or alternatively, `remote_host_filter` must be specified in server configuration.
|
||||
|
||||
|
@ -109,7 +109,7 @@ INSERT INTO test.visits (StartDate, CounterID, Sign, UserID)
|
||||
VALUES (1667446031, 1, 6, 3)
|
||||
```
|
||||
|
||||
The data are inserted in both the table and the materialized view `test.mv_visits`.
|
||||
The data is inserted in both the table and the materialized view `test.mv_visits`.
|
||||
|
||||
To get the aggregated data, we need to execute a query such as `SELECT ... GROUP BY ...` from the materialized view `test.mv_visits`:
|
||||
|
||||
|
@ -78,7 +78,7 @@ ENGINE = MergeTree
|
||||
ORDER BY id;
|
||||
```
|
||||
|
||||
With greater `GRANULARITY` indexes remember the data structure better. The `GRANULARITY` indicates how many granules will be used to construct the index. The more data is provided for the index, the more of it can be handled by one index and the more chances that with the right hyperparameters the index will remember the data structure better. But some indexes can't be built if they don't have enough data, so this granule will always participate in the query. For more information, see the description of indexes.
|
||||
With greater `GRANULARITY` indexes remember the data structure better. The `GRANULARITY` indicates how many granules will be used to construct the index. The more data is provided for the index, the more of it can be handled by one index and the more chances that with the right hyper parameters the index will remember the data structure better. But some indexes can't be built if they don't have enough data, so this granule will always participate in the query. For more information, see the description of indexes.
|
||||
|
||||
As the indexes are built only during insertions into the table, `INSERT` and `OPTIMIZE` queries are slower than for an ordinary table. At this stage indexes remember all the information about the given data. ANNIndexes should be used if you have immutable or rarely changed data and many read requests.
|
||||
|
||||
@ -135,7 +135,7 @@ ORDER BY id;
|
||||
|
||||
Annoy supports `L2Distance` and `cosineDistance`.
|
||||
|
||||
In the `SELECT` in the settings (`ann_index_select_query_params`) you can specify the size of the internal buffer (more details in the description above or in the [original repository](https://github.com/spotify/annoy)). During the query it will inspect up to `search_k` nodes which defaults to `n_trees * n` if not provided. `search_k` gives you a run-time tradeoff between better accuracy and speed.
|
||||
In the `SELECT` in the settings (`ann_index_select_query_params`) you can specify the size of the internal buffer (more details in the description above or in the [original repository](https://github.com/spotify/annoy)). During the query it will inspect up to `search_k` nodes which defaults to `n_trees * n` if not provided. `search_k` gives you a run-time trade-off between better accuracy and speed.
|
||||
|
||||
__Example__:
|
||||
``` sql
|
||||
|
@ -165,7 +165,7 @@ Performance of such a query heavily depends on the table layout. Because of that
|
||||
|
||||
The key factors for a good performance:
|
||||
|
||||
- number of partitions involved in the query should be sufficiently large (more than `max_threads / 2`), otherwise query will underutilize the machine
|
||||
- number of partitions involved in the query should be sufficiently large (more than `max_threads / 2`), otherwise query will under-utilize the machine
|
||||
- partitions shouldn't be too small, so batch processing won't degenerate into row-by-row processing
|
||||
- partitions should be comparable in size, so all threads will do roughly the same amount of work
|
||||
|
||||
|
@ -15,6 +15,18 @@ tokenized cells of the string column. For example, the string cell "I will be a
|
||||
" wi", "wil", "ill", "ll ", "l b", " be" etc. The more fine-granular the input strings are tokenized, the bigger but also the more
|
||||
useful the resulting inverted index will be.
|
||||
|
||||
<div class='vimeo-container'>
|
||||
<iframe src="//www.youtube.com/embed/O_MnyUkrIq8"
|
||||
width="640"
|
||||
height="360"
|
||||
frameborder="0"
|
||||
allow="autoplay;
|
||||
fullscreen;
|
||||
picture-in-picture"
|
||||
allowfullscreen>
|
||||
</iframe>
|
||||
</div>
|
||||
|
||||
:::note
|
||||
Inverted indexes are experimental and should not be used in production environments yet. They may change in the future in backward-incompatible
|
||||
ways, for example with respect to their DDL/DQL syntax or performance/compression characteristics.
|
||||
|
@ -779,7 +779,7 @@ Disks, volumes and storage policies should be declared inside the `<storage_conf
|
||||
|
||||
:::tip
|
||||
Disks can also be declared in the `SETTINGS` section of a query. This is useful
|
||||
for adhoc analysis to temporarily attach a disk that is, for example, hosted at a URL.
|
||||
for ad-hoc analysis to temporarily attach a disk that is, for example, hosted at a URL.
|
||||
See [dynamic storage](#dynamic-storage) for more details.
|
||||
:::
|
||||
|
||||
@ -856,7 +856,7 @@ Tags:
|
||||
- `perform_ttl_move_on_insert` — Disables TTL move on data part INSERT. By default, if we insert a data part that has already expired by the TTL move rule, it immediately goes to a volume/disk declared in the move rule. This can significantly slow down inserts if the destination volume/disk is slow (e.g. S3).
|
||||
- `load_balancing` - Policy for disk balancing, `round_robin` or `least_used`.
|
||||
|
||||
Cofiguration examples:
|
||||
Configuration examples:
|
||||
|
||||
``` xml
|
||||
<storage_configuration>
|
||||
@ -1219,11 +1219,12 @@ Authentication parameters (the disk will try all available methods **and** Manag
|
||||
* `account_name` and `account_key` - For authentication using Shared Key.
|
||||
|
||||
Limit parameters (mainly for internal usage):
|
||||
* `max_single_part_upload_size` - Limits the size of a single block upload to Blob Storage.
|
||||
* `s3_max_single_part_upload_size` - Limits the size of a single block upload to Blob Storage.
|
||||
* `min_bytes_for_seek` - Limits the size of a seekable region.
|
||||
* `max_single_read_retries` - Limits the number of attempts to read a chunk of data from Blob Storage.
|
||||
* `max_single_download_retries` - Limits the number of attempts to download a readable buffer from Blob Storage.
|
||||
* `thread_pool_size` - Limits the number of threads with which `IDiskRemote` is instantiated.
|
||||
* `s3_max_inflight_parts_for_one_file` - Limits the number of put requests that can be run concurrently for one object.
|
||||
|
||||
Other parameters:
|
||||
* `metadata_path` - Path on local FS to store metadata files for Blob Storage. Default value is `/var/lib/clickhouse/disks/<disk_name>/`.
|
||||
|
@ -258,4 +258,4 @@ Since [remote](../../../sql-reference/table-functions/remote.md) and [cluster](.
|
||||
|
||||
- [Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns) description
|
||||
- [background_distributed_schedule_pool_size](../../../operations/settings/settings.md#background_distributed_schedule_pool_size) setting
|
||||
- [shardNum()](../../../sql-reference/functions/other-functions.md#shard-num) and [shardCount()](../../../sql-reference/functions/other-functions.md#shard-count) functions
|
||||
- [shardNum()](../../../sql-reference/functions/other-functions.md#shardnum) and [shardCount()](../../../sql-reference/functions/other-functions.md#shardcount) functions
|
||||
|
@ -65,7 +65,7 @@ if __name__ == "__main__":
|
||||
main()
|
||||
```
|
||||
|
||||
The following `my_executable_table` is built from the output of `my_script.py`, which will generate 10 random strings everytime you run a `SELECT` from `my_executable_table`:
|
||||
The following `my_executable_table` is built from the output of `my_script.py`, which will generate 10 random strings every time you run a `SELECT` from `my_executable_table`:
|
||||
|
||||
```sql
|
||||
CREATE TABLE my_executable_table (
|
||||
@ -223,4 +223,4 @@ SETTINGS
|
||||
pool_size = 4;
|
||||
```
|
||||
|
||||
ClickHouse will maintain 4 processes on-demand when your client queries the `sentiment_pooled` table.
|
||||
ClickHouse will maintain 4 processes on-demand when your client queries the `sentiment_pooled` table.
|
||||
|
@ -72,7 +72,7 @@ Additionally, number of keys will have a soft limit of 4 for the number of keys.
|
||||
|
||||
If multiple tables are created on the same ZooKeeper path, the values are persisted until there exists at least 1 table using it.
|
||||
As a result, it is possible to use `ON CLUSTER` clause when creating the table and sharing the data from multiple ClickHouse instances.
|
||||
Of course, it's possible to manually run `CREATE TABLE` with same path on nonrelated ClickHouse instances to have same data sharing effect.
|
||||
Of course, it's possible to manually run `CREATE TABLE` with same path on unrelated ClickHouse instances to have same data sharing effect.
|
||||
|
||||
## Supported operations {#table_engine-KeeperMap-supported-operations}
|
||||
|
||||
|
@ -87,7 +87,7 @@ ORDER BY (marketplace, review_date, product_category);
|
||||
|
||||
3. We are now ready to insert the data into ClickHouse. Before we do, check out the [list of files in the dataset](https://s3.amazonaws.com/amazon-reviews-pds/tsv/index.txt) and decide which ones you want to include.
|
||||
|
||||
4. We will insert all of the US reviews - which is about 151M rows. The following `INSERT` command uses the `s3Cluster` table function, which allows the processing of mulitple S3 files in parallel using all the nodes of your cluster. We also use a wildcard to insert any file that starts with the name `https://s3.amazonaws.com/amazon-reviews-pds/tsv/amazon_reviews_us_`:
|
||||
4. We will insert all of the US reviews - which is about 151M rows. The following `INSERT` command uses the `s3Cluster` table function, which allows the processing of multiple S3 files in parallel using all the nodes of your cluster. We also use a wildcard to insert any file that starts with the name `https://s3.amazonaws.com/amazon-reviews-pds/tsv/amazon_reviews_us_`:
|
||||
|
||||
```sql
|
||||
INSERT INTO amazon_reviews
|
||||
@ -473,4 +473,4 @@ It runs quite a bit faster - which means the cache is helping us out here:
|
||||
└────────────┴───────────────────────────────────────────────────────────────────────┴────────────────────┴───────┘
|
||||
|
||||
50 rows in set. Elapsed: 33.954 sec. Processed 150.96 million rows, 68.95 GB (4.45 million rows/s., 2.03 GB/s.)
|
||||
```
|
||||
```
|
||||
|
@ -317,7 +317,7 @@ To build a Superset dashboard using the OpenCelliD dataset you should:
|
||||
Make sure that you set **SSL** on when connecting to ClickHouse Cloud or other ClickHouse systems that enforce the use of SSL.
|
||||
:::
|
||||
|
||||
![Add ClickHouse as a Superset datasource](@site/docs/en/getting-started/example-datasets/images/superset-connect-a-database.png)
|
||||
![Add ClickHouse as a Superset data source](@site/docs/en/getting-started/example-datasets/images/superset-connect-a-database.png)
|
||||
|
||||
### Add the table **cell_towers** as a Superset **dataset**
|
||||
|
||||
@ -364,5 +364,5 @@ The data is also available for interactive queries in the [Playground](https://p
|
||||
|
||||
This [example](https://play.clickhouse.com/play?user=play#U0VMRUNUIG1jYywgY291bnQoKSBGUk9NIGNlbGxfdG93ZXJzIEdST1VQIEJZIG1jYyBPUkRFUiBCWSBjb3VudCgpIERFU0M=) will populate the username and even the query for you.
|
||||
|
||||
Although you cannot create tables in the Playground, you can run all of the queries and even use Superset (adjust the hostname and port number).
|
||||
Although you cannot create tables in the Playground, you can run all of the queries and even use Superset (adjust the host name and port number).
|
||||
:::
|
||||
|
@ -36,8 +36,8 @@ The data is in CSV files but uses a semi-colon for the delimiter. The rows look
|
||||
│ 7389 │ BMP180 │ 3735 │ 50.136 │ 11.062 │ 2019-06-01T00:00:06 │ 98905 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 12.1 │
|
||||
│ 13199 │ BMP180 │ 6664 │ 52.514 │ 13.44 │ 2019-06-01T00:00:07 │ 101855.54 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 19.74 │
|
||||
│ 12753 │ BMP180 │ 6440 │ 44.616 │ 2.032 │ 2019-06-01T00:00:07 │ 99475 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 17 │
|
||||
│ 16956 │ BMP180 │ 8594 │ 52.052 │ 8.354 │ 2019-06-01T00:00:08 │ 101322 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 17.2 │
|
||||
└───────────┴─────────────┴──────────┴────────┴───────┴─────────────────────┴──────────┴──────────┴───────────────────┴─────────────┘
|
||||
│ 16956 │ BMP180 │ 8594 │ 52.052 │ 8.354 │ 2019-06-01T00:00:08 │ 101322 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 17.2 │
|
||||
└───────────┴─────────────┴──────────┴────────┴────────┴─────────────────────┴───────────┴──────────┴───────────────────┴─────────────┘
|
||||
```
|
||||
|
||||
2. We will use the following `MergeTree` table to store the data in ClickHouse:
|
||||
|
@ -806,7 +806,7 @@ FROM
|
||||
31 rows in set. Elapsed: 0.043 sec. Processed 7.54 million rows, 40.53 MB (176.71 million rows/s., 950.40 MB/s.)
|
||||
```
|
||||
|
||||
Maybe a little more near the end of the month, but overall we keep a good even distribution. Again this is unrealiable due to the filtering of the docs filter during data insertion.
|
||||
Maybe a little more near the end of the month, but overall we keep a good even distribution. Again this is unreliable due to the filtering of the docs filter during data insertion.
|
||||
|
||||
## Authors with the most diverse impact
|
||||
|
||||
@ -940,7 +940,7 @@ LIMIT 10
|
||||
10 rows in set. Elapsed: 0.106 sec. Processed 798.15 thousand rows, 13.97 MB (7.51 million rows/s., 131.41 MB/s.)
|
||||
```
|
||||
|
||||
This makes sense because Alexey has been responsible for maintaining the Change log. But what if we use the basename of the file to identify his popular files - this allows for renames and should focus on code contributions.
|
||||
This makes sense because Alexey has been responsible for maintaining the Change log. But what if we use the base name of the file to identify his popular files - this allows for renames and should focus on code contributions.
|
||||
|
||||
[play](https://play.clickhouse.com/play?user=play#U0VMRUNUCiAgICBiYXNlLAogICAgY291bnQoKSBBUyBjCkZST00gZ2l0X2NsaWNraG91c2UuZmlsZV9jaGFuZ2VzCldIRVJFIChhdXRob3IgPSAnQWxleGV5IE1pbG92aWRvdicpIEFORCAoZmlsZV9leHRlbnNpb24gSU4gKCdoJywgJ2NwcCcsICdzcWwnKSkKR1JPVVAgQlkgYmFzZW5hbWUocGF0aCkgQVMgYmFzZQpPUkRFUiBCWSBjIERFU0MKTElNSVQgMTA=)
|
||||
|
||||
|
@ -75,7 +75,7 @@ SELECT
|
||||
payment_type,
|
||||
pickup_ntaname,
|
||||
dropoff_ntaname
|
||||
FROM s3(
|
||||
FROM gcs(
|
||||
'https://storage.googleapis.com/clickhouse-public-datasets/nyc-taxi/trips_{0..2}.gz',
|
||||
'TabSeparatedWithNames'
|
||||
);
|
||||
|
@ -9,7 +9,7 @@ The data in this dataset is derived and cleaned from the full OpenSky dataset to
|
||||
|
||||
Source: https://zenodo.org/record/5092942#.YRBCyTpRXYd
|
||||
|
||||
Martin Strohmeier, Xavier Olive, Jannis Lübbe, Matthias Schäfer, and Vincent Lenders
|
||||
Martin Strohmeier, Xavier Olive, Jannis Luebbe, Matthias Schaefer, and Vincent Lenders
|
||||
"Crowdsourced air traffic data from the OpenSky Network 2019–2020"
|
||||
Earth System Science Data 13(2), 2021
|
||||
https://doi.org/10.5194/essd-13-357-2021
|
||||
|
720
docs/en/getting-started/example-datasets/reddit-comments.md
Normal file
720
docs/en/getting-started/example-datasets/reddit-comments.md
Normal file
@ -0,0 +1,720 @@
|
||||
---
|
||||
slug: /en/getting-started/example-datasets/reddit-comments
|
||||
sidebar_label: Reddit comments
|
||||
---
|
||||
|
||||
# Reddit comments dataset
|
||||
|
||||
This dataset contains publicly available comments on Reddit from December 2005 to March 2023, and has over 14B rows of data. The raw data is in JSON format in compressed files and the rows look like the following:
|
||||
|
||||
```json
|
||||
{"controversiality":0,"body":"A look at Vietnam and Mexico exposes the myth of market liberalisation.","subreddit_id":"t5_6","link_id":"t3_17863","stickied":false,"subreddit":"reddit.com","score":2,"ups":2,"author_flair_css_class":null,"created_utc":1134365188,"author_flair_text":null,"author":"frjo","id":"c13","edited":false,"parent_id":"t3_17863","gilded":0,"distinguished":null,"retrieved_on":1473738411}
|
||||
{"created_utc":1134365725,"author_flair_css_class":null,"score":1,"ups":1,"subreddit":"reddit.com","stickied":false,"link_id":"t3_17866","subreddit_id":"t5_6","controversiality":0,"body":"The site states \"What can I use it for? Meeting notes, Reports, technical specs Sign-up sheets, proposals and much more...\", just like any other new breeed of sites that want us to store everything we have on the web. And they even guarantee multiple levels of security and encryption etc. But what prevents these web site operators fom accessing and/or stealing Meeting notes, Reports, technical specs Sign-up sheets, proposals and much more, for competitive or personal gains...? I am pretty sure that most of them are honest, but what's there to prevent me from setting up a good useful site and stealing all your data? Call me paranoid - I am.","retrieved_on":1473738411,"distinguished":null,"gilded":0,"id":"c14","edited":false,"parent_id":"t3_17866","author":"zse7zse","author_flair_text":null}
|
||||
{"gilded":0,"distinguished":null,"retrieved_on":1473738411,"author":"[deleted]","author_flair_text":null,"edited":false,"id":"c15","parent_id":"t3_17869","subreddit":"reddit.com","score":0,"ups":0,"created_utc":1134366848,"author_flair_css_class":null,"body":"Jython related topics by Frank Wierzbicki","controversiality":0,"subreddit_id":"t5_6","stickied":false,"link_id":"t3_17869"}
|
||||
{"gilded":0,"retrieved_on":1473738411,"distinguished":null,"author_flair_text":null,"author":"[deleted]","edited":false,"parent_id":"t3_17870","id":"c16","subreddit":"reddit.com","created_utc":1134367660,"author_flair_css_class":null,"score":1,"ups":1,"body":"[deleted]","controversiality":0,"stickied":false,"link_id":"t3_17870","subreddit_id":"t5_6"}
|
||||
{"gilded":0,"retrieved_on":1473738411,"distinguished":null,"author_flair_text":null,"author":"rjoseph","edited":false,"id":"c17","parent_id":"t3_17817","subreddit":"reddit.com","author_flair_css_class":null,"created_utc":1134367754,"score":1,"ups":1,"body":"Saft is by far the best extension you could tak onto your Safari","controversiality":0,"link_id":"t3_17817","stickied":false,"subreddit_id":"t5_6"}
|
||||
```
|
||||
|
||||
A shoutout to Percona for the [motivation behind ingesting this dataset](https://www.percona.com/blog/big-data-set-reddit-comments-analyzing-clickhouse/), which we have downloaded and stored in an S3 bucket.
|
||||
|
||||
:::note
|
||||
The following commands were executed on a Production instance of ClickHouse Cloud with the minimum memory set to 720GB. To run this on your own cluster, replace `default` in the `s3Cluster` function call with the name of your cluster. If you do not have a cluster, then replace the `s3Cluster` function with the `s3` function.
|
||||
:::
|
||||
|
||||
1. Let's create a table for the Reddit data:
|
||||
|
||||
```sql
|
||||
CREATE TABLE reddit
|
||||
(
|
||||
subreddit LowCardinality(String),
|
||||
subreddit_id LowCardinality(String),
|
||||
subreddit_type Enum('public' = 1, 'restricted' = 2, 'user' = 3, 'archived' = 4, 'gold_restricted' = 5, 'private' = 6),
|
||||
author LowCardinality(String),
|
||||
body String CODEC(ZSTD(6)),
|
||||
created_date Date DEFAULT toDate(created_utc),
|
||||
created_utc DateTime,
|
||||
retrieved_on DateTime,
|
||||
id String,
|
||||
parent_id String,
|
||||
link_id String,
|
||||
score Int32,
|
||||
total_awards_received UInt16,
|
||||
controversiality UInt8,
|
||||
gilded UInt8,
|
||||
collapsed_because_crowd_control UInt8,
|
||||
collapsed_reason Enum('' = 0, 'comment score below threshold' = 1, 'may be sensitive content' = 2, 'potentially toxic' = 3, 'potentially toxic content' = 4),
|
||||
distinguished Enum('' = 0, 'moderator' = 1, 'admin' = 2, 'special' = 3),
|
||||
removal_reason Enum('' = 0, 'legal' = 1),
|
||||
author_created_utc DateTime,
|
||||
author_fullname LowCardinality(String),
|
||||
author_patreon_flair UInt8,
|
||||
author_premium UInt8,
|
||||
can_gild UInt8,
|
||||
can_mod_post UInt8,
|
||||
collapsed UInt8,
|
||||
is_submitter UInt8,
|
||||
_edited String,
|
||||
locked UInt8,
|
||||
quarantined UInt8,
|
||||
no_follow UInt8,
|
||||
send_replies UInt8,
|
||||
stickied UInt8,
|
||||
author_flair_text LowCardinality(String)
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY (subreddit, created_date, author);
|
||||
```
|
||||
|
||||
:::note
|
||||
The names of the files in S3 start with `RC_YYYY-MM` where `YYYY-MM` goes from `2005-12` to `2023-02`. The compression changes a couple of times though, so the file extensions are not consistent. For example:
|
||||
|
||||
- the file names are initially `RC_2005-12.bz2` to `RC_2017-11.bz2`
|
||||
- then they look like `RC_2017-12.xz` to `RC_2018-09.xz`
|
||||
- and finally `RC_2018-10.zst` to `RC_2023-02.zst`
|
||||
:::
|
||||
|
||||
2. We are going to start with one month of data, but if you want to simply insert every row - skip ahead to step 8 below. The following file has 86M records from December, 2017:
|
||||
|
||||
```sql
|
||||
INSERT INTO reddit
|
||||
SELECT *
|
||||
FROM s3(
|
||||
'https://clickhouse-public-datasets.s3.eu-central-1.amazonaws.com/reddit/original/RC_2017-12.xz',
|
||||
'JSONEachRow'
|
||||
);
|
||||
|
||||
```
|
||||
|
||||
3. It will take a while depending on your resources, but when it's done, verify that it worked:
|
||||
|
||||
```sql
|
||||
SELECT formatReadableQuantity(count())
|
||||
FROM reddit;
|
||||
```
|
||||
|
||||
```response
|
||||
┌─formatReadableQuantity(count())─┐
|
||||
│ 85.97 million │
|
||||
└─────────────────────────────────┘
|
||||
```
|
||||
|
||||
4. Let's see how many unique subreddits were in December of 2017:
|
||||
|
||||
```sql
|
||||
SELECT uniqExact(subreddit)
|
||||
FROM reddit;
|
||||
```
|
||||
|
||||
```response
|
||||
┌─uniqExact(subreddit)─┐
|
||||
│ 91613 │
|
||||
└──────────────────────┘
|
||||
|
||||
1 row in set. Elapsed: 1.572 sec. Processed 85.97 million rows, 367.43 MB (54.71 million rows/s., 233.80 MB/s.)
|
||||
```
|
||||
|
||||
5. This query returns the top 20 subreddits (in terms of number of comments):
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
subreddit,
|
||||
count() AS c
|
||||
FROM reddit
|
||||
GROUP BY subreddit
|
||||
ORDER BY c DESC
|
||||
LIMIT 20;
|
||||
```
|
||||
|
||||
```response
|
||||
┌─subreddit───────┬───────c─┐
|
||||
│ AskReddit │ 5245881 │
|
||||
│ politics │ 1753120 │
|
||||
│ nfl │ 1220266 │
|
||||
│ nba │ 960388 │
|
||||
│ The_Donald │ 931857 │
|
||||
│ news │ 796617 │
|
||||
│ worldnews │ 765709 │
|
||||
│ CFB │ 710360 │
|
||||
│ gaming │ 602761 │
|
||||
│ movies │ 601966 │
|
||||
│ soccer │ 590628 │
|
||||
│ Bitcoin │ 583783 │
|
||||
│ pics │ 563408 │
|
||||
│ StarWars │ 562514 │
|
||||
│ funny │ 547563 │
|
||||
│ leagueoflegends │ 517213 │
|
||||
│ teenagers │ 492020 │
|
||||
│ DestinyTheGame │ 477377 │
|
||||
│ todayilearned │ 472650 │
|
||||
│ videos │ 450581 │
|
||||
└─────────────────┴─────────┘
|
||||
|
||||
20 rows in set. Elapsed: 0.368 sec. Processed 85.97 million rows, 367.43 MB (233.34 million rows/s., 997.25 MB/s.)
|
||||
```
|
||||
|
||||
6. Here are the top 10 authors in December of 2017, in terms of number of comments posted:
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
author,
|
||||
count() AS c
|
||||
FROM reddit
|
||||
GROUP BY author
|
||||
ORDER BY c DESC
|
||||
LIMIT 10;
|
||||
```
|
||||
|
||||
```response
|
||||
┌─author──────────┬───────c─┐
|
||||
│ [deleted] │ 5913324 │
|
||||
│ AutoModerator │ 784886 │
|
||||
│ ImagesOfNetwork │ 83241 │
|
||||
│ BitcoinAllBot │ 54484 │
|
||||
│ imguralbumbot │ 45822 │
|
||||
│ RPBot │ 29337 │
|
||||
│ WikiTextBot │ 25982 │
|
||||
│ Concise_AMA_Bot │ 19974 │
|
||||
│ MTGCardFetcher │ 19103 │
|
||||
│ TotesMessenger │ 19057 │
|
||||
└─────────────────┴─────────┘
|
||||
|
||||
10 rows in set. Elapsed: 8.143 sec. Processed 85.97 million rows, 711.05 MB (10.56 million rows/s., 87.32 MB/s.)
|
||||
```
|
||||
|
||||
7. We already inserted some data, but we will start over:
|
||||
|
||||
```sql
|
||||
TRUNCATE TABLE reddit;
|
||||
```
|
||||
|
||||
8. This is a fun dataset and it looks like we can find some great information, so let's go ahead and insert the entire dataset from 2005 to 2023. For practical reasons, it works well to insert the data by years starting with...
|
||||
|
||||
|
||||
```sql
|
||||
INSERT INTO reddit
|
||||
SELECT *
|
||||
FROM s3Cluster(
|
||||
'default',
|
||||
'https://clickhouse-public-datasets.s3.eu-central-1.amazonaws.com/reddit/original/RC_2005*',
|
||||
'JSONEachRow'
|
||||
)
|
||||
SETTINGS zstd_window_log_max = 31;
|
||||
```
|
||||
|
||||
...and ending with:
|
||||
|
||||
```sql
|
||||
INSERT INTO reddit
|
||||
SELECT *
|
||||
FROM s3Cluster(
|
||||
'default',
|
||||
'https://clickhouse-public-datasets.s3.amazonaws.com/reddit/original/RC_2023*',
|
||||
'JSONEachRow'
|
||||
)
|
||||
SETTINGS zstd_window_log_max = 31;
|
||||
```
|
||||
|
||||
If you do not have a cluster, use `s3` instead of `s3Cluster`:
|
||||
|
||||
```sql
|
||||
INSERT INTO reddit
|
||||
SELECT *
|
||||
FROM s3(
|
||||
'https://clickhouse-public-datasets.s3.amazonaws.com/reddit/original/RC_2005*',
|
||||
'JSONEachRow'
|
||||
)
|
||||
SETTINGS zstd_window_log_max = 31;
|
||||
```
|
||||
|
||||
8. To verify it worked, here is the number of rows per year (as of February, 2023):
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
toYear(created_utc) AS year,
|
||||
formatReadableQuantity(count())
|
||||
FROM reddit
|
||||
GROUP BY year;
|
||||
```
|
||||
|
||||
```response
|
||||
|
||||
┌─year─┬─formatReadableQuantity(count())─┐
|
||||
│ 2005 │ 1.07 thousand │
|
||||
│ 2006 │ 417.18 thousand │
|
||||
│ 2007 │ 2.46 million │
|
||||
│ 2008 │ 7.24 million │
|
||||
│ 2009 │ 18.86 million │
|
||||
│ 2010 │ 42.93 million │
|
||||
│ 2011 │ 28.91 million │
|
||||
│ 2012 │ 260.31 million │
|
||||
│ 2013 │ 402.21 million │
|
||||
│ 2014 │ 531.80 million │
|
||||
│ 2015 │ 667.76 million │
|
||||
│ 2016 │ 799.90 million │
|
||||
│ 2017 │ 972.86 million │
|
||||
│ 2018 │ 1.24 billion │
|
||||
│ 2019 │ 1.66 billion │
|
||||
│ 2020 │ 2.16 billion │
|
||||
│ 2021 │ 2.59 billion │
|
||||
│ 2022 │ 2.82 billion │
|
||||
│ 2023 │ 474.86 million │
|
||||
└──────┴─────────────────────────────────┘
|
||||
```
|
||||
|
||||
9. Let's see how many rows were inserted and how much disk space the table is using:
|
||||
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
sum(rows) AS count,
|
||||
formatReadableQuantity(count),
|
||||
formatReadableSize(sum(bytes)) AS disk_size,
|
||||
formatReadableSize(sum(data_uncompressed_bytes)) AS uncompressed_size
|
||||
FROM system.parts
|
||||
WHERE (table = 'reddit') AND active;
|
||||
```
|
||||
|
||||
Notice that the compressed size on disk is about 1/3 of the uncompressed size:
|
||||
|
||||
```response
|
||||
┌───────count─┬─formatReadableQuantity(sum(rows))─┬─disk_size─┬─uncompressed_size─┐
|
||||
│ 14688534662 │ 14.69 billion │ 1.03 TiB │ 3.26 TiB │
|
||||
└─────────────┴───────────────────────────────────┴───────────┴───────────────────┘
|
||||
|
||||
1 row in set. Elapsed: 0.005 sec.
|
||||
```
|
||||
|
||||
9. The following query shows how many comments, authors and subreddits we have for each month:
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
toStartOfMonth(created_utc) AS firstOfMonth,
|
||||
count() AS c,
|
||||
bar(c, 0, 50000000, 25) AS bar_count,
|
||||
uniq(author) AS authors,
|
||||
bar(authors, 0, 5000000, 25) AS bar_authors,
|
||||
uniq(subreddit) AS subreddits,
|
||||
bar(subreddits, 0, 100000, 25) AS bar_subreddits
|
||||
FROM reddit
|
||||
GROUP BY firstOfMonth
|
||||
ORDER BY firstOfMonth ASC;
|
||||
```
|
||||
|
||||
This is a substantial query that has to process all 14.69 billion rows, but we still get an impressive response time (about 48 seconds):
|
||||
|
||||
```response
|
||||
┌─firstOfMonth─┬─────────c─┬─bar_count─────────────────┬──authors─┬─bar_authors───────────────┬─subreddits─┬─bar_subreddits────────────┐
|
||||
│ 2005-12-01 │ 1075 │ │ 394 │ │ 1 │ │
|
||||
│ 2006-01-01 │ 3666 │ │ 791 │ │ 2 │ │
|
||||
│ 2006-02-01 │ 9095 │ │ 1464 │ │ 18 │ │
|
||||
│ 2006-03-01 │ 13859 │ │ 1958 │ │ 15 │ │
|
||||
│ 2006-04-01 │ 19090 │ │ 2334 │ │ 21 │ │
|
||||
│ 2006-05-01 │ 26859 │ │ 2698 │ │ 21 │ │
|
||||
│ 2006-06-01 │ 29163 │ │ 3043 │ │ 19 │ │
|
||||
│ 2006-07-01 │ 37031 │ │ 3532 │ │ 22 │ │
|
||||
│ 2006-08-01 │ 50559 │ │ 4750 │ │ 24 │ │
|
||||
│ 2006-09-01 │ 50675 │ │ 4908 │ │ 21 │ │
|
||||
│ 2006-10-01 │ 54148 │ │ 5654 │ │ 31 │ │
|
||||
│ 2006-11-01 │ 62021 │ │ 6490 │ │ 23 │ │
|
||||
│ 2006-12-01 │ 61018 │ │ 6707 │ │ 24 │ │
|
||||
│ 2007-01-01 │ 81341 │ │ 7931 │ │ 23 │ │
|
||||
│ 2007-02-01 │ 95634 │ │ 9020 │ │ 21 │ │
|
||||
│ 2007-03-01 │ 112444 │ │ 10842 │ │ 23 │ │
|
||||
│ 2007-04-01 │ 126773 │ │ 10701 │ │ 26 │ │
|
||||
│ 2007-05-01 │ 170097 │ │ 11365 │ │ 25 │ │
|
||||
│ 2007-06-01 │ 178800 │ │ 11267 │ │ 22 │ │
|
||||
│ 2007-07-01 │ 203319 │ │ 12482 │ │ 25 │ │
|
||||
│ 2007-08-01 │ 225111 │ │ 14124 │ │ 30 │ │
|
||||
│ 2007-09-01 │ 259497 │ ▏ │ 15416 │ │ 33 │ │
|
||||
│ 2007-10-01 │ 274170 │ ▏ │ 15302 │ │ 36 │ │
|
||||
│ 2007-11-01 │ 372983 │ ▏ │ 15134 │ │ 43 │ │
|
||||
│ 2007-12-01 │ 363390 │ ▏ │ 15915 │ │ 31 │ │
|
||||
│ 2008-01-01 │ 452990 │ ▏ │ 18857 │ │ 126 │ │
|
||||
│ 2008-02-01 │ 441768 │ ▏ │ 18266 │ │ 173 │ │
|
||||
│ 2008-03-01 │ 463728 │ ▏ │ 18947 │ │ 292 │ │
|
||||
│ 2008-04-01 │ 468317 │ ▏ │ 18590 │ │ 323 │ │
|
||||
│ 2008-05-01 │ 536380 │ ▎ │ 20861 │ │ 375 │ │
|
||||
│ 2008-06-01 │ 577684 │ ▎ │ 22557 │ │ 575 │ ▏ │
|
||||
│ 2008-07-01 │ 592610 │ ▎ │ 23123 │ │ 657 │ ▏ │
|
||||
│ 2008-08-01 │ 595959 │ ▎ │ 23729 │ │ 707 │ ▏ │
|
||||
│ 2008-09-01 │ 680892 │ ▎ │ 26374 │ ▏ │ 801 │ ▏ │
|
||||
│ 2008-10-01 │ 789874 │ ▍ │ 28970 │ ▏ │ 893 │ ▏ │
|
||||
│ 2008-11-01 │ 792310 │ ▍ │ 30272 │ ▏ │ 1024 │ ▎ │
|
||||
│ 2008-12-01 │ 850359 │ ▍ │ 34073 │ ▏ │ 1103 │ ▎ │
|
||||
│ 2009-01-01 │ 1051649 │ ▌ │ 38978 │ ▏ │ 1316 │ ▎ │
|
||||
│ 2009-02-01 │ 944711 │ ▍ │ 43390 │ ▏ │ 1132 │ ▎ │
|
||||
│ 2009-03-01 │ 1048643 │ ▌ │ 46516 │ ▏ │ 1203 │ ▎ │
|
||||
│ 2009-04-01 │ 1094599 │ ▌ │ 48284 │ ▏ │ 1334 │ ▎ │
|
||||
│ 2009-05-01 │ 1201257 │ ▌ │ 52512 │ ▎ │ 1395 │ ▎ │
|
||||
│ 2009-06-01 │ 1258750 │ ▋ │ 57728 │ ▎ │ 1473 │ ▎ │
|
||||
│ 2009-07-01 │ 1470290 │ ▋ │ 60098 │ ▎ │ 1686 │ ▍ │
|
||||
│ 2009-08-01 │ 1750688 │ ▉ │ 67347 │ ▎ │ 1777 │ ▍ │
|
||||
│ 2009-09-01 │ 2032276 │ █ │ 78051 │ ▍ │ 1784 │ ▍ │
|
||||
│ 2009-10-01 │ 2242017 │ █ │ 93409 │ ▍ │ 2071 │ ▌ │
|
||||
│ 2009-11-01 │ 2207444 │ █ │ 95940 │ ▍ │ 2141 │ ▌ │
|
||||
│ 2009-12-01 │ 2560510 │ █▎ │ 104239 │ ▌ │ 2141 │ ▌ │
|
||||
│ 2010-01-01 │ 2884096 │ █▍ │ 114314 │ ▌ │ 2313 │ ▌ │
|
||||
│ 2010-02-01 │ 2687779 │ █▎ │ 115683 │ ▌ │ 2522 │ ▋ │
|
||||
│ 2010-03-01 │ 3228254 │ █▌ │ 125775 │ ▋ │ 2890 │ ▋ │
|
||||
│ 2010-04-01 │ 3209898 │ █▌ │ 128936 │ ▋ │ 3170 │ ▊ │
|
||||
│ 2010-05-01 │ 3267363 │ █▋ │ 131851 │ ▋ │ 3166 │ ▊ │
|
||||
│ 2010-06-01 │ 3532867 │ █▊ │ 139522 │ ▋ │ 3301 │ ▊ │
|
||||
│ 2010-07-01 │ 806612 │ ▍ │ 76486 │ ▍ │ 1955 │ ▍ │
|
||||
│ 2010-08-01 │ 4247982 │ ██ │ 164071 │ ▊ │ 3653 │ ▉ │
|
||||
│ 2010-09-01 │ 4704069 │ ██▎ │ 186613 │ ▉ │ 4009 │ █ │
|
||||
│ 2010-10-01 │ 5032368 │ ██▌ │ 203800 │ █ │ 4154 │ █ │
|
||||
│ 2010-11-01 │ 5689002 │ ██▊ │ 226134 │ █▏ │ 4383 │ █ │
|
||||
│ 2010-12-01 │ 3642690 │ █▊ │ 196847 │ ▉ │ 3914 │ ▉ │
|
||||
│ 2011-01-01 │ 3924540 │ █▉ │ 215057 │ █ │ 4240 │ █ │
|
||||
│ 2011-02-01 │ 3859131 │ █▉ │ 223485 │ █ │ 4371 │ █ │
|
||||
│ 2011-03-01 │ 2877996 │ █▍ │ 208607 │ █ │ 3870 │ ▉ │
|
||||
│ 2011-04-01 │ 3859131 │ █▉ │ 248931 │ █▏ │ 4881 │ █▏ │
|
||||
│ 2011-06-01 │ 3859131 │ █▉ │ 267197 │ █▎ │ 5255 │ █▎ │
|
||||
│ 2011-08-01 │ 2943405 │ █▍ │ 259428 │ █▎ │ 5806 │ █▍ │
|
||||
│ 2011-10-01 │ 3859131 │ █▉ │ 327342 │ █▋ │ 6958 │ █▋ │
|
||||
│ 2011-12-01 │ 3728313 │ █▊ │ 354817 │ █▊ │ 7713 │ █▉ │
|
||||
│ 2012-01-01 │ 16350205 │ ████████▏ │ 696110 │ ███▍ │ 14281 │ ███▌ │
|
||||
│ 2012-02-01 │ 16015695 │ ████████ │ 722892 │ ███▌ │ 14949 │ ███▋ │
|
||||
│ 2012-03-01 │ 17881943 │ ████████▉ │ 789664 │ ███▉ │ 15795 │ ███▉ │
|
||||
│ 2012-04-01 │ 19044534 │ █████████▌ │ 842491 │ ████▏ │ 16440 │ ████ │
|
||||
│ 2012-05-01 │ 20388260 │ ██████████▏ │ 886176 │ ████▍ │ 16974 │ ████▏ │
|
||||
│ 2012-06-01 │ 21897913 │ ██████████▉ │ 946798 │ ████▋ │ 17952 │ ████▍ │
|
||||
│ 2012-07-01 │ 24087517 │ ████████████ │ 1018636 │ █████ │ 19069 │ ████▊ │
|
||||
│ 2012-08-01 │ 25703326 │ ████████████▊ │ 1094445 │ █████▍ │ 20553 │ █████▏ │
|
||||
│ 2012-09-01 │ 23419524 │ ███████████▋ │ 1088491 │ █████▍ │ 20831 │ █████▏ │
|
||||
│ 2012-10-01 │ 24788236 │ ████████████▍ │ 1131885 │ █████▋ │ 21868 │ █████▍ │
|
||||
│ 2012-11-01 │ 24648302 │ ████████████▎ │ 1167608 │ █████▊ │ 21791 │ █████▍ │
|
||||
│ 2012-12-01 │ 26080276 │ █████████████ │ 1218402 │ ██████ │ 22622 │ █████▋ │
|
||||
│ 2013-01-01 │ 30365867 │ ███████████████▏ │ 1341703 │ ██████▋ │ 24696 │ ██████▏ │
|
||||
│ 2013-02-01 │ 27213960 │ █████████████▌ │ 1304756 │ ██████▌ │ 24514 │ ██████▏ │
|
||||
│ 2013-03-01 │ 30771274 │ ███████████████▍ │ 1391703 │ ██████▉ │ 25730 │ ██████▍ │
|
||||
│ 2013-04-01 │ 33259557 │ ████████████████▋ │ 1485971 │ ███████▍ │ 27294 │ ██████▊ │
|
||||
│ 2013-05-01 │ 33126225 │ ████████████████▌ │ 1506473 │ ███████▌ │ 27299 │ ██████▊ │
|
||||
│ 2013-06-01 │ 32648247 │ ████████████████▎ │ 1506650 │ ███████▌ │ 27450 │ ██████▊ │
|
||||
│ 2013-07-01 │ 34922133 │ █████████████████▍ │ 1561771 │ ███████▊ │ 28294 │ ███████ │
|
||||
│ 2013-08-01 │ 34766579 │ █████████████████▍ │ 1589781 │ ███████▉ │ 28943 │ ███████▏ │
|
||||
│ 2013-09-01 │ 31990369 │ ███████████████▉ │ 1570342 │ ███████▊ │ 29408 │ ███████▎ │
|
||||
│ 2013-10-01 │ 35940040 │ █████████████████▉ │ 1683770 │ ████████▍ │ 30273 │ ███████▌ │
|
||||
│ 2013-11-01 │ 37396497 │ ██████████████████▋ │ 1757467 │ ████████▊ │ 31173 │ ███████▊ │
|
||||
│ 2013-12-01 │ 39810216 │ ███████████████████▉ │ 1846204 │ █████████▏ │ 32326 │ ████████ │
|
||||
│ 2014-01-01 │ 42420655 │ █████████████████████▏ │ 1927229 │ █████████▋ │ 35603 │ ████████▉ │
|
||||
│ 2014-02-01 │ 38703362 │ ███████████████████▎ │ 1874067 │ █████████▎ │ 37007 │ █████████▎ │
|
||||
│ 2014-03-01 │ 42459956 │ █████████████████████▏ │ 1959888 │ █████████▊ │ 37948 │ █████████▍ │
|
||||
│ 2014-04-01 │ 42440735 │ █████████████████████▏ │ 1951369 │ █████████▊ │ 38362 │ █████████▌ │
|
||||
│ 2014-05-01 │ 42514094 │ █████████████████████▎ │ 1970197 │ █████████▊ │ 39078 │ █████████▊ │
|
||||
│ 2014-06-01 │ 41990650 │ ████████████████████▉ │ 1943850 │ █████████▋ │ 38268 │ █████████▌ │
|
||||
│ 2014-07-01 │ 46868899 │ ███████████████████████▍ │ 2059346 │ ██████████▎ │ 40634 │ ██████████▏ │
|
||||
│ 2014-08-01 │ 46990813 │ ███████████████████████▍ │ 2117335 │ ██████████▌ │ 41764 │ ██████████▍ │
|
||||
│ 2014-09-01 │ 44992201 │ ██████████████████████▍ │ 2124708 │ ██████████▌ │ 41890 │ ██████████▍ │
|
||||
│ 2014-10-01 │ 47497520 │ ███████████████████████▋ │ 2206535 │ ███████████ │ 43109 │ ██████████▊ │
|
||||
│ 2014-11-01 │ 46118074 │ ███████████████████████ │ 2239747 │ ███████████▏ │ 43718 │ ██████████▉ │
|
||||
│ 2014-12-01 │ 48807699 │ ████████████████████████▍ │ 2372945 │ ███████████▊ │ 43823 │ ██████████▉ │
|
||||
│ 2015-01-01 │ 53851542 │ █████████████████████████ │ 2499536 │ ████████████▍ │ 47172 │ ███████████▊ │
|
||||
│ 2015-02-01 │ 48342747 │ ████████████████████████▏ │ 2448496 │ ████████████▏ │ 47229 │ ███████████▊ │
|
||||
│ 2015-03-01 │ 54564441 │ █████████████████████████ │ 2550534 │ ████████████▊ │ 48156 │ ████████████ │
|
||||
│ 2015-04-01 │ 55005780 │ █████████████████████████ │ 2609443 │ █████████████ │ 49865 │ ████████████▍ │
|
||||
│ 2015-05-01 │ 54504410 │ █████████████████████████ │ 2585535 │ ████████████▉ │ 50137 │ ████████████▌ │
|
||||
│ 2015-06-01 │ 54258492 │ █████████████████████████ │ 2595129 │ ████████████▉ │ 49598 │ ████████████▍ │
|
||||
│ 2015-07-01 │ 58451788 │ █████████████████████████ │ 2720026 │ █████████████▌ │ 55022 │ █████████████▊ │
|
||||
│ 2015-08-01 │ 58075327 │ █████████████████████████ │ 2743994 │ █████████████▋ │ 55302 │ █████████████▊ │
|
||||
│ 2015-09-01 │ 55574825 │ █████████████████████████ │ 2672793 │ █████████████▎ │ 53960 │ █████████████▍ │
|
||||
│ 2015-10-01 │ 59494045 │ █████████████████████████ │ 2816426 │ ██████████████ │ 70210 │ █████████████████▌ │
|
||||
│ 2015-11-01 │ 57117500 │ █████████████████████████ │ 2847146 │ ██████████████▏ │ 71363 │ █████████████████▊ │
|
||||
│ 2015-12-01 │ 58523312 │ █████████████████████████ │ 2854840 │ ██████████████▎ │ 94559 │ ███████████████████████▋ │
|
||||
│ 2016-01-01 │ 61991732 │ █████████████████████████ │ 2920366 │ ██████████████▌ │ 108438 │ █████████████████████████ │
|
||||
│ 2016-02-01 │ 59189875 │ █████████████████████████ │ 2854683 │ ██████████████▎ │ 109916 │ █████████████████████████ │
|
||||
│ 2016-03-01 │ 63918864 │ █████████████████████████ │ 2969542 │ ██████████████▊ │ 84787 │ █████████████████████▏ │
|
||||
│ 2016-04-01 │ 64271256 │ █████████████████████████ │ 2999086 │ ██████████████▉ │ 61647 │ ███████████████▍ │
|
||||
│ 2016-05-01 │ 65212004 │ █████████████████████████ │ 3034674 │ ███████████████▏ │ 67465 │ ████████████████▊ │
|
||||
│ 2016-06-01 │ 65867743 │ █████████████████████████ │ 3057604 │ ███████████████▎ │ 75170 │ ██████████████████▊ │
|
||||
│ 2016-07-01 │ 66974735 │ █████████████████████████ │ 3199374 │ ███████████████▉ │ 77732 │ ███████████████████▍ │
|
||||
│ 2016-08-01 │ 69654819 │ █████████████████████████ │ 3239957 │ ████████████████▏ │ 63080 │ ███████████████▊ │
|
||||
│ 2016-09-01 │ 67024973 │ █████████████████████████ │ 3190864 │ ███████████████▉ │ 62324 │ ███████████████▌ │
|
||||
│ 2016-10-01 │ 71826553 │ █████████████████████████ │ 3284340 │ ████████████████▍ │ 62549 │ ███████████████▋ │
|
||||
│ 2016-11-01 │ 71022319 │ █████████████████████████ │ 3300822 │ ████████████████▌ │ 69718 │ █████████████████▍ │
|
||||
│ 2016-12-01 │ 72942967 │ █████████████████████████ │ 3430324 │ █████████████████▏ │ 71705 │ █████████████████▉ │
|
||||
│ 2017-01-01 │ 78946585 │ █████████████████████████ │ 3572093 │ █████████████████▊ │ 78198 │ ███████████████████▌ │
|
||||
│ 2017-02-01 │ 70609487 │ █████████████████████████ │ 3421115 │ █████████████████ │ 69823 │ █████████████████▍ │
|
||||
│ 2017-03-01 │ 79723106 │ █████████████████████████ │ 3638122 │ ██████████████████▏ │ 73865 │ ██████████████████▍ │
|
||||
│ 2017-04-01 │ 77478009 │ █████████████████████████ │ 3620591 │ ██████████████████ │ 74387 │ ██████████████████▌ │
|
||||
│ 2017-05-01 │ 79810360 │ █████████████████████████ │ 3650820 │ ██████████████████▎ │ 74356 │ ██████████████████▌ │
|
||||
│ 2017-06-01 │ 79901711 │ █████████████████████████ │ 3737614 │ ██████████████████▋ │ 72114 │ ██████████████████ │
|
||||
│ 2017-07-01 │ 81798725 │ █████████████████████████ │ 3872330 │ ███████████████████▎ │ 76052 │ ███████████████████ │
|
||||
│ 2017-08-01 │ 84658503 │ █████████████████████████ │ 3960093 │ ███████████████████▊ │ 77798 │ ███████████████████▍ │
|
||||
│ 2017-09-01 │ 83165192 │ █████████████████████████ │ 3880501 │ ███████████████████▍ │ 78402 │ ███████████████████▌ │
|
||||
│ 2017-10-01 │ 85828912 │ █████████████████████████ │ 3980335 │ ███████████████████▉ │ 80685 │ ████████████████████▏ │
|
||||
│ 2017-11-01 │ 84965681 │ █████████████████████████ │ 4026749 │ ████████████████████▏ │ 82659 │ ████████████████████▋ │
|
||||
│ 2017-12-01 │ 85973810 │ █████████████████████████ │ 4196354 │ ████████████████████▉ │ 91984 │ ██████████████████████▉ │
|
||||
│ 2018-01-01 │ 91558594 │ █████████████████████████ │ 4364443 │ █████████████████████▊ │ 102577 │ █████████████████████████ │
|
||||
│ 2018-02-01 │ 86467179 │ █████████████████████████ │ 4277899 │ █████████████████████▍ │ 104610 │ █████████████████████████ │
|
||||
│ 2018-03-01 │ 96490262 │ █████████████████████████ │ 4422470 │ ██████████████████████ │ 112559 │ █████████████████████████ │
|
||||
│ 2018-04-01 │ 98101232 │ █████████████████████████ │ 4572434 │ ██████████████████████▊ │ 105284 │ █████████████████████████ │
|
||||
│ 2018-05-01 │ 100109100 │ █████████████████████████ │ 4698908 │ ███████████████████████▍ │ 103910 │ █████████████████████████ │
|
||||
│ 2018-06-01 │ 100009462 │ █████████████████████████ │ 4697426 │ ███████████████████████▍ │ 101107 │ █████████████████████████ │
|
||||
│ 2018-07-01 │ 108151359 │ █████████████████████████ │ 5099492 │ █████████████████████████ │ 106184 │ █████████████████████████ │
|
||||
│ 2018-08-01 │ 107330940 │ █████████████████████████ │ 5084082 │ █████████████████████████ │ 109985 │ █████████████████████████ │
|
||||
│ 2018-09-01 │ 104473929 │ █████████████████████████ │ 5011953 │ █████████████████████████ │ 109710 │ █████████████████████████ │
|
||||
│ 2018-10-01 │ 112346556 │ █████████████████████████ │ 5320405 │ █████████████████████████ │ 112533 │ █████████████████████████ │
|
||||
│ 2018-11-01 │ 112573001 │ █████████████████████████ │ 5353282 │ █████████████████████████ │ 112211 │ █████████████████████████ │
|
||||
│ 2018-12-01 │ 121953600 │ █████████████████████████ │ 5611543 │ █████████████████████████ │ 118291 │ █████████████████████████ │
|
||||
│ 2019-01-01 │ 129386587 │ █████████████████████████ │ 6016687 │ █████████████████████████ │ 125725 │ █████████████████████████ │
|
||||
│ 2019-02-01 │ 120645639 │ █████████████████████████ │ 5974488 │ █████████████████████████ │ 125420 │ █████████████████████████ │
|
||||
│ 2019-03-01 │ 137650471 │ █████████████████████████ │ 6410197 │ █████████████████████████ │ 135924 │ █████████████████████████ │
|
||||
│ 2019-04-01 │ 138473643 │ █████████████████████████ │ 6416384 │ █████████████████████████ │ 139844 │ █████████████████████████ │
|
||||
│ 2019-05-01 │ 142463421 │ █████████████████████████ │ 6574836 │ █████████████████████████ │ 142012 │ █████████████████████████ │
|
||||
│ 2019-06-01 │ 134172939 │ █████████████████████████ │ 6601267 │ █████████████████████████ │ 140997 │ █████████████████████████ │
|
||||
│ 2019-07-01 │ 145965083 │ █████████████████████████ │ 6901822 │ █████████████████████████ │ 147802 │ █████████████████████████ │
|
||||
│ 2019-08-01 │ 146854393 │ █████████████████████████ │ 6993882 │ █████████████████████████ │ 151888 │ █████████████████████████ │
|
||||
│ 2019-09-01 │ 137540219 │ █████████████████████████ │ 7001362 │ █████████████████████████ │ 148839 │ █████████████████████████ │
|
||||
│ 2019-10-01 │ 145909884 │ █████████████████████████ │ 7160126 │ █████████████████████████ │ 152075 │ █████████████████████████ │
|
||||
│ 2019-11-01 │ 138512489 │ █████████████████████████ │ 7098723 │ █████████████████████████ │ 164597 │ █████████████████████████ │
|
||||
│ 2019-12-01 │ 146012313 │ █████████████████████████ │ 7438261 │ █████████████████████████ │ 166966 │ █████████████████████████ │
|
||||
│ 2020-01-01 │ 153498208 │ █████████████████████████ │ 7703548 │ █████████████████████████ │ 174390 │ █████████████████████████ │
|
||||
│ 2020-02-01 │ 148386817 │ █████████████████████████ │ 7582031 │ █████████████████████████ │ 170257 │ █████████████████████████ │
|
||||
│ 2020-03-01 │ 166266315 │ █████████████████████████ │ 8339049 │ █████████████████████████ │ 192460 │ █████████████████████████ │
|
||||
│ 2020-04-01 │ 178511581 │ █████████████████████████ │ 8991649 │ █████████████████████████ │ 202334 │ █████████████████████████ │
|
||||
│ 2020-05-01 │ 189993779 │ █████████████████████████ │ 9331358 │ █████████████████████████ │ 217357 │ █████████████████████████ │
|
||||
│ 2020-06-01 │ 187914434 │ █████████████████████████ │ 9085003 │ █████████████████████████ │ 223362 │ █████████████████████████ │
|
||||
│ 2020-07-01 │ 194244994 │ █████████████████████████ │ 9321706 │ █████████████████████████ │ 228222 │ █████████████████████████ │
|
||||
│ 2020-08-01 │ 196099301 │ █████████████████████████ │ 9368408 │ █████████████████████████ │ 230251 │ █████████████████████████ │
|
||||
│ 2020-09-01 │ 182549761 │ █████████████████████████ │ 9271571 │ █████████████████████████ │ 227889 │ █████████████████████████ │
|
||||
│ 2020-10-01 │ 186583890 │ █████████████████████████ │ 9396112 │ █████████████████████████ │ 233715 │ █████████████████████████ │
|
||||
│ 2020-11-01 │ 186083723 │ █████████████████████████ │ 9623053 │ █████████████████████████ │ 234963 │ █████████████████████████ │
|
||||
│ 2020-12-01 │ 191317162 │ █████████████████████████ │ 9898168 │ █████████████████████████ │ 249115 │ █████████████████████████ │
|
||||
│ 2021-01-01 │ 210496207 │ █████████████████████████ │ 10503943 │ █████████████████████████ │ 259805 │ █████████████████████████ │
|
||||
│ 2021-02-01 │ 193510365 │ █████████████████████████ │ 10215033 │ █████████████████████████ │ 253656 │ █████████████████████████ │
|
||||
│ 2021-03-01 │ 207454415 │ █████████████████████████ │ 10365629 │ █████████████████████████ │ 267263 │ █████████████████████████ │
|
||||
│ 2021-04-01 │ 204573086 │ █████████████████████████ │ 10391984 │ █████████████████████████ │ 270543 │ █████████████████████████ │
|
||||
│ 2021-05-01 │ 217655366 │ █████████████████████████ │ 10648130 │ █████████████████████████ │ 288555 │ █████████████████████████ │
|
||||
│ 2021-06-01 │ 208027069 │ █████████████████████████ │ 10397311 │ █████████████████████████ │ 291520 │ █████████████████████████ │
|
||||
│ 2021-07-01 │ 210955954 │ █████████████████████████ │ 10063967 │ █████████████████████████ │ 252061 │ █████████████████████████ │
|
||||
│ 2021-08-01 │ 225681244 │ █████████████████████████ │ 10383556 │ █████████████████████████ │ 254569 │ █████████████████████████ │
|
||||
│ 2021-09-01 │ 220086513 │ █████████████████████████ │ 10298344 │ █████████████████████████ │ 256826 │ █████████████████████████ │
|
||||
│ 2021-10-01 │ 227527379 │ █████████████████████████ │ 10729882 │ █████████████████████████ │ 283328 │ █████████████████████████ │
|
||||
│ 2021-11-01 │ 228289963 │ █████████████████████████ │ 10995197 │ █████████████████████████ │ 302386 │ █████████████████████████ │
|
||||
│ 2021-12-01 │ 235807471 │ █████████████████████████ │ 11312798 │ █████████████████████████ │ 313876 │ █████████████████████████ │
|
||||
│ 2022-01-01 │ 256766679 │ █████████████████████████ │ 12074520 │ █████████████████████████ │ 340407 │ █████████████████████████ │
|
||||
│ 2022-02-01 │ 219927645 │ █████████████████████████ │ 10846045 │ █████████████████████████ │ 293236 │ █████████████████████████ │
|
||||
│ 2022-03-01 │ 236554668 │ █████████████████████████ │ 11330285 │ █████████████████████████ │ 302387 │ █████████████████████████ │
|
||||
│ 2022-04-01 │ 231188077 │ █████████████████████████ │ 11697995 │ █████████████████████████ │ 316303 │ █████████████████████████ │
|
||||
│ 2022-05-01 │ 230492108 │ █████████████████████████ │ 11448584 │ █████████████████████████ │ 323725 │ █████████████████████████ │
|
||||
│ 2022-06-01 │ 218842949 │ █████████████████████████ │ 11400399 │ █████████████████████████ │ 324846 │ █████████████████████████ │
|
||||
│ 2022-07-01 │ 242504279 │ █████████████████████████ │ 12049204 │ █████████████████████████ │ 335621 │ █████████████████████████ │
|
||||
│ 2022-08-01 │ 247215325 │ █████████████████████████ │ 12189276 │ █████████████████████████ │ 337873 │ █████████████████████████ │
|
||||
│ 2022-09-01 │ 234131223 │ █████████████████████████ │ 11674079 │ █████████████████████████ │ 326325 │ █████████████████████████ │
|
||||
│ 2022-10-01 │ 237365072 │ █████████████████████████ │ 11804508 │ █████████████████████████ │ 336063 │ █████████████████████████ │
|
||||
│ 2022-11-01 │ 229478878 │ █████████████████████████ │ 11543020 │ █████████████████████████ │ 323122 │ █████████████████████████ │
|
||||
│ 2022-12-01 │ 238862690 │ █████████████████████████ │ 11967451 │ █████████████████████████ │ 331668 │ █████████████████████████ │
|
||||
│ 2023-01-01 │ 253577512 │ █████████████████████████ │ 12264087 │ █████████████████████████ │ 332711 │ █████████████████████████ │
|
||||
│ 2023-02-01 │ 221285501 │ █████████████████████████ │ 11537091 │ █████████████████████████ │ 317879 │ █████████████████████████ │
|
||||
└──────────────┴───────────┴───────────────────────────┴──────────┴───────────────────────────┴────────────┴───────────────────────────┘
|
||||
|
||||
203 rows in set. Elapsed: 48.492 sec. Processed 14.69 billion rows, 213.35 GB (302.91 million rows/s., 4.40 GB/s.)
|
||||
```
|
||||
|
||||
10. Here are the top 10 subreddits of 2022:
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
subreddit,
|
||||
count() AS count
|
||||
FROM reddit
|
||||
WHERE toYear(created_utc) = 2022
|
||||
GROUP BY subreddit
|
||||
ORDER BY count DESC
|
||||
LIMIT 10;
|
||||
```
|
||||
|
||||
```response
|
||||
┌─subreddit──────┬────count─┐
|
||||
│ AskReddit │ 72312060 │
|
||||
│ AmItheAsshole │ 25323210 │
|
||||
│ teenagers │ 22355960 │
|
||||
│ worldnews │ 17797707 │
|
||||
│ FreeKarma4U │ 15652274 │
|
||||
│ FreeKarma4You │ 14929055 │
|
||||
│ wallstreetbets │ 14235271 │
|
||||
│ politics │ 12511136 │
|
||||
│ memes │ 11610792 │
|
||||
│ nba │ 11586571 │
|
||||
└────────────────┴──────────┘
|
||||
|
||||
10 rows in set. Elapsed: 5.956 sec. Processed 14.69 billion rows, 126.19 GB (2.47 billion rows/s., 21.19 GB/s.)
|
||||
```
|
||||
|
||||
11. Let's see which subreddits had the biggest increase in comments from 2018 to 2019:
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
subreddit,
|
||||
newcount - oldcount AS diff
|
||||
FROM
|
||||
(
|
||||
SELECT
|
||||
subreddit,
|
||||
count(*) AS newcount
|
||||
FROM reddit
|
||||
WHERE toYear(created_utc) = 2019
|
||||
GROUP BY subreddit
|
||||
)
|
||||
ALL INNER JOIN
|
||||
(
|
||||
SELECT
|
||||
subreddit,
|
||||
count(*) AS oldcount
|
||||
FROM reddit
|
||||
WHERE toYear(created_utc) = 2018
|
||||
GROUP BY subreddit
|
||||
) USING (subreddit)
|
||||
ORDER BY diff DESC
|
||||
LIMIT 50
|
||||
SETTINGS joined_subquery_requires_alias = 0;
|
||||
```
|
||||
|
||||
It looks like memes and teenagers were busy on Reddit in 2019:
|
||||
|
||||
```response
|
||||
┌─subreddit────────────┬─────diff─┐
|
||||
│ AskReddit │ 18765909 │
|
||||
│ memes │ 16496996 │
|
||||
│ teenagers │ 13071715 │
|
||||
│ AmItheAsshole │ 12312663 │
|
||||
│ dankmemes │ 12016716 │
|
||||
│ unpopularopinion │ 6809935 │
|
||||
│ PewdiepieSubmissions │ 6330844 │
|
||||
│ Market76 │ 5213690 │
|
||||
│ relationship_advice │ 4060717 │
|
||||
│ Minecraft │ 3328659 │
|
||||
│ freefolk │ 3227970 │
|
||||
│ classicwow │ 3063133 │
|
||||
│ Animemes │ 2866876 │
|
||||
│ gonewild │ 2457680 │
|
||||
│ PublicFreakout │ 2452288 │
|
||||
│ gameofthrones │ 2411661 │
|
||||
│ RoastMe │ 2378781 │
|
||||
│ ShitPostCrusaders │ 2345414 │
|
||||
│ AnthemTheGame │ 1813152 │
|
||||
│ nfl │ 1804407 │
|
||||
│ Showerthoughts │ 1797968 │
|
||||
│ Cringetopia │ 1764034 │
|
||||
│ pokemon │ 1763269 │
|
||||
│ entitledparents │ 1744852 │
|
||||
│ HistoryMemes │ 1721645 │
|
||||
│ MortalKombat │ 1718184 │
|
||||
│ trashy │ 1684357 │
|
||||
│ ChapoTrapHouse │ 1675363 │
|
||||
│ Brawlstars │ 1663763 │
|
||||
│ iamatotalpieceofshit │ 1647381 │
|
||||
│ ukpolitics │ 1599204 │
|
||||
│ cursedcomments │ 1590781 │
|
||||
│ Pikabu │ 1578597 │
|
||||
│ wallstreetbets │ 1535225 │
|
||||
│ AskOuija │ 1533214 │
|
||||
│ interestingasfuck │ 1528910 │
|
||||
│ aww │ 1439008 │
|
||||
│ wholesomememes │ 1436566 │
|
||||
│ SquaredCircle │ 1432172 │
|
||||
│ insanepeoplefacebook │ 1290686 │
|
||||
│ borderlands3 │ 1274462 │
|
||||
│ FreeKarma4U │ 1217769 │
|
||||
│ YangForPresidentHQ │ 1186918 │
|
||||
│ FortniteCompetitive │ 1184508 │
|
||||
│ AskMen │ 1180820 │
|
||||
│ EpicSeven │ 1172061 │
|
||||
│ MurderedByWords │ 1112476 │
|
||||
│ politics │ 1084087 │
|
||||
│ barstoolsports │ 1068020 │
|
||||
│ BattlefieldV │ 1053878 │
|
||||
└──────────────────────┴──────────┘
|
||||
|
||||
50 rows in set. Elapsed: 10.680 sec. Processed 29.38 billion rows, 198.67 GB (2.75 billion rows/s., 18.60 GB/s.)
|
||||
```
|
||||
|
||||
12. One more query: let's compare ClickHouse mentions to other technologies like Snowflake and Postgres. This query is a big one because it has to search all 14.69 billion comments three times for a substring, but the performance is actually quite impressive. (Unfortunately ClickHouse users are not very active on Reddit yet):
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
toStartOfQuarter(created_utc) AS quarter,
|
||||
sum(if(positionCaseInsensitive(body, 'clickhouse') > 0, 1, 0)) AS clickhouse,
|
||||
sum(if(positionCaseInsensitive(body, 'snowflake') > 0, 1, 0)) AS snowflake,
|
||||
sum(if(positionCaseInsensitive(body, 'postgres') > 0, 1, 0)) AS postgres
|
||||
FROM reddit
|
||||
GROUP BY quarter
|
||||
ORDER BY quarter ASC;
|
||||
```
|
||||
|
||||
```response
|
||||
┌────quarter─┬─clickhouse─┬─snowflake─┬─postgres─┐
|
||||
│ 2005-10-01 │ 0 │ 0 │ 0 │
|
||||
│ 2006-01-01 │ 0 │ 2 │ 23 │
|
||||
│ 2006-04-01 │ 0 │ 2 │ 24 │
|
||||
│ 2006-07-01 │ 0 │ 4 │ 13 │
|
||||
│ 2006-10-01 │ 0 │ 23 │ 73 │
|
||||
│ 2007-01-01 │ 0 │ 14 │ 91 │
|
||||
│ 2007-04-01 │ 0 │ 10 │ 59 │
|
||||
│ 2007-07-01 │ 0 │ 39 │ 116 │
|
||||
│ 2007-10-01 │ 0 │ 45 │ 125 │
|
||||
│ 2008-01-01 │ 0 │ 53 │ 234 │
|
||||
│ 2008-04-01 │ 0 │ 79 │ 303 │
|
||||
│ 2008-07-01 │ 0 │ 102 │ 174 │
|
||||
│ 2008-10-01 │ 0 │ 156 │ 323 │
|
||||
│ 2009-01-01 │ 0 │ 206 │ 208 │
|
||||
│ 2009-04-01 │ 0 │ 178 │ 417 │
|
||||
│ 2009-07-01 │ 0 │ 300 │ 295 │
|
||||
│ 2009-10-01 │ 0 │ 633 │ 589 │
|
||||
│ 2010-01-01 │ 0 │ 555 │ 501 │
|
||||
│ 2010-04-01 │ 0 │ 587 │ 469 │
|
||||
│ 2010-07-01 │ 0 │ 601 │ 696 │
|
||||
│ 2010-10-01 │ 0 │ 1246 │ 505 │
|
||||
│ 2011-01-01 │ 0 │ 758 │ 247 │
|
||||
│ 2011-04-01 │ 0 │ 537 │ 113 │
|
||||
│ 2011-07-01 │ 0 │ 173 │ 64 │
|
||||
│ 2011-10-01 │ 0 │ 649 │ 96 │
|
||||
│ 2012-01-01 │ 0 │ 4621 │ 662 │
|
||||
│ 2012-04-01 │ 0 │ 5737 │ 785 │
|
||||
│ 2012-07-01 │ 0 │ 6097 │ 1127 │
|
||||
│ 2012-10-01 │ 0 │ 7986 │ 600 │
|
||||
│ 2013-01-01 │ 0 │ 9704 │ 839 │
|
||||
│ 2013-04-01 │ 0 │ 8161 │ 853 │
|
||||
│ 2013-07-01 │ 0 │ 9704 │ 1028 │
|
||||
│ 2013-10-01 │ 0 │ 12879 │ 1404 │
|
||||
│ 2014-01-01 │ 0 │ 12317 │ 1548 │
|
||||
│ 2014-04-01 │ 0 │ 13181 │ 1577 │
|
||||
│ 2014-07-01 │ 0 │ 15640 │ 1710 │
|
||||
│ 2014-10-01 │ 0 │ 19479 │ 1959 │
|
||||
│ 2015-01-01 │ 0 │ 20411 │ 2104 │
|
||||
│ 2015-04-01 │ 1 │ 20309 │ 9112 │
|
||||
│ 2015-07-01 │ 0 │ 20325 │ 4771 │
|
||||
│ 2015-10-01 │ 0 │ 25087 │ 3030 │
|
||||
│ 2016-01-01 │ 0 │ 23462 │ 3126 │
|
||||
│ 2016-04-01 │ 3 │ 25496 │ 2757 │
|
||||
│ 2016-07-01 │ 4 │ 28233 │ 2928 │
|
||||
│ 2016-10-01 │ 2 │ 45445 │ 2449 │
|
||||
│ 2017-01-01 │ 9 │ 76019 │ 2808 │
|
||||
│ 2017-04-01 │ 9 │ 67919 │ 2803 │
|
||||
│ 2017-07-01 │ 13 │ 68974 │ 2771 │
|
||||
│ 2017-10-01 │ 12 │ 69730 │ 2906 │
|
||||
│ 2018-01-01 │ 17 │ 67476 │ 3152 │
|
||||
│ 2018-04-01 │ 3 │ 67139 │ 3986 │
|
||||
│ 2018-07-01 │ 14 │ 67979 │ 3609 │
|
||||
│ 2018-10-01 │ 28 │ 74147 │ 3850 │
|
||||
│ 2019-01-01 │ 14 │ 80250 │ 4305 │
|
||||
│ 2019-04-01 │ 30 │ 70307 │ 3872 │
|
||||
│ 2019-07-01 │ 33 │ 77149 │ 4164 │
|
||||
│ 2019-10-01 │ 22 │ 113011 │ 4369 │
|
||||
│ 2020-01-01 │ 34 │ 238273 │ 5133 │
|
||||
│ 2020-04-01 │ 52 │ 454467 │ 6100 │
|
||||
│ 2020-07-01 │ 37 │ 406623 │ 5507 │
|
||||
│ 2020-10-01 │ 49 │ 212143 │ 5385 │
|
||||
│ 2021-01-01 │ 56 │ 151262 │ 5749 │
|
||||
│ 2021-04-01 │ 71 │ 119928 │ 6039 │
|
||||
│ 2021-07-01 │ 53 │ 110342 │ 5765 │
|
||||
│ 2021-10-01 │ 92 │ 121144 │ 6401 │
|
||||
│ 2022-01-01 │ 93 │ 107512 │ 6772 │
|
||||
│ 2022-04-01 │ 120 │ 91560 │ 6687 │
|
||||
│ 2022-07-01 │ 183 │ 99764 │ 7377 │
|
||||
│ 2022-10-01 │ 123 │ 99447 │ 7052 │
|
||||
│ 2023-01-01 │ 126 │ 58733 │ 4891 │
|
||||
└────────────┴────────────┴───────────┴──────────┘
|
||||
|
||||
70 rows in set. Elapsed: 325.835 sec. Processed 14.69 billion rows, 2.57 TB (45.08 million rows/s., 7.87 GB/s.)
|
@ -22,7 +22,7 @@ The steps below will easily work on a local install of ClickHouse too. The only
|
||||
|
||||
## Step-by-step instructions
|
||||
|
||||
1. Let's see what the data looks like. The `s3cluster` table function returns a table, so we can `DESCRIBE` the reult:
|
||||
1. Let's see what the data looks like. The `s3cluster` table function returns a table, so we can `DESCRIBE` the result:
|
||||
|
||||
```sql
|
||||
DESCRIBE s3Cluster(
|
||||
@ -322,7 +322,7 @@ ORDER BY month ASC;
|
||||
A spike of uploaders [around covid is noticeable](https://www.theverge.com/2020/3/27/21197642/youtube-with-me-style-videos-views-coronavirus-cook-workout-study-home-beauty).
|
||||
|
||||
|
||||
### More subtitiles over time and when
|
||||
### More subtitles over time and when
|
||||
|
||||
With advances in speech recognition, it’s easier than ever to create subtitles for video with youtube adding auto-captioning in late 2009 - was the jump then?
|
||||
|
||||
@ -484,4 +484,4 @@ ARRAY JOIN
|
||||
│ 20th │ 16 │
|
||||
│ 10th │ 6 │
|
||||
└────────────┴─────────┘
|
||||
```
|
||||
```
|
||||
|
@ -28,23 +28,25 @@ The quickest and easiest way to get up and running with ClickHouse is to create
|
||||
For production installs of a specific release version see the [installation options](#available-installation-options) down below.
|
||||
:::
|
||||
|
||||
On Linux and macOS:
|
||||
On Linux, macOS and FreeBSD:
|
||||
|
||||
1. If you are just getting started and want to see what ClickHouse can do, the simplest way to download ClickHouse locally is to run the following command. It downloads a single binary for your operating system that can be used to run the ClickHouse server, clickhouse-client, clickhouse-local,
|
||||
ClickHouse Keeper, and other tools:
|
||||
1. If you are just getting started and want to see what ClickHouse can do, the simplest way to download ClickHouse locally is to run the
|
||||
following command. It downloads a single binary for your operating system that can be used to run the ClickHouse server,
|
||||
clickhouse-client, clickhouse-local, ClickHouse Keeper, and other tools:
|
||||
|
||||
```bash
|
||||
curl https://clickhouse.com/ | sh
|
||||
```
|
||||
|
||||
1. Run the following command to start the ClickHouse server:
|
||||
|
||||
```bash
|
||||
./clickhouse server
|
||||
```
|
||||
|
||||
The first time you run this script, the necessary files and folders are created in the current directory, then the server starts.
|
||||
The first time you run this script, the necessary files and folders are created in the current directory, then the server starts.
|
||||
|
||||
1. Open a new terminal and use the **clickhouse-client** to connect to your service:
|
||||
1. Open a new terminal and use the **./clickhouse client** to connect to your service:
|
||||
|
||||
```bash
|
||||
./clickhouse client
|
||||
@ -330,7 +332,9 @@ For production environments, it’s recommended to use the latest `stable`-versi
|
||||
|
||||
To run ClickHouse inside Docker follow the guide on [Docker Hub](https://hub.docker.com/r/clickhouse/clickhouse-server/). Those images use official `deb` packages inside.
|
||||
|
||||
### From Sources {#from-sources}
|
||||
## Non-Production Deployments (Advanced)
|
||||
|
||||
### Compile From Source {#from-sources}
|
||||
|
||||
To manually compile ClickHouse, follow the instructions for [Linux](/docs/en/development/build.md) or [macOS](/docs/en/development/build-osx.md).
|
||||
|
||||
@ -346,8 +350,33 @@ You’ll need to create data and metadata folders manually and `chown` them for
|
||||
|
||||
On Gentoo, you can just use `emerge clickhouse` to install ClickHouse from sources.
|
||||
|
||||
### From CI checks pre-built binaries
|
||||
ClickHouse binaries are built for each [commit](/docs/en/development/build.md#you-dont-have-to-build-clickhouse).
|
||||
### Install a CI-generated Binary
|
||||
|
||||
ClickHouse's continuous integration (CI) infrastructure produces specialized builds for each commit in the [ClickHouse
|
||||
repository](https://github.com/clickhouse/clickhouse/), e.g. [sanitized](https://github.com/google/sanitizers) builds, unoptimized (Debug)
|
||||
builds, cross-compiled builds etc. While such builds are normally only useful during development, they can in certain situations also be
|
||||
interesting for users.
|
||||
|
||||
:::note
|
||||
Since ClickHouse's CI is evolving over time, the exact steps to download CI-generated builds may vary.
|
||||
Also, CI may delete build artifacts that are too old, making them unavailable for download.
|
||||
:::
|
||||
|
||||
For example, to download an aarch64 binary for ClickHouse v23.4, follow these steps:
|
||||
|
||||
- Find the GitHub pull request for release v23.4: [Release pull request for branch 23.4](https://github.com/ClickHouse/ClickHouse/pull/49238)
|
||||
- Click "Commits", then click a commit similar to "Update autogenerated version to 23.4.2.1 and contributors" for the particular version you would like to install.
|
||||
- Click the green check / yellow dot / red cross to open the list of CI checks.
|
||||
- Click "Details" next to "ClickHouse Build Check" in the list, it will open a page similar to [this page](https://s3.amazonaws.com/clickhouse-test-reports/46793/b460eb70bf29b19eadd19a1f959b15d186705394/clickhouse_build_check/report.html)
|
||||
- Find the rows with compiler = "clang-*-aarch64" - there are multiple rows.
|
||||
- Download the artifacts for these builds.
|
||||
|
||||
To download binaries for very old x86-64 systems without [SSE3](https://en.wikipedia.org/wiki/SSE3) support or old ARM systems without
|
||||
[ARMv8.1-A](https://en.wikipedia.org/wiki/AArch64#ARMv8.1-A) support, open a [pull
|
||||
request](https://github.com/ClickHouse/ClickHouse/commits/master) and find CI check "BuilderBinAmd64Compat", respectively
|
||||
"BuilderBinAarch64V80Compat". Then click "Details", open the "Build" fold, scroll to the end, find message "Notice: Build URLs
|
||||
https://s3.amazonaws.com/clickhouse/builds/PRs/.../.../binary_aarch64_v80compat/clickhouse". You can then click the link to download the
|
||||
build.
|
||||
|
||||
## Launch {#launch}
|
||||
|
||||
|
@ -177,11 +177,11 @@ You can pass parameters to `clickhouse-client` (all parameters have a default va
|
||||
- `--user, -u` – The username. Default value: default.
|
||||
- `--password` – The password. Default value: empty string.
|
||||
- `--ask-password` - Prompt the user to enter a password.
|
||||
- `--query, -q` – The query to process when using non-interactive mode. You must specify either `query` or `queries-file` option.
|
||||
- `--queries-file` – file path with queries to execute. You must specify either `query` or `queries-file` option.
|
||||
- `--database, -d` – Select the current default database. Default value: the current database from the server settings (‘default’ by default).
|
||||
- `--query, -q` – The query to process when using non-interactive mode. Cannot be used simultaneously with `--queries-file`.
|
||||
- `--queries-file` – file path with queries to execute. Cannot be used simultaneously with `--query`.
|
||||
- `--multiquery, -n` – If specified, multiple queries separated by semicolons can be listed after the `--query` option. For convenience, it is also possible to omit `--query` and pass the queries directly after `--multiquery`.
|
||||
- `--multiline, -m` – If specified, allow multiline queries (do not send the query on Enter).
|
||||
- `--multiquery, -n` – If specified, allow processing multiple queries separated by semicolons.
|
||||
- `--database, -d` – Select the current default database. Default value: the current database from the server settings (‘default’ by default).
|
||||
- `--format, -f` – Use the specified default format to output the result.
|
||||
- `--vertical, -E` – If specified, use the [Vertical format](../interfaces/formats.md#vertical) by default to output the result. This is the same as `--format=Vertical`. In this format, each value is printed on a separate line, which is helpful when displaying wide tables.
|
||||
- `--time, -t` – If specified, print the query execution time to ‘stderr’ in non-interactive mode.
|
||||
|
@ -467,6 +467,7 @@ The CSV format supports the output of totals and extremes the same way as `TabSe
|
||||
- [output_format_csv_crlf_end_of_line](/docs/en/operations/settings/settings-formats.md/#output_format_csv_crlf_end_of_line) - if it is set to true, end of line in CSV output format will be `\r\n` instead of `\n`. Default value - `false`.
|
||||
- [input_format_csv_skip_first_lines](/docs/en/operations/settings/settings-formats.md/#input_format_csv_skip_first_lines) - skip the specified number of lines at the beginning of data. Default value - `0`.
|
||||
- [input_format_csv_detect_header](/docs/en/operations/settings/settings-formats.md/#input_format_csv_detect_header) - automatically detect header with names and types in CSV format. Default value - `true`.
|
||||
- [input_format_csv_trim_whitespaces](/docs/en/operations/settings/settings-formats.md/#input_format_csv_trim_whitespaces) - trim spaces and tabs in non-quoted CSV strings. Default value - `true`.
|
||||
|
||||
## CSVWithNames {#csvwithnames}
|
||||
|
||||
|
@ -2,34 +2,115 @@
|
||||
slug: /en/operations/named-collections
|
||||
sidebar_position: 69
|
||||
sidebar_label: "Named collections"
|
||||
title: "Named collections"
|
||||
---
|
||||
|
||||
# Storing details for connecting to external sources in configuration files
|
||||
Named collections provide a way to store collections of key-value pairs to be
|
||||
used to configure integrations with external sources. You can use named collections with
|
||||
dictionaries, tables, table functions, and object storage.
|
||||
|
||||
Details for connecting to external sources (dictionaries, tables, table functions) can be saved
|
||||
in configuration files and thus simplify the creation of objects and hide credentials
|
||||
from users with only SQL access.
|
||||
Named collections can be configured with DDL or in configuration files and are applied
|
||||
when ClickHouse starts. They simplify the creation of objects and the hiding of credentials
|
||||
from users without administrative access.
|
||||
|
||||
Parameters can be set in XML `<format>CSV</format>` and overridden in SQL `, format = 'TSV'`.
|
||||
The parameters in SQL can be overridden using format `key` = `value`: `compression_method = 'gzip'`.
|
||||
The keys in a named collection must match the parameter names of the corresponding
|
||||
function, table engine, database, etc. In the examples below the parameter list is
|
||||
linked to for each type.
|
||||
|
||||
Named collections are stored in the `config.xml` file of the ClickHouse server in the `<named_collections>` section and are applied when ClickHouse starts.
|
||||
Parameters set in a named collection can be overridden in SQL; this is shown in the examples
|
||||
below.
|
||||
|
||||
Example of configuration:
|
||||
```xml
|
||||
$ cat /etc/clickhouse-server/config.d/named_collections.xml
|
||||
## Storing named collections in the system database
|
||||
|
||||
### DDL example
|
||||
|
||||
```sql
|
||||
CREATE NAMED COLLECTION name AS
|
||||
key_1 = 'value',
|
||||
key_2 = 'value2',
|
||||
url = 'https://connection.url/'
|
||||
```
|
||||
|
||||
### Permissions to create named collections with DDL
|
||||
|
||||
To manage named collections with DDL a user must have the `named_collection_control` privilege. This can be assigned by adding a file to `/etc/clickhouse-server/users.d/`. The example gives the user `default` both the `access_management` and `named_collection_control` privileges:
|
||||
|
||||
```xml title='/etc/clickhouse-server/users.d/user_default.xml'
|
||||
<clickhouse>
|
||||
<users>
|
||||
<default>
|
||||
<password_sha256_hex replace="true">65e84be33532fb784c48129675f9eff3a682b27168c0ea744b2cf58ee02337c5</password_sha256_hex>
|
||||
<access_management>1</access_management>
|
||||
<!-- highlight-start -->
|
||||
<named_collection_control>1</named_collection_control>
|
||||
<!-- highlight-end -->
|
||||
</default>
|
||||
</users>
|
||||
</clickhouse>
|
||||
```
|
||||
|
||||
:::tip
|
||||
In the above example the `password_sha256_hex` value is the hexadecimal representation of the SHA256 hash of the password. This configuration for the user `default` has the attribute `replace=true` because the default configuration has a plain text `password` set, and it is not possible to have both plain text and sha256 hex passwords set for a user.
|
||||
:::
|
||||
|
||||
## Storing named collections in configuration files
|
||||
|
||||
### XML example
|
||||
|
||||
```xml title='/etc/clickhouse-server/config.d/named_collections.xml'
|
||||
<clickhouse>
|
||||
<named_collections>
|
||||
...
|
||||
<name>
|
||||
<key_1>value</key_1>
|
||||
<key_2>value_2</key_2>
|
||||
<url>https://connection.url/</url>
|
||||
</name>
|
||||
</named_collections>
|
||||
</clickhouse>
|
||||
```
|
||||
|
||||
## Named collections for accessing S3.
|
||||
## Modifying named collections
|
||||
|
||||
Named collections that are created with DDL queries can be altered or dropped with DDL. Named collections created with XML files can be managed by editing or deleting the corresponding XML.
|
||||
|
||||
### Alter a DDL named collection
|
||||
|
||||
Change or add the keys `key1` and `key3` of the collection `collection2`:
|
||||
```sql
|
||||
ALTER NAMED COLLECTION collection2 SET key1=4, key3='value3'
|
||||
```
|
||||
|
||||
Remove the key `key2` from `collection2`:
|
||||
```sql
|
||||
ALTER NAMED COLLECTION collection2 DELETE key2
|
||||
```
|
||||
|
||||
Change or add the key `key1` and delete the key `key3` of the collection `collection2`:
|
||||
```sql
|
||||
ALTER NAMED COLLECTION collection2 SET key1=4, DELETE key3
|
||||
```
|
||||
|
||||
### Drop the DDL named collection `collection2`:
|
||||
```sql
|
||||
DROP NAMED COLLECTION collection2
|
||||
```
|
||||
|
||||
## Named collections for accessing S3
|
||||
|
||||
For a description of the parameters, see [s3 Table Function](../sql-reference/table-functions/s3.md).
|
||||
|
||||
Example of configuration:
|
||||
### DDL example
|
||||
|
||||
```sql
|
||||
CREATE NAMED COLLECTION s3_mydata AS
|
||||
access_key_id = 'AKIAIOSFODNN7EXAMPLE',
|
||||
secret_access_key = 'wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY',
|
||||
format = 'CSV',
|
||||
url = 'https://s3.us-east-1.amazonaws.com/yourbucket/mydata/'
|
||||
```
|
||||
|
||||
### XML example
|
||||
|
||||
```xml
|
||||
<clickhouse>
|
||||
<named_collections>
|
||||
@ -43,23 +124,23 @@ Example of configuration:
|
||||
</clickhouse>
|
||||
```
|
||||
|
||||
### Example of using named collections with the s3 function
|
||||
### s3() function and S3 Table named collection examples
|
||||
|
||||
Both of the following examples use the same named collection `s3_mydata`:
|
||||
|
||||
#### s3() function
|
||||
|
||||
```sql
|
||||
INSERT INTO FUNCTION s3(s3_mydata, filename = 'test_file.tsv.gz',
|
||||
format = 'TSV', structure = 'number UInt64', compression_method = 'gzip')
|
||||
SELECT * FROM numbers(10000);
|
||||
|
||||
SELECT count()
|
||||
FROM s3(s3_mydata, filename = 'test_file.tsv.gz')
|
||||
|
||||
┌─count()─┐
|
||||
│ 10000 │
|
||||
└─────────┘
|
||||
1 rows in set. Elapsed: 0.279 sec. Processed 10.00 thousand rows, 90.00 KB (35.78 thousand rows/s., 322.02 KB/s.)
|
||||
```
|
||||
|
||||
### Example of using named collections with an S3 table
|
||||
:::tip
|
||||
The first argument to the `s3()` function above is the name of the collection, `s3_mydata`. Without named collections, the access key ID, secret, format, and URL would all be passed in every call to the `s3()` function.
|
||||
:::
|
||||
|
||||
#### S3 table
|
||||
|
||||
```sql
|
||||
CREATE TABLE s3_engine_table (number Int64)
|
||||
@ -78,7 +159,22 @@ SELECT * FROM s3_engine_table LIMIT 3;
|
||||
|
||||
For a description of the parameters, see [mysql](../sql-reference/table-functions/mysql.md).
|
||||
|
||||
Example of configuration:
|
||||
### DDL example
|
||||
|
||||
```sql
|
||||
CREATE NAMED COLLECTION mymysql AS
|
||||
user = 'myuser',
|
||||
password = 'mypass',
|
||||
host = '127.0.0.1',
|
||||
port = 3306,
|
||||
database = 'test',
|
||||
connection_pool_size = 8,
|
||||
on_duplicate_clause = 1,
|
||||
replace_query = 1
|
||||
```
|
||||
|
||||
### XML example
|
||||
|
||||
```xml
|
||||
<clickhouse>
|
||||
<named_collections>
|
||||
@ -96,7 +192,11 @@ Example of configuration:
|
||||
</clickhouse>
|
||||
```
|
||||
|
||||
### Example of using named collections with the mysql function
|
||||
### mysql() function, MySQL table, MySQL database, and Dictionary named collection examples
|
||||
|
||||
The four following examples use the same named collection `mymysql`:
|
||||
|
||||
#### mysql() function
|
||||
|
||||
```sql
|
||||
SELECT count() FROM mysql(mymysql, table = 'test');
|
||||
@ -105,8 +205,11 @@ SELECT count() FROM mysql(mymysql, table = 'test');
|
||||
│ 3 │
|
||||
└─────────┘
|
||||
```
|
||||
:::note
|
||||
The named collection does not specify the `table` parameter, so it is specified in the function call as `table = 'test'`.
|
||||
:::
|
||||
|
||||
### Example of using named collections with an MySQL table
|
||||
#### MySQL table
|
||||
|
||||
```sql
|
||||
CREATE TABLE mytable(A Int64) ENGINE = MySQL(mymysql, table = 'test', connection_pool_size=3, replace_query=0);
|
||||
@ -117,7 +220,11 @@ SELECT count() FROM mytable;
|
||||
└─────────┘
|
||||
```
|
||||
|
||||
### Example of using named collections with database with engine MySQL
|
||||
:::note
|
||||
The DDL overrides the named collection setting for connection_pool_size.
|
||||
:::
|
||||
|
||||
#### MySQL database
|
||||
|
||||
```sql
|
||||
CREATE DATABASE mydatabase ENGINE = MySQL(mymysql);
|
||||
@ -130,7 +237,7 @@ SHOW TABLES FROM mydatabase;
|
||||
└────────┘
|
||||
```
|
||||
|
||||
### Example of using named collections with a dictionary with source MySQL
|
||||
#### MySQL Dictionary
|
||||
|
||||
```sql
|
||||
CREATE DICTIONARY dict (A Int64, B String)
|
||||
@ -150,6 +257,17 @@ SELECT dictGet('dict', 'B', 2);
|
||||
|
||||
For a description of the parameters, see [postgresql](../sql-reference/table-functions/postgresql.md).
|
||||
|
||||
```sql
|
||||
CREATE NAMED COLLECTION mypg AS
|
||||
user = 'pguser',
|
||||
password = 'jw8s0F4',
|
||||
host = '127.0.0.1',
|
||||
port = 5432,
|
||||
database = 'test',
|
||||
schema = 'test_schema',
|
||||
connection_pool_size = 8
|
||||
```
|
||||
|
||||
Example of configuration:
|
||||
```xml
|
||||
<clickhouse>
|
||||
@ -229,12 +347,22 @@ SELECT dictGet('dict', 'b', 2);
|
||||
└─────────────────────────┘
|
||||
```
|
||||
|
||||
## Named collections for accessing remote ClickHouse database
|
||||
## Named collections for accessing a remote ClickHouse database
|
||||
|
||||
For a description of the parameters, see [remote](../sql-reference/table-functions/remote.md/#parameters).
|
||||
|
||||
Example of configuration:
|
||||
|
||||
```sql
|
||||
CREATE NAMED COLLECTION remote1 AS
|
||||
host = 'remote_host',
|
||||
port = 9000,
|
||||
database = 'system',
|
||||
user = 'foo',
|
||||
password = 'secret',
|
||||
secure = 1
|
||||
```
|
||||
|
||||
```xml
|
||||
<clickhouse>
|
||||
<named_collections>
|
||||
@ -286,3 +414,4 @@ SELECT dictGet('dict', 'b', 1);
|
||||
│ a │
|
||||
└─────────────────────────┘
|
||||
```
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -577,7 +577,7 @@ Default value: 20
|
||||
|
||||
**Usage**
|
||||
|
||||
The value of the `number_of_free_entries_in_pool_to_execute_mutation` setting should be less than the value of the [background_pool_size](/docs/en/operations/server-configuration-parameters/settings#background_pool_size) * [background_pool_size](/docs/en/operations/server-configuration-parameters/settings#background_merges_mutations_concurrency_ratio). Otherwise, ClickHouse throws an exception.
|
||||
The value of the `number_of_free_entries_in_pool_to_execute_mutation` setting should be less than the value of the [background_pool_size](/docs/en/operations/server-configuration-parameters/settings.md/#background_pool_size) * [background_merges_mutations_concurrency_ratio](/docs/en/operations/server-configuration-parameters/settings.md/#background_merges_mutations_concurrency_ratio). Otherwise, ClickHouse throws an exception.
|
||||
|
||||
## max_part_loading_threads {#max-part-loading-threads}
|
||||
|
||||
@ -840,4 +840,4 @@ Possible values:
|
||||
|
||||
- `Always` or `Never`.
|
||||
|
||||
Default value: `Never`
|
||||
Default value: `Never`
|
||||
|
@ -882,6 +882,38 @@ My NULL
|
||||
My NULL
|
||||
```
|
||||
|
||||
### input_format_csv_trim_whitespaces {#input_format_csv_trim_whitespaces}
|
||||
|
||||
Trims spaces and tabs in non-quoted CSV strings.
|
||||
|
||||
Default value: `true`.
|
||||
|
||||
**Examples**
|
||||
|
||||
Query
|
||||
|
||||
```bash
|
||||
echo ' string ' | ./clickhouse local -q "select * from table FORMAT CSV" --input-format="CSV" --input_format_csv_trim_whitespaces=true
|
||||
```
|
||||
|
||||
Result
|
||||
|
||||
```text
|
||||
"string"
|
||||
```
|
||||
|
||||
Query
|
||||
|
||||
```bash
|
||||
echo ' string ' | ./clickhouse local -q "select * from table FORMAT CSV" --input-format="CSV" --input_format_csv_trim_whitespaces=false
|
||||
```
|
||||
|
||||
Result
|
||||
|
||||
```text
|
||||
" string "
|
||||
```
|
||||
|
||||
## Values format settings {#values-format-settings}
|
||||
|
||||
### input_format_values_interpret_expressions {#input_format_values_interpret_expressions}
|
||||
@ -1182,7 +1214,7 @@ Possible values:
|
||||
|
||||
- `bin` - as 16-bytes binary.
|
||||
- `str` - as a string of 36 bytes.
|
||||
- `ext` - as extention with ExtType = 2.
|
||||
- `ext` - as extension with ExtType = 2.
|
||||
|
||||
Default value: `ext`.
|
||||
|
||||
|
@ -452,6 +452,8 @@ Possible values:
|
||||
|
||||
The first phase of a grace join reads the right table and splits it into N buckets depending on the hash value of key columns (initially, N is `grace_hash_join_initial_buckets`). This is done in a way to ensure that each bucket can be processed independently. Rows from the first bucket are added to an in-memory hash table while the others are saved to disk. If the hash table grows beyond the memory limit (e.g., as set by [`max_bytes_in_join`](/docs/en/operations/settings/query-complexity.md/#settings-max_bytes_in_join)), the number of buckets is increased and the bucket assigned to each row is recalculated. Any rows which don’t belong to the current bucket are flushed and reassigned.
|
||||
|
||||
Supports `INNER/LEFT/RIGHT/FULL ALL/ANY JOIN`.
|
||||
|
||||
- hash
|
||||
|
||||
[Hash join algorithm](https://en.wikipedia.org/wiki/Hash_join) is used. The most generic implementation that supports all combinations of kind and strictness and multiple join keys that are combined with `OR` in the `JOIN ON` section.
|
||||
@ -644,6 +646,48 @@ Used for the same purpose as `max_block_size`, but it sets the recommended block
|
||||
However, the block size cannot be more than `max_block_size` rows.
|
||||
By default: 1,000,000. It only works when reading from MergeTree engines.
|
||||
|
||||
## max_concurrent_queries_for_user {#max-concurrent-queries-for-user}
|
||||
|
||||
The maximum number of simultaneously processed queries related to MergeTree table per user.
|
||||
|
||||
Possible values:
|
||||
|
||||
- Positive integer.
|
||||
- 0 — No limit.
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
**Example**
|
||||
|
||||
``` xml
|
||||
<max_concurrent_queries_for_user>5</max_concurrent_queries_for_user>
|
||||
```
|
||||
|
||||
## max_concurrent_queries_for_all_users {#max-concurrent-queries-for-all-users}
|
||||
|
||||
Throws an exception if the value of this setting is less than or equal to the current number of simultaneously processed queries.
|
||||
|
||||
Example: `max_concurrent_queries_for_all_users` can be set to 99 for all users, and a database administrator can set it to 100 for themselves to run queries for investigation even when the server is overloaded.
|
||||
|
||||
Modifying the setting for one query or user does not affect other queries.
|
||||
|
||||
Possible values:
|
||||
|
||||
- Positive integer.
|
||||
- 0 — No limit.
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
**Example**
|
||||
|
||||
``` xml
|
||||
<max_concurrent_queries_for_all_users>99</max_concurrent_queries_for_all_users>
|
||||
```
|
||||
|
||||
**See Also**
|
||||
|
||||
- [max_concurrent_queries](/docs/en/operations/server-configuration-parameters/settings.md/#max_concurrent_queries)
|
||||
|
||||
## merge_tree_min_rows_for_concurrent_read {#setting-merge-tree-min-rows-for-concurrent-read}
|
||||
|
||||
If the number of rows to be read from a file of a [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table exceeds `merge_tree_min_rows_for_concurrent_read` then ClickHouse tries to perform a concurrent reading from this file on several threads.
|
||||
@ -1048,6 +1092,12 @@ Timeouts in seconds on the socket used for communicating with the client.
|
||||
|
||||
Default value: 10, 300, 300.
|
||||
|
||||
## handshake_timeout_ms {#handshake-timeout-ms}
|
||||
|
||||
Timeout in milliseconds for receiving Hello packet from replicas during handshake.
|
||||
|
||||
Default value: 10000.
|
||||
|
||||
## cancel_http_readonly_queries_on_client_close {#cancel-http-readonly-queries-on-client-close}
|
||||
|
||||
Cancels HTTP read-only queries (e.g. SELECT) when a client closes the connection without waiting for the response.
|
||||
@ -1105,7 +1155,7 @@ Default value: `0`.
|
||||
Could be used for throttling speed when replicating the data to add or replace new nodes.
|
||||
|
||||
:::note
|
||||
60000000 bytes/s approximatly corresponds to 457 Mbps (60000000 / 1024 / 1024 * 8).
|
||||
60000000 bytes/s approximately corresponds to 457 Mbps (60000000 / 1024 / 1024 * 8).
|
||||
:::
|
||||
|
||||
## max_replicated_sends_network_bandwidth_for_server {#max_replicated_sends_network_bandwidth_for_server}
|
||||
@ -1126,7 +1176,7 @@ Default value: `0`.
|
||||
Could be used for throttling speed when replicating the data to add or replace new nodes.
|
||||
|
||||
:::note
|
||||
60000000 bytes/s approximatly corresponds to 457 Mbps (60000000 / 1024 / 1024 * 8).
|
||||
60000000 bytes/s approximately corresponds to 457 Mbps (60000000 / 1024 / 1024 * 8).
|
||||
:::
|
||||
|
||||
## connect_timeout_with_failover_ms {#connect-timeout-with-failover-ms}
|
||||
@ -1185,6 +1235,36 @@ Disable limit on kafka_num_consumers that depends on the number of available CPU
|
||||
|
||||
Default value: false.
|
||||
|
||||
## postgresql_connection_pool_size {#postgresql-connection-pool-size}
|
||||
|
||||
Connection pool size for PostgreSQL table engine and database engine.
|
||||
|
||||
Default value: 16
|
||||
|
||||
## postgresql_connection_pool_wait_timeout {#postgresql-connection-pool-wait-timeout}
|
||||
|
||||
Connection pool push/pop timeout on empty pool for PostgreSQL table engine and database engine. By default it will block on empty pool.
|
||||
|
||||
Default value: 5000
|
||||
|
||||
## postgresql_connection_pool_auto_close_connection {#postgresql-connection-pool-auto-close-connection}
|
||||
|
||||
Close connection before returning connection to the pool.
|
||||
|
||||
Default value: true.
|
||||
|
||||
## odbc_bridge_connection_pool_size {#odbc-bridge-connection-pool-size}
|
||||
|
||||
Connection pool size for each connection settings string in ODBC bridge.
|
||||
|
||||
Default value: 16
|
||||
|
||||
## odbc_bridge_use_connection_pooling {#odbc-bridge-use-connection-pooling}
|
||||
|
||||
Use connection pooling in ODBC bridge. If set to false, a new connection is created every time.
|
||||
|
||||
Default value: true
|
||||
|
||||
## use_uncompressed_cache {#setting-use_uncompressed_cache}
|
||||
|
||||
Whether to use a cache of uncompressed blocks. Accepts 0 or 1. By default, 0 (disabled).
|
||||
@ -1377,6 +1457,12 @@ Possible values:
|
||||
|
||||
Default value: `default`.
|
||||
|
||||
## allow_experimental_parallel_reading_from_replicas
|
||||
|
||||
If true, ClickHouse will send a SELECT query to all replicas of a table (up to `max_parallel_replicas`). It will work for any kind of MergeTree table.
|
||||
|
||||
Default value: `false`.
|
||||
|
||||
## compile_expressions {#compile-expressions}
|
||||
|
||||
Enables or disables compilation of frequently used simple functions and operators to native code with LLVM at runtime.
|
||||
@ -1708,7 +1794,7 @@ Default value: `100000`.
|
||||
|
||||
### async_insert_max_query_number {#async-insert-max-query-number}
|
||||
|
||||
The maximum number of insert queries per block before being inserted. This setting takes effect only if [async_insert_deduplicate](#settings-async-insert-deduplicate) is enabled.
|
||||
The maximum number of insert queries per block before being inserted. This setting takes effect only if [async_insert_deduplicate](#async-insert-deduplicate) is enabled.
|
||||
|
||||
Possible values:
|
||||
|
||||
@ -1739,7 +1825,7 @@ Possible values:
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
### async_insert_deduplicate {#settings-async-insert-deduplicate}
|
||||
### async_insert_deduplicate {#async-insert-deduplicate}
|
||||
|
||||
Enables or disables insert deduplication of `ASYNC INSERT` (for Replicated\* tables).
|
||||
|
||||
@ -1992,7 +2078,7 @@ FORMAT PrettyCompactMonoBlock
|
||||
|
||||
## distributed_push_down_limit {#distributed-push-down-limit}
|
||||
|
||||
Enables or disables [LIMIT](#limit) applying on each shard separatelly.
|
||||
Enables or disables [LIMIT](#limit) applying on each shard separately.
|
||||
|
||||
This will allow to avoid:
|
||||
- Sending extra rows over network;
|
||||
@ -2393,7 +2479,7 @@ Default value: 0.
|
||||
|
||||
## allow_introspection_functions {#settings-allow_introspection_functions}
|
||||
|
||||
Enables or disables [introspections functions](../../sql-reference/functions/introspection.md) for query profiling.
|
||||
Enables or disables [introspection functions](../../sql-reference/functions/introspection.md) for query profiling.
|
||||
|
||||
Possible values:
|
||||
|
||||
@ -3213,17 +3299,6 @@ Possible values:
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
## allow_experimental_geo_types {#allow-experimental-geo-types}
|
||||
|
||||
Allows working with experimental [geo data types](../../sql-reference/data-types/geo.md).
|
||||
|
||||
Possible values:
|
||||
|
||||
- 0 — Working with geo data types is disabled.
|
||||
- 1 — Working with geo data types is enabled.
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
## database_atomic_wait_for_drop_and_detach_synchronously {#database_atomic_wait_for_drop_and_detach_synchronously}
|
||||
|
||||
Adds a modifier `SYNC` to all `DROP` and `DETACH` queries.
|
||||
@ -3465,7 +3540,7 @@ Default value: `0`.
|
||||
|
||||
## database_replicated_initial_query_timeout_sec {#database_replicated_initial_query_timeout_sec}
|
||||
|
||||
Sets how long initial DDL query should wait for Replicated database to precess previous DDL queue entries in seconds.
|
||||
Sets how long initial DDL query should wait for Replicated database to process previous DDL queue entries in seconds.
|
||||
|
||||
Possible values:
|
||||
|
||||
@ -3566,7 +3641,7 @@ SETTINGS index_granularity = 8192 │
|
||||
|
||||
## external_table_functions_use_nulls {#external-table-functions-use-nulls}
|
||||
|
||||
Defines how [mysql](../../sql-reference/table-functions/mysql.md), [postgresql](../../sql-reference/table-functions/postgresql.md) and [odbc](../../sql-reference/table-functions/odbc.md)] table functions use Nullable columns.
|
||||
Defines how [mysql](../../sql-reference/table-functions/mysql.md), [postgresql](../../sql-reference/table-functions/postgresql.md) and [odbc](../../sql-reference/table-functions/odbc.md) table functions use Nullable columns.
|
||||
|
||||
Possible values:
|
||||
|
||||
@ -3791,8 +3866,8 @@ Result:
|
||||
## enable_extended_results_for_datetime_functions {#enable-extended-results-for-datetime-functions}
|
||||
|
||||
Enables or disables returning results of type:
|
||||
- `Date32` with extended range (compared to type `Date`) for functions [toStartOfYear](../../sql-reference/functions/date-time-functions.md/#tostartofyear), [toStartOfISOYear](../../sql-reference/functions/date-time-functions.md/#tostartofisoyear), [toStartOfQuarter](../../sql-reference/functions/date-time-functions.md/#tostartofquarter), [toStartOfMonth](../../sql-reference/functions/date-time-functions.md/#tostartofmonth), [toStartOfWeek](../../sql-reference/functions/date-time-functions.md/#tostartofweek), [toMonday](../../sql-reference/functions/date-time-functions.md/#tomonday) and [toLastDayOfMonth](../../sql-reference/functions/date-time-functions.md/#tolastdayofmonth).
|
||||
- `DateTime64` with extended range (compared to type `DateTime`) for functions [toStartOfDay](../../sql-reference/functions/date-time-functions.md/#tostartofday), [toStartOfHour](../../sql-reference/functions/date-time-functions.md/#tostartofhour), [toStartOfMinute](../../sql-reference/functions/date-time-functions.md/#tostartofminute), [toStartOfFiveMinutes](../../sql-reference/functions/date-time-functions.md/#tostartoffiveminutes), [toStartOfTenMinutes](../../sql-reference/functions/date-time-functions.md/#tostartoftenminutes), [toStartOfFifteenMinutes](../../sql-reference/functions/date-time-functions.md/#tostartoffifteenminutes) and [timeSlot](../../sql-reference/functions/date-time-functions.md/#timeslot).
|
||||
- `Date32` with extended range (compared to type `Date`) for functions [toStartOfYear](../../sql-reference/functions/date-time-functions.md#tostartofyear), [toStartOfISOYear](../../sql-reference/functions/date-time-functions.md#tostartofisoyear), [toStartOfQuarter](../../sql-reference/functions/date-time-functions.md#tostartofquarter), [toStartOfMonth](../../sql-reference/functions/date-time-functions.md#tostartofmonth), [toLastDayOfMonth](../../sql-reference/functions/date-time-functions.md#tolastdayofmonth), [toStartOfWeek](../../sql-reference/functions/date-time-functions.md#tostartofweek), [toLastDayOfWeek](../../sql-reference/functions/date-time-functions.md#tolastdayofweek) and [toMonday](../../sql-reference/functions/date-time-functions.md#tomonday).
|
||||
- `DateTime64` with extended range (compared to type `DateTime`) for functions [toStartOfDay](../../sql-reference/functions/date-time-functions.md#tostartofday), [toStartOfHour](../../sql-reference/functions/date-time-functions.md#tostartofhour), [toStartOfMinute](../../sql-reference/functions/date-time-functions.md#tostartofminute), [toStartOfFiveMinutes](../../sql-reference/functions/date-time-functions.md#tostartoffiveminutes), [toStartOfTenMinutes](../../sql-reference/functions/date-time-functions.md#tostartoftenminutes), [toStartOfFifteenMinutes](../../sql-reference/functions/date-time-functions.md#tostartoffifteenminutes) and [timeSlot](../../sql-reference/functions/date-time-functions.md#timeslot).
|
||||
|
||||
Possible values:
|
||||
|
||||
@ -4154,6 +4229,12 @@ Default value: `2000`
|
||||
If it's enabled, in hedged requests we can start new connection until receiving first data packet even if we have already made some progress
|
||||
(but progress hasn't been updated for the `receive_data_timeout` timeout), otherwise we disable changing replica after the first time we made progress.
|
||||
|
||||
## parallel_view_processing
|
||||
|
||||
Enables pushing to attached views concurrently instead of sequentially.
|
||||
|
||||
Default value: `false`.
|
||||
|
||||
## partial_result_on_first_cancel {#partial_result_on_first_cancel}
|
||||
When set to `true` and the user wants to interrupt a query (for example using `Ctrl+C` on the client), then the query continues execution only on data that was already read from the table. Afterwards, it will return a partial result of the query for the part of the table that was read. To fully stop the execution of a query without a partial result, the user should send 2 cancel requests.
|
||||
|
||||
@ -4223,3 +4304,12 @@ Possible values:
|
||||
- false — Disallow.
|
||||
|
||||
Default value: `false`.
|
||||
|
||||
## zstd_window_log_max
|
||||
|
||||
Allows you to select the max window log of ZSTD (it will not be used for MergeTree family)
|
||||
|
||||
Type: Int64
|
||||
|
||||
Default: 0
|
||||
|
||||
|
27
docs/en/operations/system-tables/build_options.md
Normal file
27
docs/en/operations/system-tables/build_options.md
Normal file
@ -0,0 +1,27 @@
|
||||
---
|
||||
slug: /en/operations/system-tables/build_options
|
||||
---
|
||||
# build_options
|
||||
|
||||
Contains information about the ClickHouse server's build options.
|
||||
|
||||
Columns:
|
||||
|
||||
- `name` (String) — Name of the build option, e.g. `USE_ODBC`
|
||||
- `value` (String) — Value of the build option, e.g. `1`
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
SELECT * FROM system.build_options LIMIT 5
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─name─────────────┬─value─┐
|
||||
│ USE_BROTLI │ 1 │
|
||||
│ USE_BZIP2 │ 1 │
|
||||
│ USE_CAPNP │ 1 │
|
||||
│ USE_CASSANDRA │ 1 │
|
||||
│ USE_DATASKETCHES │ 1 │
|
||||
└──────────────────┴───────┘
|
||||
```
|
@ -28,7 +28,7 @@ The `system.columns` table contains the following columns (the column type is sh
|
||||
- `is_in_sampling_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether the column is in the sampling key expression.
|
||||
- `compression_codec` ([String](../../sql-reference/data-types/string.md)) — Compression codec name.
|
||||
- `character_octet_length` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum length in bytes for binary data, character data, or text data and images. In ClickHouse makes sense only for `FixedString` data type. Otherwise, the `NULL` value is returned.
|
||||
- `numeric_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Accuracy of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse it is bitness for integer types and decimal precision for `Decimal` types. Otherwise, the `NULL` value is returned.
|
||||
- `numeric_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Accuracy of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse it is bit width for integer types and decimal precision for `Decimal` types. Otherwise, the `NULL` value is returned.
|
||||
- `numeric_precision_radix` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The base of the number system is the accuracy of approximate numeric data, exact numeric data, integer data or monetary data. In ClickHouse it's 2 for integer types and 10 for `Decimal` types. Otherwise, the `NULL` value is returned.
|
||||
- `numeric_scale` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The scale of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse makes sense only for `Decimal` types. Otherwise, the `NULL` value is returned.
|
||||
- `datetime_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Decimal precision of `DateTime64` data type. For other data types, the `NULL` value is returned.
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user