Mirror of https://github.com/ClickHouse/ClickHouse.git, synced 2024-11-25 00:52:02 +00:00

commit d2aa1779ed
Merge branch 'master' into rmt_better_background_tasks_scheduling

5 .github/workflows/pull_request.yml (vendored)
@@ -46,7 +46,12 @@ jobs:
       - name: Python unit tests
         run: |
           cd "$GITHUB_WORKSPACE/tests/ci"
+          echo "Testing the main ci directory"
           python3 -m unittest discover -s . -p '*_test.py'
+          for dir in *_lambda/; do
+            echo "Testing $dir"
+            python3 -m unittest discover -s "$dir" -p '*_test.py'
+          done
   DockerHubPushAarch64:
     needs: CheckLabels
     runs-on: [self-hosted, style-checker-aarch64]
13 .gitmodules (vendored)

@@ -35,10 +35,9 @@
 [submodule "contrib/unixodbc"]
     path = contrib/unixodbc
     url = https://github.com/ClickHouse/UnixODBC
-[submodule "contrib/protobuf"]
-    path = contrib/protobuf
-    url = https://github.com/ClickHouse/protobuf
-    branch = v3.13.0.1
+[submodule "contrib/google-protobuf"]
+    path = contrib/google-protobuf
+    url = https://github.com/ClickHouse/google-protobuf.git
 [submodule "contrib/boost"]
     path = contrib/boost
     url = https://github.com/ClickHouse/boost

@@ -268,9 +267,6 @@
 [submodule "contrib/vectorscan"]
     path = contrib/vectorscan
     url = https://github.com/VectorCamp/vectorscan.git
-[submodule "contrib/c-ares"]
-    path = contrib/c-ares
-    url = https://github.com/ClickHouse/c-ares
 [submodule "contrib/llvm-project"]
     path = contrib/llvm-project
     url = https://github.com/ClickHouse/llvm-project

@@ -344,3 +340,6 @@
 [submodule "contrib/isa-l"]
     path = contrib/isa-l
     url = https://github.com/ClickHouse/isa-l.git
+[submodule "contrib/c-ares"]
+    path = contrib/c-ares
+    url = https://github.com/c-ares/c-ares.git
@@ -28,14 +28,28 @@ uint64_t getMemoryAmountOrZero()
 
 #if defined(OS_LINUX)
     // Try to lookup at the Cgroup limit
-    std::ifstream cgroup_limit("/sys/fs/cgroup/memory/memory.limit_in_bytes");
-    if (cgroup_limit.is_open())
+
+    // CGroups v2
+    std::ifstream cgroupv2_limit("/sys/fs/cgroup/memory.max");
+    if (cgroupv2_limit.is_open())
     {
-        uint64_t memory_limit = 0; // in case of read error
-        cgroup_limit >> memory_limit;
+        uint64_t memory_limit = 0;
+        cgroupv2_limit >> memory_limit;
         if (memory_limit > 0 && memory_limit < memory_amount)
             memory_amount = memory_limit;
     }
+    else
+    {
+        // CGroups v1
+        std::ifstream cgroup_limit("/sys/fs/cgroup/memory/memory.limit_in_bytes");
+        if (cgroup_limit.is_open())
+        {
+            uint64_t memory_limit = 0; // in case of read error
+            cgroup_limit >> memory_limit;
+            if (memory_limit > 0 && memory_limit < memory_amount)
+                memory_amount = memory_limit;
+        }
+    }
 #endif
 
     return memory_amount;
@@ -274,7 +274,9 @@ void SocketImpl::shutdown()
 
 int SocketImpl::sendBytes(const void* buffer, int length, int flags)
 {
-    if (_isBrokenTimeout)
+    bool blocking = _blocking && (flags & MSG_DONTWAIT) == 0;
+
+    if (_isBrokenTimeout && blocking)
     {
         if (_sndTimeout.totalMicroseconds() != 0)
         {

@@ -289,11 +291,13 @@ int SocketImpl::sendBytes(const void* buffer, int length, int flags)
         if (_sockfd == POCO_INVALID_SOCKET) throw InvalidSocketException();
         rc = ::send(_sockfd, reinterpret_cast<const char*>(buffer), length, flags);
     }
-    while (_blocking && rc < 0 && lastError() == POCO_EINTR);
+    while (blocking && rc < 0 && lastError() == POCO_EINTR);
     if (rc < 0)
     {
         int err = lastError();
-        if (err == POCO_EAGAIN || err == POCO_ETIMEDOUT)
+        if ((err == POCO_EAGAIN || err == POCO_EWOULDBLOCK) && !blocking)
+            ;
+        else if (err == POCO_EAGAIN || err == POCO_ETIMEDOUT)
             throw TimeoutException();
         else
             error(err);
@@ -183,6 +183,16 @@ namespace Net
         /// Returns true iff a reused session was negotiated during
         /// the handshake.
 
+    virtual void setBlocking(bool flag);
+        /// Sets the socket in blocking mode if flag is true,
+        /// disables blocking mode if flag is false.
+
+    virtual bool getBlocking() const;
+        /// Returns the blocking mode of the socket.
+        /// This method will only work if the blocking modes of
+        /// the socket are changed via the setBlocking method!
+
+
     protected:
         void acceptSSL();
         /// Assume per-object mutex is locked.
@@ -201,6 +201,16 @@ namespace Net
         /// Returns true iff a reused session was negotiated during
         /// the handshake.
 
+    virtual void setBlocking(bool flag);
+        /// Sets the socket in blocking mode if flag is true,
+        /// disables blocking mode if flag is false.
+
+    virtual bool getBlocking() const;
+        /// Returns the blocking mode of the socket.
+        /// This method will only work if the blocking modes of
+        /// the socket are changed via the setBlocking method!
+
+
     protected:
         void acceptSSL();
         /// Performs a SSL server-side handshake.
@@ -629,5 +629,15 @@ bool SecureSocketImpl::sessionWasReused()
     return false;
 }
 
+void SecureSocketImpl::setBlocking(bool flag)
+{
+    _pSocket->setBlocking(flag);
+}
+
+bool SecureSocketImpl::getBlocking() const
+{
+    return _pSocket->getBlocking();
+}
+
 
 } } // namespace Poco::Net
@@ -237,5 +237,15 @@ int SecureStreamSocketImpl::completeHandshake()
     return _impl.completeHandshake();
 }
 
+bool SecureStreamSocketImpl::getBlocking() const
+{
+    return _impl.getBlocking();
+}
+
+void SecureStreamSocketImpl::setBlocking(bool flag)
+{
+    _impl.setBlocking(flag);
+}
+
 
 } } // namespace Poco::Net
@@ -1,2 +1,2 @@
-wget https://github.com/phracker/MacOSX-SDKs/releases/download/10.15/MacOSX10.15.sdk.tar.xz
-tar xJf MacOSX10.15.sdk.tar.xz --strip-components=1
+wget https://github.com/phracker/MacOSX-SDKs/releases/download/11.3/MacOSX11.0.sdk.tar.xz
+tar xJf MacOSX11.0.sdk.tar.xz --strip-components=1
4 contrib/CMakeLists.txt (vendored)

@@ -88,7 +88,7 @@ add_contrib (thrift-cmake thrift)
 # parquet/arrow/orc
 add_contrib (arrow-cmake arrow) # requires: snappy, thrift, double-conversion
 add_contrib (avro-cmake avro) # requires: snappy
-add_contrib (protobuf-cmake protobuf)
+add_contrib (google-protobuf-cmake google-protobuf)
 add_contrib (openldap-cmake openldap)
 add_contrib (grpc-cmake grpc)
 add_contrib (msgpack-c-cmake msgpack-c)

@@ -156,7 +156,7 @@ add_contrib (libgsasl-cmake libgsasl) # requires krb5
 add_contrib (librdkafka-cmake librdkafka) # requires: libgsasl
 add_contrib (nats-io-cmake nats-io)
 add_contrib (isa-l-cmake isa-l)
-add_contrib (libhdfs3-cmake libhdfs3) # requires: protobuf, krb5, isa-l
+add_contrib (libhdfs3-cmake libhdfs3) # requires: google-protobuf, krb5, isa-l
 add_contrib (hive-metastore-cmake hive-metastore) # requires: thrift/avro/arrow/libhdfs3
 add_contrib (cppkafka-cmake cppkafka)
 add_contrib (libpqxx-cmake libpqxx)
2 contrib/c-ares (vendored)

@@ -1 +1 @@
-Subproject commit afee6748b0b99acf4509d42fa37ac8422262f91b
+Subproject commit 6360e96b5cf8e5980c887ce58ef727e53d77243a

@@ -48,6 +48,7 @@ SET(SRCS
     "${LIBRARY_DIR}/src/lib/ares_platform.c"
     "${LIBRARY_DIR}/src/lib/ares_process.c"
     "${LIBRARY_DIR}/src/lib/ares_query.c"
+    "${LIBRARY_DIR}/src/lib/ares_rand.c"
     "${LIBRARY_DIR}/src/lib/ares_search.c"
    "${LIBRARY_DIR}/src/lib/ares_send.c"
     "${LIBRARY_DIR}/src/lib/ares_strcasecmp.c"
1 contrib/google-protobuf (vendored, new submodule)

@@ -0,0 +1 @@
+Subproject commit c47efe2d8f6a60022b49ecd6cc23660687c8598f
@@ -5,7 +5,7 @@ if(NOT ENABLE_PROTOBUF)
     return()
 endif()
 
-set(Protobuf_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/protobuf/src")
+set(Protobuf_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/google-protobuf/src")
 if(OS_FREEBSD AND SANITIZE STREQUAL "address")
   # ../contrib/protobuf/src/google/protobuf/arena_impl.h:45:10: fatal error: 'sanitizer/asan_interface.h' file not found
   # #include <sanitizer/asan_interface.h>

@@ -17,8 +17,8 @@ if(OS_FREEBSD AND SANITIZE STREQUAL "address")
   endif()
 endif()
 
-set(protobuf_source_dir "${ClickHouse_SOURCE_DIR}/contrib/protobuf")
-set(protobuf_binary_dir "${ClickHouse_BINARY_DIR}/contrib/protobuf")
+set(protobuf_source_dir "${ClickHouse_SOURCE_DIR}/contrib/google-protobuf")
+set(protobuf_binary_dir "${ClickHouse_BINARY_DIR}/contrib/google-protobuf")
 
 
 add_definitions(-DGOOGLE_PROTOBUF_CMAKE_BUILD)

@@ -35,7 +35,6 @@ set(libprotobuf_lite_files
     ${protobuf_source_dir}/src/google/protobuf/arena.cc
     ${protobuf_source_dir}/src/google/protobuf/arenastring.cc
     ${protobuf_source_dir}/src/google/protobuf/extension_set.cc
-    ${protobuf_source_dir}/src/google/protobuf/field_access_listener.cc
     ${protobuf_source_dir}/src/google/protobuf/generated_enum_util.cc
     ${protobuf_source_dir}/src/google/protobuf/generated_message_table_driven_lite.cc
     ${protobuf_source_dir}/src/google/protobuf/generated_message_util.cc

@@ -86,6 +85,7 @@ set(libprotobuf_files
     ${protobuf_source_dir}/src/google/protobuf/empty.pb.cc
     ${protobuf_source_dir}/src/google/protobuf/extension_set_heavy.cc
     ${protobuf_source_dir}/src/google/protobuf/field_mask.pb.cc
+    ${protobuf_source_dir}/src/google/protobuf/generated_message_bases.cc
     ${protobuf_source_dir}/src/google/protobuf/generated_message_reflection.cc
     ${protobuf_source_dir}/src/google/protobuf/generated_message_table_driven.cc
     ${protobuf_source_dir}/src/google/protobuf/io/gzip_stream.cc

@@ -316,7 +316,7 @@ else ()
     add_dependencies(protoc "${PROTOC_BUILD_DIR}/protoc")
 endif ()
 
-include("${ClickHouse_SOURCE_DIR}/contrib/protobuf-cmake/protobuf_generate.cmake")
+include("${ClickHouse_SOURCE_DIR}/contrib/google-protobuf-cmake/protobuf_generate.cmake")
 
 add_library(_protobuf INTERFACE)
 target_link_libraries(_protobuf INTERFACE _libprotobuf)
2 contrib/libgsasl (vendored)

@@ -1 +1 @@
-Subproject commit f4e7bf0bb068030d57266f87ccac4c8c012fb5c4
+Subproject commit 0fb79e7609ae5a5e015a41d24bcbadd48f8f5469
2 contrib/libxml2 (vendored)

@@ -1 +1 @@
-Subproject commit f507d167f1755b7eaea09fb1a44d29aab828b6d1
+Subproject commit 223cb03a5d27b1b2393b266a8657443d046139d6
1 contrib/protobuf (vendored)

@@ -1 +0,0 @@
-Subproject commit 6bb70196c5360268d9f021bb7936fb0b551724c2
@@ -46,10 +46,12 @@ ENV CXX=clang++-${LLVM_VERSION}
 # Rust toolchain and libraries
 ENV RUSTUP_HOME=/rust/rustup
 ENV CARGO_HOME=/rust/cargo
-ENV PATH="/rust/cargo/env:${PATH}"
+ENV PATH="/rust/cargo/bin:${PATH}"
 RUN curl https://sh.rustup.rs -sSf | bash -s -- -y && \
+    chmod 777 -R /rust && \
     rustup toolchain install nightly && \
     rustup default nightly && \
+    rustup component add rust-src && \
     rustup target add aarch64-unknown-linux-gnu && \
     rustup target add x86_64-apple-darwin && \
     rustup target add x86_64-unknown-freebsd && \
@@ -11,9 +11,11 @@ ccache_status () {
 
 [ -O /build ] || git config --global --add safe.directory /build
 
-mkdir -p /build/cmake/toolchain/darwin-x86_64
-tar xJf /MacOSX11.0.sdk.tar.xz -C /build/cmake/toolchain/darwin-x86_64 --strip-components=1
-ln -sf darwin-x86_64 /build/cmake/toolchain/darwin-aarch64
+if [ "$EXTRACT_TOOLCHAIN_DARWIN" = "1" ]; then
+  mkdir -p /build/cmake/toolchain/darwin-x86_64
+  tar xJf /MacOSX11.0.sdk.tar.xz -C /build/cmake/toolchain/darwin-x86_64 --strip-components=1
+  ln -sf darwin-x86_64 /build/cmake/toolchain/darwin-aarch64
+fi
 
 # Uncomment to debug ccache. Don't put ccache log in /output right away, or it
 # will be confusingly packed into the "performance" package.
@@ -167,6 +167,7 @@ def parse_env_variables(
         cmake_flags.append(
             "-DCMAKE_TOOLCHAIN_FILE=/build/cmake/darwin/toolchain-x86_64.cmake"
         )
+        result.append("EXTRACT_TOOLCHAIN_DARWIN=1")
     elif is_cross_darwin_arm:
         cc = compiler[: -len(DARWIN_ARM_SUFFIX)]
         cmake_flags.append("-DCMAKE_AR:FILEPATH=/cctools/bin/aarch64-apple-darwin-ar")

@@ -181,6 +182,7 @@ def parse_env_variables(
         cmake_flags.append(
             "-DCMAKE_TOOLCHAIN_FILE=/build/cmake/darwin/toolchain-aarch64.cmake"
        )
+        result.append("EXTRACT_TOOLCHAIN_DARWIN=1")
     elif is_cross_arm:
         cc = compiler[: -len(ARM_SUFFIX)]
         cmake_flags.append(
@@ -626,7 +626,9 @@ if args.report == "main":
         message_array.append(str(faster_queries) + " faster")
 
     if slower_queries:
-        if slower_queries > 3:
+        # This threshold should be synchronized with the value in https://github.com/ClickHouse/ClickHouse/blob/master/tests/ci/performance_comparison_check.py#L225
+        # False positives rate should be < 1%: https://shorturl.at/CDEK8
+        if slower_queries > 5:
             status = "failure"
         message_array.append(str(slower_queries) + " slower")
 
@@ -3,5 +3,5 @@
 set -x
 
 service zookeeper start && sleep 7 && /usr/share/zookeeper/bin/zkCli.sh -server localhost:2181 -create create /clickhouse_test '';
-gdb -q -ex 'set print inferior-events off' -ex 'set confirm off' -ex 'set print thread-events off' -ex run -ex bt -ex quit --args ./unit_tests_dbms | tee test_output/test_result.txt
+timeout 40m gdb -q -ex 'set print inferior-events off' -ex 'set confirm off' -ex 'set print thread-events off' -ex run -ex bt -ex quit --args ./unit_tests_dbms | tee test_output/test_result.txt
 ./process_unit_tests_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv
@@ -119,7 +119,7 @@ When working with the `MaterializedMySQL` database engine, [ReplacingMergeTree](
 
 The data of TIME type in MySQL is converted to microseconds in ClickHouse.
 
-Other types are not supported. If MySQL table contains a column of such type, ClickHouse throws exception "Unhandled data type" and stops replication.
+Other types are not supported. If MySQL table contains a column of such type, ClickHouse throws an exception and stops replication.
 
 ## Specifics and Recommendations {#specifics-and-recommendations}
 
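For orientation, a minimal sketch of the engine this page documents; the host, credentials, and database names below are placeholders, and the experimental flag may be required on your version:

```sql
-- Hypothetical setup: replicate a MySQL database into ClickHouse.
SET allow_experimental_database_materialized_mysql = 1;

CREATE DATABASE mysql_replica
ENGINE = MaterializedMySQL('mysql-host:3306', 'source_db', 'user', 'password');
```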
@@ -55,7 +55,7 @@ ATTACH TABLE postgres_database.new_table;
 ```
 
 :::warning
-Before version 22.1, adding a table to replication left an unremoved temporary replication slot (named `{db_name}_ch_replication_slot_tmp`). If attaching tables in ClickHouse version before 22.1, make sure to delete it manually (`SELECT pg_drop_replication_slot('{db_name}_ch_replication_slot_tmp')`). Otherwise disk usage will grow. This issue is fixed in 22.1.
+Before version 22.1, adding a table to replication left a non-removed temporary replication slot (named `{db_name}_ch_replication_slot_tmp`). If attaching tables in ClickHouse version before 22.1, make sure to delete it manually (`SELECT pg_drop_replication_slot('{db_name}_ch_replication_slot_tmp')`). Otherwise disk usage will grow. This issue is fixed in 22.1.
 :::
 
 ## Dynamically removing tables from replication {#dynamically-removing-table-from-replication}

@@ -257,7 +257,7 @@ Please note that this should be used only if it is actually needed. If there is
 
 1. [CREATE PUBLICATION](https://postgrespro.ru/docs/postgresql/14/sql-createpublication) -- create query privilege.
 
-2. [CREATE_REPLICATION_SLOT](https://postgrespro.ru/docs/postgrespro/10/protocol-replication#PROTOCOL-REPLICATION-CREATE-SLOT) -- replication privelege.
+2. [CREATE_REPLICATION_SLOT](https://postgrespro.ru/docs/postgrespro/10/protocol-replication#PROTOCOL-REPLICATION-CREATE-SLOT) -- replication privilege.
 
 3. [pg_drop_replication_slot](https://postgrespro.ru/docs/postgrespro/9.5/functions-admin#functions-replication) -- replication privilege or superuser.
 
@@ -30,7 +30,7 @@ Allows to connect to [SQLite](https://www.sqlite.org/index.html) database and pe
 
 ## Specifics and Recommendations {#specifics-and-recommendations}
 
-SQLite stores the entire database (definitions, tables, indices, and the data itself) as a single cross-platform file on a host machine. During writing SQLite locks the entire database file, therefore write operations are performed sequentially. Read operations can be multitasked.
+SQLite stores the entire database (definitions, tables, indices, and the data itself) as a single cross-platform file on a host machine. During writing SQLite locks the entire database file, therefore write operations are performed sequentially. Read operations can be multi-tasked.
 SQLite does not require service management (such as startup scripts) or access control based on `GRANT` and passwords. Access control is handled by means of file-system permissions given to the database file itself.
 
 ## Usage Example {#usage-example}
 
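A minimal sketch of attaching a file; the path and table name are placeholders:

```sql
-- Attach an existing SQLite file as a ClickHouse database and query it.
CREATE DATABASE sqlite_db ENGINE = SQLite('/data/sqlite.db');
SELECT * FROM sqlite_db.some_table LIMIT 10;
```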
@@ -120,3 +120,93 @@ Values can be updated using the `ALTER TABLE` query. The primary key cannot be u
 ```sql
 ALTER TABLE test UPDATE v1 = v1 * 10 + 2 WHERE key LIKE 'some%' AND v3 > 3.1;
 ```
+
+### Joins
+
+A special `direct` join with EmbeddedRocksDB tables is supported.
+This direct join avoids forming a hash table in memory and accesses
+the data directly from the EmbeddedRocksDB.
+
+With large joins you may see much lower memory usage with direct joins
+because the hash table is not created.
+
+To enable direct joins:
+```sql
+SET join_algorithm = 'direct, hash'
+```
+
+:::tip
+When the `join_algorithm` is set to `direct, hash`, direct joins will be used
+when possible, and hash otherwise.
+:::
+
+#### Example
+
+##### Create and populate an EmbeddedRocksDB table:
+```sql
+CREATE TABLE rdb
+(
+    `key` UInt32,
+    `value` Array(UInt32),
+    `value2` String
+)
+ENGINE = EmbeddedRocksDB
+PRIMARY KEY key
+```
+
+```sql
+INSERT INTO rdb
+    SELECT
+        toUInt32(sipHash64(number) % 10) as key,
+        [key, key+1] as value,
+        ('val2' || toString(key)) as value2
+    FROM numbers_mt(10);
+```
+
+##### Create and populate a table to join with table `rdb`:
+
+```sql
+CREATE TABLE t2
+(
+    `k` UInt16
+)
+ENGINE = TinyLog
+```
+
+```sql
+INSERT INTO t2 SELECT number AS k
+FROM numbers_mt(10)
+```
+
+##### Set the join algorithm to `direct`:
+
+```sql
+SET join_algorithm = 'direct'
+```
+
+##### An INNER JOIN:
+```sql
+SELECT *
+FROM
+(
+    SELECT k AS key
+    FROM t2
+) AS t2
+INNER JOIN rdb ON rdb.key = t2.key
+ORDER BY key ASC
+```
+```response
+┌─key─┬─rdb.key─┬─value──┬─value2─┐
+│   0 │       0 │ [0,1]  │ val20  │
+│   2 │       2 │ [2,3]  │ val22  │
+│   3 │       3 │ [3,4]  │ val23  │
+│   6 │       6 │ [6,7]  │ val26  │
+│   7 │       7 │ [7,8]  │ val27  │
+│   8 │       8 │ [8,9]  │ val28  │
+│   9 │       9 │ [9,10] │ val29  │
+└─────┴─────────┴────────┴────────┘
+```
+
+### More information on Joins
+- [`join_algorithm` setting](/docs/en/operations/settings/settings.md#settings-join_algorithm)
+- [JOIN clause](/docs/en/sql-reference/statements/select/join.md)
@@ -156,7 +156,7 @@ Similar to GraphiteMergeTree, the HDFS engine supports extended configuration us
 | rpc\_client\_connect\_timeout | 600 * 1000 |
 | rpc\_client\_read\_timeout | 3600 * 1000 |
 | rpc\_client\_write\_timeout | 3600 * 1000 |
-| rpc\_client\_socekt\_linger\_timeout | -1 |
+| rpc\_client\_socket\_linger\_timeout | -1 |
 | rpc\_client\_connect\_retry | 10 |
 | rpc\_client\_timeout | 3600 * 1000 |
 | dfs\_default\_replica | 3 |

@@ -176,7 +176,7 @@ Similar to GraphiteMergeTree, the HDFS engine supports extended configuration us
 | output\_write\_timeout | 3600 * 1000 |
 | output\_close\_timeout | 3600 * 1000 |
 | output\_packetpool\_size | 1024 |
-| output\_heeartbeat\_interval | 10 * 1000 |
+| output\_heartbeat\_interval | 10 * 1000 |
 | dfs\_client\_failover\_max\_attempts | 15 |
 | dfs\_client\_read\_shortcircuit\_streams\_cache\_size | 256 |
 | dfs\_client\_socketcache\_expiryMsec | 3000 |
@@ -6,7 +6,7 @@ sidebar_label: Hive
 
 # Hive
 
-The Hive engine allows you to perform `SELECT` quries on HDFS Hive table. Currently it supports input formats as below:
+The Hive engine allows you to perform `SELECT` queries on HDFS Hive table. Currently it supports input formats as below:
 
 - Text: only supports simple scalar column types except `binary`
 
@@ -10,7 +10,7 @@ This engine allows integrating ClickHouse with [NATS](https://nats.io/).
 
 `NATS` lets you:
 
-- Publish or subcribe to message subjects.
+- Publish or subscribe to message subjects.
 - Process new messages as they become available.
 
 ## Creating a Table {#table_engine-redisstreams-creating-a-table}

@@ -46,7 +46,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
 Required parameters:
 
 - `nats_url` – host:port (for example, `localhost:5672`)..
-- `nats_subjects` – List of subject for NATS table to subscribe/publsh to. Supports wildcard subjects like `foo.*.bar` or `baz.>`
+- `nats_subjects` – List of subject for NATS table to subscribe/publish to. Supports wildcard subjects like `foo.*.bar` or `baz.>`
 - `nats_format` – Message format. Uses the same notation as the SQL `FORMAT` function, such as `JSONEachRow`. For more information, see the [Formats](../../../interfaces/formats.md) section.
 
 Optional parameters:
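Putting the required parameters together, a minimal sketch; the URL and subject below are placeholders:

```sql
-- Hypothetical NATS-backed table using only the required parameters.
CREATE TABLE nats_queue (payload String)
ENGINE = NATS
SETTINGS nats_url = 'localhost:4222',
         nats_subjects = 'foo.bar',
         nats_format = 'JSONEachRow';
```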
@@ -57,7 +57,7 @@ or via config (since version 21.11):
 </named_collections>
 ```
 
-Some parameters can be overriden by key value arguments:
+Some parameters can be overridden by key value arguments:
 ``` sql
 SELECT * FROM postgresql(postgres1, schema='schema1', table='table1');
 ```
@@ -42,7 +42,6 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
     [rabbitmq_queue_consume = false,]
     [rabbitmq_address = '',]
     [rabbitmq_vhost = '/',]
-    [rabbitmq_queue_consume = false,]
     [rabbitmq_username = '',]
     [rabbitmq_password = '',]
     [rabbitmq_commit_on_select = false,]
@@ -23,7 +23,7 @@ CREATE TABLE s3_engine_table (name String, value UInt32)
 - `NOSIGN` - If this keyword is provided in place of credentials, all the requests will not be signed.
 - `format` — The [format](../../../interfaces/formats.md#formats) of the file.
 - `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are used from the configuration file. For more information see [Using S3 for Data Storage](../mergetree-family/mergetree.md#table_engine-mergetree-s3).
-- `compression` — Compression type. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. Parameter is optional. By default, it will autodetect compression by file extension.
+- `compression` — Compression type. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. Parameter is optional. By default, it will auto-detect compression by file extension.
 
 ### PARTITION BY
 

@@ -140,8 +140,8 @@ The following settings can be set before query execution or placed into configur
 - `s3_max_get_rps` — Maximum GET requests per second rate before throttling. Default value is `0` (unlimited).
 - `s3_max_get_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_get_rps`.
 - `s3_upload_part_size_multiply_factor` - Multiply `s3_min_upload_part_size` by this factor each time `s3_multiply_parts_count_threshold` parts were uploaded from a single write to S3. Default values is `2`.
-- `s3_upload_part_size_multiply_parts_count_threshold` - Each time this number of parts was uploaded to S3 `s3_min_upload_part_size multiplied` by `s3_upload_part_size_multiply_factor`. DEfault value us `500`.
-- `s3_max_inflight_parts_for_one_file` - Limits the number of put requests that can be run concurenly for one object. Its number should be limited. The value `0` means unlimited. Default value is `20`. Each inflight part has a buffer with size `s3_min_upload_part_size` for the first `s3_upload_part_size_multiply_factor` parts and more when file is big enought, see `upload_part_size_multiply_factor`. With default settings one uploaded file consumes not more than `320Mb` for a file which is less than `8G`. The consumption is greater for a larger file.
+- `s3_upload_part_size_multiply_parts_count_threshold` - Each time this number of parts was uploaded to S3 `s3_min_upload_part_size multiplied` by `s3_upload_part_size_multiply_factor`. Default value us `500`.
+- `s3_max_inflight_parts_for_one_file` - Limits the number of put requests that can be run concurrently for one object. Its number should be limited. The value `0` means unlimited. Default value is `20`. Each in-flight part has a buffer with size `s3_min_upload_part_size` for the first `s3_upload_part_size_multiply_factor` parts and more when file is big enough, see `upload_part_size_multiply_factor`. With default settings one uploaded file consumes not more than `320Mb` for a file which is less than `8G`. The consumption is greater for a larger file.
 
 Security consideration: if malicious user can specify arbitrary S3 URLs, `s3_max_redirects` must be set to zero to avoid [SSRF](https://en.wikipedia.org/wiki/Server-side_request_forgery) attacks; or alternatively, `remote_host_filter` must be specified in server configuration.
 
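For example, the GET throttling pair can be set per session; the values below are illustrative only:

```sql
-- Allow at most 100 GET requests per second, with bursts of up to 200 requests.
SET s3_max_get_rps = 100;
SET s3_max_get_burst = 200;
```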
@@ -109,7 +109,7 @@ INSERT INTO test.visits (StartDate, CounterID, Sign, UserID)
 VALUES (1667446031, 1, 6, 3)
 ```
 
-The data are inserted in both the table and the materialized view `test.mv_visits`.
+The data is inserted in both the table and the materialized view `test.mv_visits`.
 
 To get the aggregated data, we need to execute a query such as `SELECT ... GROUP BY ...` from the materialized view `test.mv_visits`:
 
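A sketch of such a query, assuming the view stores `sumState`/`uniqState` aggregates named `Visits` and `Users` as in the usual example (the column names are assumptions):

```sql
-- Finalize the partial aggregate states stored by the materialized view.
SELECT
    StartDate,
    sumMerge(Visits) AS Visits,
    uniqMerge(Users) AS Users
FROM test.mv_visits
GROUP BY StartDate
ORDER BY StartDate;
```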
@@ -78,7 +78,7 @@ ENGINE = MergeTree
 ORDER BY id;
 ```
 
-With greater `GRANULARITY` indexes remember the data structure better. The `GRANULARITY` indicates how many granules will be used to construct the index. The more data is provided for the index, the more of it can be handled by one index and the more chances that with the right hyperparameters the index will remember the data structure better. But some indexes can't be built if they don't have enough data, so this granule will always participate in the query. For more information, see the description of indexes.
+With greater `GRANULARITY` indexes remember the data structure better. The `GRANULARITY` indicates how many granules will be used to construct the index. The more data is provided for the index, the more of it can be handled by one index and the more chances that with the right hyper parameters the index will remember the data structure better. But some indexes can't be built if they don't have enough data, so this granule will always participate in the query. For more information, see the description of indexes.
 
 As the indexes are built only during insertions into table, `INSERT` and `OPTIMIZE` queries are slower than for ordinary table. At this stage indexes remember all the information about the given data. ANNIndexes should be used if you have immutable or rarely changed data and many read requests.
 

@@ -135,7 +135,7 @@ ORDER BY id;
 
 Annoy supports `L2Distance` and `cosineDistance`.
 
-In the `SELECT` in the settings (`ann_index_select_query_params`) you can specify the size of the internal buffer (more details in the description above or in the [original repository](https://github.com/spotify/annoy)). During the query it will inspect up to `search_k` nodes which defaults to `n_trees * n` if not provided. `search_k` gives you a run-time tradeoff between better accuracy and speed.
+In the `SELECT` in the settings (`ann_index_select_query_params`) you can specify the size of the internal buffer (more details in the description above or in the [original repository](https://github.com/spotify/annoy)). During the query it will inspect up to `search_k` nodes which defaults to `n_trees * n` if not provided. `search_k` gives you a run-time trade-off between better accuracy and speed.
 
 __Example__:
 ``` sql
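-- A typical query shape, sketched: `ann_table` and `embedding` are placeholder
-- names. The ANN index can serve ORDER BY <distance function> ... LIMIT N.
SELECT id
FROM ann_table
ORDER BY L2Distance(embedding, [0.1, 0.2, 0.3])
LIMIT 10;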
@@ -165,7 +165,7 @@ Performance of such a query heavily depends on the table layout. Because of that
 
 The key factors for a good performance:
 
-- number of partitions involved in the query should be sufficiently large (more than `max_threads / 2`), otherwise query will underutilize the machine
+- number of partitions involved in the query should be sufficiently large (more than `max_threads / 2`), otherwise query will under-utilize the machine
 - partitions shouldn't be too small, so batch processing won't degenerate into row-by-row processing
 - partitions should be comparable in size, so all threads will do roughly the same amount of work
 
@@ -15,6 +15,18 @@ tokenized cells of the string column. For example, the string cell "I will be a
 " wi", "wil", "ill", "ll ", "l b", " be" etc. The more fine-granular the input strings are tokenized, the bigger but also the more
 useful the resulting inverted index will be.
 
+<div class='vimeo-container'>
+<iframe src="//www.youtube.com/embed/O_MnyUkrIq8"
+    width="640"
+    height="360"
+    frameborder="0"
+    allow="autoplay;
+    fullscreen;
+    picture-in-picture"
+    allowfullscreen>
+</iframe>
+</div>
+
 :::note
 Inverted indexes are experimental and should not be used in production environments yet. They may change in the future in backward-incompatible
 ways, for example with respect to their DDL/DQL syntax or performance/compression characteristics.
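As a sketch of the DDL being described (the syntax is experimental and may change; the table and column names are placeholders):

```sql
-- Hypothetical example: an inverted index tokenizing the column into trigrams,
-- matching the 3-gram behaviour described above.
SET allow_experimental_inverted_index = 1;

CREATE TABLE docs
(
    `id` UInt64,
    `text` String,
    INDEX text_idx(text) TYPE inverted(3) GRANULARITY 1
)
ENGINE = MergeTree
ORDER BY id;
```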
@@ -779,7 +779,7 @@ Disks, volumes and storage policies should be declared inside the `<storage_conf
 
 :::tip
 Disks can also be declared in the `SETTINGS` section of a query. This is useful
-for adhoc analysis to temporarily attach a disk that is, for example, hosted at a URL.
+for ad-hoc analysis to temporarily attach a disk that is, for example, hosted at a URL.
 See [dynamic storage](#dynamic-storage) for more details.
 :::
 
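A sketch of that pattern; the UUID, columns, and endpoint are placeholders, and the data must have been prepared for the `web` disk type beforehand:

```sql
-- Hypothetical ad-hoc attach: the disk is declared inline in SETTINGS
-- instead of in the server's storage configuration.
ATTACH TABLE web_hits UUID 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee'
(
    `id` UInt64,
    `value` String
)
ENGINE = MergeTree
ORDER BY id
SETTINGS disk = disk(type = web, endpoint = 'https://example.com/data/');
```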
@@ -856,7 +856,7 @@ Tags:
 - `perform_ttl_move_on_insert` — Disables TTL move on data part INSERT. By default if we insert a data part that already expired by the TTL move rule it immediately goes to a volume/disk declared in move rule. This can significantly slowdown insert in case if destination volume/disk is slow (e.g. S3).
 - `load_balancing` - Policy for disk balancing, `round_robin` or `least_used`.
 
-Cofiguration examples:
+Configuration examples:
 
 ``` xml
 <storage_configuration>

@@ -1224,7 +1224,7 @@ Limit parameters (mainly for internal usage):
 * `max_single_read_retries` - Limits the number of attempts to read a chunk of data from Blob Storage.
 * `max_single_download_retries` - Limits the number of attempts to download a readable buffer from Blob Storage.
 * `thread_pool_size` - Limits the number of threads with which `IDiskRemote` is instantiated.
-* `s3_max_inflight_parts_for_one_file` - Limits the number of put requests that can be run concurenly for one object.
+* `s3_max_inflight_parts_for_one_file` - Limits the number of put requests that can be run concurrently for one object.
 
 Other parameters:
 * `metadata_path` - Path on local FS to store metadata files for Blob Storage. Default value is `/var/lib/clickhouse/disks/<disk_name>/`.
@@ -65,7 +65,7 @@ if __name__ == "__main__":
     main()
 ```
 
-The following `my_executable_table` is built from the output of `my_script.py`, which will generate 10 random strings everytime you run a `SELECT` from `my_executable_table`:
+The following `my_executable_table` is built from the output of `my_script.py`, which will generate 10 random strings every time you run a `SELECT` from `my_executable_table`:
 
 ```sql
 CREATE TABLE my_executable_table (

@@ -223,4 +223,4 @@ SETTINGS
     pool_size = 4;
 ```
 
-ClickHouse will maintain 4 processes on-demand when your client queries the `sentiment_pooled` table.
+ClickHouse will maintain 4 processes on-demand when your client queries the `sentiment_pooled` table.
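For reference, a minimal sketch of the `my_executable_table` definition referenced above (the column name and format are assumptions):

```sql
-- Hypothetical table that re-runs my_script.py and reads its stdout
-- on every SELECT.
CREATE TABLE my_executable_table (
    x String
)
ENGINE = Executable('my_script.py', TabSeparated);
```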
@@ -72,7 +72,7 @@ Additionally, number of keys will have a soft limit of 4 for the number of keys.
 
 If multiple tables are created on the same ZooKeeper path, the values are persisted until there exists at least 1 table using it.
 As a result, it is possible to use `ON CLUSTER` clause when creating the table and sharing the data from multiple ClickHouse instances.
-Of course, it's possible to manually run `CREATE TABLE` with same path on nonrelated ClickHouse instances to have same data sharing effect.
+Of course, it's possible to manually run `CREATE TABLE` with same path on unrelated ClickHouse instances to have same data sharing effect.
 
 ## Supported operations {#table_engine-KeeperMap-supported-operations}
 
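A sketch of the sharing pattern described above; the path and columns are placeholders:

```sql
-- Running this same statement on unrelated instances pointed at the same
-- Keeper cluster yields tables backed by the same data, because both use
-- the same Keeper path.
CREATE TABLE keeper_map_table
(
    `key` String,
    `value` UInt64
)
ENGINE = KeeperMap('/keeper_map_table')
PRIMARY KEY key;
```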
@@ -87,7 +87,7 @@ ORDER BY (marketplace, review_date, product_category);
 
 3. We are now ready to insert the data into ClickHouse. Before we do, check out the [list of files in the dataset](https://s3.amazonaws.com/amazon-reviews-pds/tsv/index.txt) and decide which ones you want to include.
 
-4. We will insert all of the US reviews - which is about 151M rows. The following `INSERT` command uses the `s3Cluster` table function, which allows the processing of mulitple S3 files in parallel using all the nodes of your cluster. We also use a wildcard to insert any file that starts with the name `https://s3.amazonaws.com/amazon-reviews-pds/tsv/amazon_reviews_us_`:
+4. We will insert all of the US reviews - which is about 151M rows. The following `INSERT` command uses the `s3Cluster` table function, which allows the processing of multiple S3 files in parallel using all the nodes of your cluster. We also use a wildcard to insert any file that starts with the name `https://s3.amazonaws.com/amazon-reviews-pds/tsv/amazon_reviews_us_`:
 
 ```sql
 INSERT INTO amazon_reviews

@@ -473,4 +473,4 @@ It runs quite a bit faster - which means the cache is helping us out here:
 └────────────┴───────────────────────────────────────────────────────────────────────┴────────────────────┴───────┘
 
 50 rows in set. Elapsed: 33.954 sec. Processed 150.96 million rows, 68.95 GB (4.45 million rows/s., 2.03 GB/s.)
-```
+```
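A sketch of such a wildcard insert; the file extension and format name are assumptions:

```sql
-- Hypothetical shape of the parallel insert of all US review files.
INSERT INTO amazon_reviews
SELECT *
FROM s3Cluster(
    'default',
    'https://s3.amazonaws.com/amazon-reviews-pds/tsv/amazon_reviews_us_*.tsv.gz',
    'TSVWithNames'
);
```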
@@ -317,7 +317,7 @@ To build a Superset dashboard using the OpenCelliD dataset you should:
 Make sure that you set **SSL** on when connecting to ClickHouse Cloud or other ClickHouse systems that enforce the use of SSL.
 :::
 
-![Add ClickHouse as a Superset datasource](@site/docs/en/getting-started/example-datasets/images/superset-connect-a-database.png)
+![Add ClickHouse as a Superset data source](@site/docs/en/getting-started/example-datasets/images/superset-connect-a-database.png)
 
 ### Add the table **cell_towers** as a Superset **dataset**
 

@@ -364,5 +364,5 @@ The data is also available for interactive queries in the [Playground](https://p
 
 This [example](https://play.clickhouse.com/play?user=play#U0VMRUNUIG1jYywgY291bnQoKSBGUk9NIGNlbGxfdG93ZXJzIEdST1VQIEJZIG1jYyBPUkRFUiBCWSBjb3VudCgpIERFU0M=) will populate the username and even the query for you.
 
-Although you cannot create tables in the Playground, you can run all of the queries and even use Superset (adjust the hostname and port number).
+Although you cannot create tables in the Playground, you can run all of the queries and even use Superset (adjust the host name and port number).
 :::
@@ -806,7 +806,7 @@ FROM
 31 rows in set. Elapsed: 0.043 sec. Processed 7.54 million rows, 40.53 MB (176.71 million rows/s., 950.40 MB/s.)
 ```
 
-Maybe a little more near the end of the month, but overall we keep a good even distribution. Again this is unrealiable due to the filtering of the docs filter during data insertion.
+Maybe a little more near the end of the month, but overall we keep a good even distribution. Again this is unreliable due to the filtering of the docs filter during data insertion.
 
 ## Authors with the most diverse impact
 

@@ -940,7 +940,7 @@ LIMIT 10
 10 rows in set. Elapsed: 0.106 sec. Processed 798.15 thousand rows, 13.97 MB (7.51 million rows/s., 131.41 MB/s.)
 ```
 
-This makes sense because Alexey has been responsible for maintaining the Change log. But what if we use the basename of the file to identify his popular files - this allows for renames and should focus on code contributions.
+This makes sense because Alexey has been responsible for maintaining the Change log. But what if we use the base name of the file to identify his popular files - this allows for renames and should focus on code contributions.
 
 [play](https://play.clickhouse.com/play?user=play#U0VMRUNUCiAgICBiYXNlLAogICAgY291bnQoKSBBUyBjCkZST00gZ2l0X2NsaWNraG91c2UuZmlsZV9jaGFuZ2VzCldIRVJFIChhdXRob3IgPSAnQWxleGV5IE1pbG92aWRvdicpIEFORCAoZmlsZV9leHRlbnNpb24gSU4gKCdoJywgJ2NwcCcsICdzcWwnKSkKR1JPVVAgQlkgYmFzZW5hbWUocGF0aCkgQVMgYmFzZQpPUkRFUiBCWSBjIERFU0MKTElNSVQgMTA=)
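For reference, the query encoded in that play link:

```sql
SELECT
    base,
    count() AS c
FROM git_clickhouse.file_changes
WHERE (author = 'Alexey Milovidov') AND (file_extension IN ('h', 'cpp', 'sql'))
GROUP BY basename(path) AS base
ORDER BY c DESC
LIMIT 10
```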
@@ -75,7 +75,7 @@ SELECT
     payment_type,
     pickup_ntaname,
     dropoff_ntaname
-FROM s3(
+FROM gcs(
     'https://storage.googleapis.com/clickhouse-public-datasets/nyc-taxi/trips_{0..2}.gz',
     'TabSeparatedWithNames'
 );
@@ -9,7 +9,7 @@ The data in this dataset is derived and cleaned from the full OpenSky dataset to
 
 Source: https://zenodo.org/record/5092942#.YRBCyTpRXYd
 
-Martin Strohmeier, Xavier Olive, Jannis Lübbe, Matthias Schäfer, and Vincent Lenders
+Martin Strohmeier, Xavier Olive, Jannis Luebbe, Matthias Schaefer, and Vincent Lenders
 "Crowdsourced air traffic data from the OpenSky Network 2019–2020"
 Earth System Science Data 13(2), 2021
 https://doi.org/10.5194/essd-13-357-2021
@@ -5,7 +5,7 @@ sidebar_label: Reddit comments
 
 # Reddit comments dataset
 
-This dataset contains publicly-available comments on Reddit that go back to December, 2005, to March, 2023, and contains over 7B rows of data. The raw data is in JSON format in compressed `.zst` files and the rows look like the following:
+This dataset contains publicly-available comments on Reddit that go back to December, 2005, to March, 2023, and contains over 14B rows of data. The raw data is in JSON format in compressed files and the rows look like the following:
 
 ```json
 {"controversiality":0,"body":"A look at Vietnam and Mexico exposes the myth of market liberalisation.","subreddit_id":"t5_6","link_id":"t3_17863","stickied":false,"subreddit":"reddit.com","score":2,"ups":2,"author_flair_css_class":null,"created_utc":1134365188,"author_flair_text":null,"author":"frjo","id":"c13","edited":false,"parent_id":"t3_17863","gilded":0,"distinguished":null,"retrieved_on":1473738411}
@@ -18,7 +18,7 @@ This dataset contains publicly-available comments on Reddit that go back to Dece
 A shoutout to Percona for the [motivation behind ingesting this dataset](https://www.percona.com/blog/big-data-set-reddit-comments-analyzing-clickhouse/), which we have downloaded and stored in an S3 bucket.
 
 :::note
-The following commands were executed on ClickHouse Cloud. To run this on your own cluster, replace `default` in the `s3Cluster` function call with the name of your cluster. If you do not have a cluster, then replace the `s3Cluster` function with the `s3` function.
+The following commands were executed on a Production instance of ClickHouse Cloud with the minimum memory set to 720GB. To run this on your own cluster, replace `default` in the `s3Cluster` function call with the name of your cluster. If you do not have a cluster, then replace the `s3Cluster` function with the `s3` function.
 :::
 
 1. Let's create a table for the Reddit data:
@@ -75,18 +75,6 @@ The names of the files in S3 start with `RC_YYYY-MM` where `YYYY-MM` goes from `
 
 2. We are going to start with one month of data, but if you want to simply insert every row - skip ahead to step 8 below. The following file has 86M records from December, 2017:
 
-```sql
-INSERT INTO reddit
-    SELECT *
-    FROM s3Cluster(
-        'default',
-        'https://clickhouse-public-datasets.s3.eu-central-1.amazonaws.com/reddit/original/RC_2017-12.xz',
-        'JSONEachRow'
-    );
-```
-
-If you do not have a cluster, use `s3` instead of `s3Cluster`:
-
 ```sql
 INSERT INTO reddit
     SELECT *
@@ -94,6 +82,7 @@ INSERT INTO reddit
         'https://clickhouse-public-datasets.s3.eu-central-1.amazonaws.com/reddit/original/RC_2017-12.xz',
         'JSONEachRow'
     );
+
 ```
 
 3. It will take a while depending on your resources, but when it's done verify it worked:
@@ -198,26 +187,81 @@ LIMIT 10;
 TRUNCATE TABLE reddit;
 ```
 
-8. This is a fun dataset and it looks like we can find some great information, so let's go ahead and insert the entire dataset from 2005 to 2023. When you're ready, run this command to insert all the rows. (It takes a while - up to 17 hours!)
+8. This is a fun dataset and it looks like we can find some great information, so let's go ahead and insert the entire dataset from 2005 to 2023. For practical reasons, it works well to insert the data by years starting with...
+
+```sql
+INSERT INTO reddit
+SELECT *
+FROM s3Cluster(
+    'default',
+    'https://clickhouse-public-datasets.s3.eu-central-1.amazonaws.com/reddit/original/RC_2005*',
+    'JSONEachRow'
+)
+SETTINGS zstd_window_log_max = 31;
+```
+
+...and ending with:
 
 ```sql
 INSERT INTO reddit
 SELECT *
 FROM s3Cluster(
     'default',
-    'https://clickhouse-public-datasets.s3.amazonaws.com/reddit/original/RC*',
+    'https://clickhouse-public-datasets.s3.amazonaws.com/reddit/original/RC_2023*',
     'JSONEachRow'
 )
 SETTINGS zstd_window_log_max = 31;
 ```
 
-The response looks like:
+If you do not have a cluster, use `s3` instead of `s3Cluster`:
 
-```response
-0 rows in set. Elapsed: 61187.839 sec. Processed 6.74 billion rows, 2.06 TB (110.17 thousand rows/s., 33.68 MB/s.)
-```
+```sql
+INSERT INTO reddit
+SELECT *
+FROM s3(
+    'https://clickhouse-public-datasets.s3.amazonaws.com/reddit/original/RC_2005*',
+    'JSONEachRow'
+)
+SETTINGS zstd_window_log_max = 31;
+```
 
-8. Let's see how many rows were inserted and how much disk space the table is using:
+8. To verify it worked, here are the number of rows per year (as of February, 2023):
+
+```sql
+SELECT
+    toYear(created_utc) AS year,
+    formatReadableQuantity(count())
+FROM reddit
+GROUP BY year;
+```
+
+```response
+┌─year─┬─formatReadableQuantity(count())─┐
+│ 2005 │ 1.07 thousand                   │
+│ 2006 │ 417.18 thousand                 │
+│ 2007 │ 2.46 million                    │
+│ 2008 │ 7.24 million                    │
+│ 2009 │ 18.86 million                   │
+│ 2010 │ 42.93 million                   │
+│ 2011 │ 28.91 million                   │
+│ 2012 │ 260.31 million                  │
+│ 2013 │ 402.21 million                  │
+│ 2014 │ 531.80 million                  │
+│ 2015 │ 667.76 million                  │
+│ 2016 │ 799.90 million                  │
+│ 2017 │ 972.86 million                  │
+│ 2018 │ 1.24 billion                    │
+│ 2019 │ 1.66 billion                    │
+│ 2020 │ 2.16 billion                    │
+│ 2021 │ 2.59 billion                    │
+│ 2022 │ 2.82 billion                    │
+│ 2023 │ 474.86 million                  │
+└──────┴─────────────────────────────────┘
+```
+
+9. Let's see how many rows were inserted and how much disk space the table is using:
 
 
 ```sql
@@ -227,17 +271,17 @@ SELECT
     formatReadableSize(sum(bytes)) AS disk_size,
     formatReadableSize(sum(data_uncompressed_bytes)) AS uncompressed_size
 FROM system.parts
-WHERE (table = 'reddit') AND active
+WHERE (table = 'reddit') AND active;
 ```
 
 Notice the compression of disk storage is about 1/3 of the uncompressed size:
 
 ```response
-┌──────count─┬─formatReadableQuantity(sum(rows))─┬─disk_size──┬─uncompressed_size─┐
-│ 6739503568 │ 6.74 billion                      │ 501.10 GiB │ 1.51 TiB          │
-└────────────┴───────────────────────────────────┴────────────┴───────────────────┘
+┌───────count─┬─formatReadableQuantity(sum(rows))─┬─disk_size─┬─uncompressed_size─┐
+│ 14688534662 │ 14.69 billion                     │ 1.03 TiB  │ 3.26 TiB          │
+└─────────────┴───────────────────────────────────┴───────────┴───────────────────┘
 
-1 row in set. Elapsed: 0.010 sec.
+1 row in set. Elapsed: 0.005 sec.
 ```
 
 9. The following query shows how many comments, authors and subreddits we have for each month:
@@ -256,185 +300,216 @@ GROUP BY firstOfMonth
 ORDER BY firstOfMonth ASC;
 ```
 
-This is a substantial query that has to process all 6.74 billion rows, but we still get an impressive response time (about 3 minutes):
+This is a substantial query that has to process all 14.69 billion rows, but we still get an impressive response time (about 48 seconds):
 
 ```response
┌─firstOfMonth─┬─────────c─┬─bar_count─────────────────┬─authors─┬─bar_authors───────────────┬─subreddits─┬─bar_subreddits────────────┐
│ 2005-12-01 │ 1075 │ │ 394 │ │ 1 │ │
│ 2006-01-01 │ 3666 │ │ 791 │ │ 2 │ │
│ 2006-02-01 │ 9095 │ │ 1464 │ │ 18 │ │
│ 2006-03-01 │ 13859 │ │ 1958 │ │ 15 │ │
│ 2006-04-01 │ 19090 │ │ 2334 │ │ 21 │ │
│ 2006-05-01 │ 26859 │ │ 2698 │ │ 21 │ │
│ 2006-06-01 │ 29163 │ │ 3043 │ │ 19 │ │
│ 2006-07-01 │ 37031 │ │ 3532 │ │ 22 │ │
│ 2006-08-01 │ 50559 │ │ 4750 │ │ 24 │ │
│ 2006-09-01 │ 50675 │ │ 4908 │ │ 21 │ │
│ 2006-10-01 │ 54148 │ │ 5654 │ │ 31 │ │
│ 2006-11-01 │ 62021 │ │ 6490 │ │ 23 │ │
│ 2006-12-01 │ 61018 │ │ 6707 │ │ 24 │ │
│ 2007-01-01 │ 81341 │ │ 7931 │ │ 23 │ │
│ 2007-02-01 │ 95634 │ │ 9020 │ │ 21 │ │
│ 2007-03-01 │ 112444 │ │ 10842 │ │ 23 │ │
│ 2007-04-01 │ 126773 │ │ 10701 │ │ 26 │ │
│ 2007-05-01 │ 170097 │ │ 11365 │ │ 25 │ │
│ 2007-06-01 │ 178800 │ │ 11267 │ │ 22 │ │
│ 2007-07-01 │ 203319 │ │ 12482 │ │ 25 │ │
│ 2007-08-01 │ 225111 │ │ 14124 │ │ 30 │ │
│ 2007-09-01 │ 259497 │ ▏ │ 15416 │ │ 33 │ │
│ 2007-10-01 │ 274170 │ ▏ │ 15302 │ │ 36 │ │
│ 2007-11-01 │ 372983 │ ▏ │ 15134 │ │ 43 │ │
│ 2007-12-01 │ 363390 │ ▏ │ 15915 │ │ 31 │ │
│ 2008-01-01 │ 452990 │ ▏ │ 18857 │ │ 126 │ │
│ 2008-02-01 │ 441768 │ ▏ │ 18266 │ │ 173 │ │
│ 2008-03-01 │ 463728 │ ▏ │ 18947 │ │ 292 │ │
│ 2008-04-01 │ 468317 │ ▏ │ 18590 │ │ 323 │ │
│ 2008-05-01 │ 536380 │ ▎ │ 20861 │ │ 375 │ │
│ 2008-06-01 │ 577684 │ ▎ │ 22557 │ │ 575 │ ▏ │
│ 2008-07-01 │ 592610 │ ▎ │ 23123 │ │ 657 │ ▏ │
│ 2008-08-01 │ 595959 │ ▎ │ 23729 │ │ 707 │ ▏ │
│ 2008-09-01 │ 680892 │ ▎ │ 26374 │ ▏ │ 801 │ ▏ │
│ 2008-10-01 │ 789874 │ ▍ │ 28970 │ ▏ │ 893 │ ▏ │
│ 2008-11-01 │ 792310 │ ▍ │ 30272 │ ▏ │ 1024 │ ▎ │
│ 2008-12-01 │ 850359 │ ▍ │ 34073 │ ▏ │ 1103 │ ▎ │
│ 2009-01-01 │ 1051649 │ ▌ │ 38978 │ ▏ │ 1316 │ ▎ │
│ 2009-02-01 │ 944711 │ ▍ │ 43390 │ ▏ │ 1132 │ ▎ │
│ 2009-03-01 │ 1048643 │ ▌ │ 46516 │ ▏ │ 1203 │ ▎ │
│ 2009-04-01 │ 1094599 │ ▌ │ 48284 │ ▏ │ 1334 │ ▎ │
│ 2009-05-01 │ 1201257 │ ▌ │ 52512 │ ▎ │ 1395 │ ▎ │
│ 2009-06-01 │ 1258750 │ ▋ │ 57728 │ ▎ │ 1473 │ ▎ │
│ 2009-07-01 │ 1470290 │ ▋ │ 60098 │ ▎ │ 1686 │ ▍ │
│ 2009-08-01 │ 1750688 │ ▉ │ 67347 │ ▎ │ 1777 │ ▍ │
│ 2009-09-01 │ 2032276 │ █ │ 78051 │ ▍ │ 1784 │ ▍ │
│ 2009-10-01 │ 2242017 │ █ │ 93409 │ ▍ │ 2071 │ ▌ │
│ 2009-11-01 │ 2207444 │ █ │ 95940 │ ▍ │ 2141 │ ▌ │
│ 2009-12-01 │ 2560510 │ █▎ │ 104239 │ ▌ │ 2141 │ ▌ │
│ 2010-01-01 │ 2884096 │ █▍ │ 114314 │ ▌ │ 2313 │ ▌ │
│ 2010-02-01 │ 2687779 │ █▎ │ 115683 │ ▌ │ 2522 │ ▋ │
│ 2010-03-01 │ 3228254 │ █▌ │ 125775 │ ▋ │ 2890 │ ▋ │
│ 2010-04-01 │ 3209898 │ █▌ │ 128936 │ ▋ │ 3170 │ ▊ │
│ 2010-05-01 │ 3267363 │ █▋ │ 131851 │ ▋ │ 3166 │ ▊ │
│ 2010-06-01 │ 3532867 │ █▊ │ 139522 │ ▋ │ 3301 │ ▊ │
│ 2010-07-01 │ 4032737 │ ██ │ 153451 │ ▊ │ 3662 │ ▉ │
│ 2010-08-01 │ 4247982 │ ██ │ 164071 │ ▊ │ 3653 │ ▉ │
│ 2010-09-01 │ 4704069 │ ██▎ │ 186613 │ ▉ │ 4009 │ █ │
│ 2010-10-01 │ 5032368 │ ██▌ │ 203800 │ █ │ 4154 │ █ │
│ 2010-11-01 │ 5689002 │ ██▊ │ 226134 │ █▏ │ 4383 │ █ │
│ 2010-12-01 │ 5972642 │ ██▉ │ 245824 │ █▏ │ 4692 │ █▏ │
│ 2011-01-01 │ 6603329 │ ███▎ │ 270025 │ █▎ │ 5141 │ █▎ │
│ 2011-02-01 │ 6363114 │ ███▏ │ 277593 │ █▍ │ 5202 │ █▎ │
│ 2011-03-01 │ 7556165 │ ███▊ │ 314748 │ █▌ │ 5445 │ █▎ │
│ 2011-04-01 │ 7571398 │ ███▊ │ 329920 │ █▋ │ 6128 │ █▌ │
│ 2011-05-01 │ 8803949 │ ████▍ │ 365013 │ █▊ │ 6834 │ █▋ │
│ 2011-06-01 │ 9766511 │ ████▉ │ 393945 │ █▉ │ 7519 │ █▉ │
│ 2011-07-01 │ 10557466 │ █████▎ │ 424235 │ ██ │ 8293 │ ██ │
│ 2011-08-01 │ 12316144 │ ██████▏ │ 475326 │ ██▍ │ 9657 │ ██▍ │
│ 2011-09-01 │ 12150412 │ ██████ │ 503142 │ ██▌ │ 10278 │ ██▌ │
│ 2011-10-01 │ 13470278 │ ██████▋ │ 548801 │ ██▋ │ 10922 │ ██▋ │
│ 2011-11-01 │ 13621533 │ ██████▊ │ 574435 │ ██▊ │ 11572 │ ██▉ │
│ 2011-12-01 │ 14509469 │ ███████▎ │ 622849 │ ███ │ 12335 │ ███ │
│ 2012-01-01 │ 16350205 │ ████████▏ │ 696110 │ ███▍ │ 14281 │ ███▌ │
│ 2012-02-01 │ 16015695 │ ████████ │ 722892 │ ███▌ │ 14949 │ ███▋ │
│ 2012-03-01 │ 17881943 │ ████████▉ │ 789664 │ ███▉ │ 15795 │ ███▉ │
│ 2012-04-01 │ 19044534 │ █████████▌ │ 842491 │ ████▏ │ 16440 │ ████ │
│ 2012-05-01 │ 20388260 │ ██████████▏ │ 886176 │ ████▍ │ 16974 │ ████▏ │
│ 2012-06-01 │ 21897913 │ ██████████▉ │ 946798 │ ████▋ │ 17952 │ ████▍ │
│ 2012-07-01 │ 24087517 │ ████████████ │ 1018636 │ █████ │ 19069 │ ████▊ │
│ 2012-08-01 │ 25703326 │ ████████████▊ │ 1094445 │ █████▍ │ 20553 │ █████▏ │
│ 2012-09-01 │ 23419524 │ ███████████▋ │ 1088491 │ █████▍ │ 20831 │ █████▏ │
│ 2012-10-01 │ 24788236 │ ████████████▍ │ 1131885 │ █████▋ │ 21868 │ █████▍ │
│ 2012-11-01 │ 24648302 │ ████████████▎ │ 1167608 │ █████▊ │ 21791 │ █████▍ │
│ 2012-12-01 │ 26080276 │ █████████████ │ 1218402 │ ██████ │ 22622 │ █████▋ │
│ 2013-01-01 │ 30365867 │ ███████████████▏ │ 1341703 │ ██████▋ │ 24696 │ ██████▏ │
│ 2013-02-01 │ 27213960 │ █████████████▌ │ 1304756 │ ██████▌ │ 24514 │ ██████▏ │
│ 2013-03-01 │ 30771274 │ ███████████████▍ │ 1391703 │ ██████▉ │ 25730 │ ██████▍ │
│ 2013-04-01 │ 33259557 │ ████████████████▋ │ 1485971 │ ███████▍ │ 27294 │ ██████▊ │
│ 2013-05-01 │ 33126225 │ ████████████████▌ │ 1506473 │ ███████▌ │ 27299 │ ██████▊ │
│ 2013-06-01 │ 32648247 │ ████████████████▎ │ 1506650 │ ███████▌ │ 27450 │ ██████▊ │
│ 2013-07-01 │ 34922133 │ █████████████████▍ │ 1561771 │ ███████▊ │ 28294 │ ███████ │
│ 2013-08-01 │ 34766579 │ █████████████████▍ │ 1589781 │ ███████▉ │ 28943 │ ███████▏ │
│ 2013-09-01 │ 31990369 │ ███████████████▉ │ 1570342 │ ███████▊ │ 29408 │ ███████▎ │
│ 2013-10-01 │ 35940040 │ █████████████████▉ │ 1683770 │ ████████▍ │ 30273 │ ███████▌ │
│ 2013-11-01 │ 37396497 │ ██████████████████▋ │ 1757467 │ ████████▊ │ 31173 │ ███████▊ │
│ 2013-12-01 │ 39810216 │ ███████████████████▉ │ 1846204 │ █████████▏ │ 32326 │ ████████ │
│ 2014-01-01 │ 42420655 │ █████████████████████▏ │ 1927229 │ █████████▋ │ 35603 │ ████████▉ │
│ 2014-02-01 │ 38703362 │ ███████████████████▎ │ 1874067 │ █████████▎ │ 37007 │ █████████▎ │
│ 2014-03-01 │ 42459956 │ █████████████████████▏ │ 1959888 │ █████████▊ │ 37948 │ █████████▍ │
│ 2014-04-01 │ 42440735 │ █████████████████████▏ │ 1951369 │ █████████▊ │ 38362 │ █████████▌ │
│ 2014-05-01 │ 42514094 │ █████████████████████▎ │ 1970197 │ █████████▊ │ 39078 │ █████████▊ │
│ 2014-06-01 │ 41990650 │ ████████████████████▉ │ 1943850 │ █████████▋ │ 38268 │ █████████▌ │
│ 2014-07-01 │ 46868899 │ ███████████████████████▍ │ 2059346 │ ██████████▎ │ 40634 │ ██████████▏ │
│ 2014-08-01 │ 46990813 │ ███████████████████████▍ │ 2117335 │ ██████████▌ │ 41764 │ ██████████▍ │
│ 2014-09-01 │ 44992201 │ ██████████████████████▍ │ 2124708 │ ██████████▌ │ 41890 │ ██████████▍ │
│ 2014-10-01 │ 47497520 │ ███████████████████████▋ │ 2206535 │ ███████████ │ 43109 │ ██████████▊ │
│ 2014-11-01 │ 46118074 │ ███████████████████████ │ 2239747 │ ███████████▏ │ 43718 │ ██████████▉ │
│ 2014-12-01 │ 48807699 │ ████████████████████████▍ │ 2372945 │ ███████████▊ │ 43823 │ ██████████▉ │
│ 2015-01-01 │ 53851542 │ █████████████████████████ │ 2499536 │ ████████████▍ │ 47172 │ ███████████▊ │
│ 2015-02-01 │ 48342747 │ ████████████████████████▏ │ 2448496 │ ████████████▏ │ 47229 │ ███████████▊ │
│ 2015-03-01 │ 54564441 │ █████████████████████████ │ 2550534 │ ████████████▊ │ 48156 │ ████████████ │
│ 2015-04-01 │ 55005780 │ █████████████████████████ │ 2609443 │ █████████████ │ 49865 │ ████████████▍ │
│ 2015-05-01 │ 54504410 │ █████████████████████████ │ 2585535 │ ████████████▉ │ 50137 │ ████████████▌ │
│ 2015-06-01 │ 54258492 │ █████████████████████████ │ 2595129 │ ████████████▉ │ 49598 │ ████████████▍ │
│ 2015-07-01 │ 58451788 │ █████████████████████████ │ 2720026 │ █████████████▌ │ 55022 │ █████████████▊ │
│ 2015-08-01 │ 58075327 │ █████████████████████████ │ 2743994 │ █████████████▋ │ 55302 │ █████████████▊ │
│ 2015-09-01 │ 55574825 │ █████████████████████████ │ 2672793 │ █████████████▎ │ 53960 │ █████████████▍ │
│ 2015-10-01 │ 59494045 │ █████████████████████████ │ 2816426 │ ██████████████ │ 70210 │ █████████████████▌ │
│ 2015-11-01 │ 57117500 │ █████████████████████████ │ 2847146 │ ██████████████▏ │ 71363 │ █████████████████▊ │
|
||||
│ 2015-12-01 │ 58523312 │ █████████████████████████ │ 2854840 │ ██████████████▎ │ 94559 │ ███████████████████████▋ │
|
||||
│ 2016-01-01 │ 61991732 │ █████████████████████████ │ 2920366 │ ██████████████▌ │ 108438 │ █████████████████████████ │
|
||||
│ 2016-02-01 │ 59189875 │ █████████████████████████ │ 2854683 │ ██████████████▎ │ 109916 │ █████████████████████████ │
|
||||
│ 2016-03-01 │ 63918864 │ █████████████████████████ │ 2969542 │ ██████████████▊ │ 84787 │ █████████████████████▏ │
|
||||
│ 2016-04-01 │ 64271256 │ █████████████████████████ │ 2999086 │ ██████████████▉ │ 61647 │ ███████████████▍ │
|
||||
│ 2016-05-01 │ 65212004 │ █████████████████████████ │ 3034674 │ ███████████████▏ │ 67465 │ ████████████████▊ │
|
||||
│ 2016-06-01 │ 65867743 │ █████████████████████████ │ 3057604 │ ███████████████▎ │ 75170 │ ██████████████████▊ │
|
||||
│ 2016-07-01 │ 66974735 │ █████████████████████████ │ 3199374 │ ███████████████▉ │ 77732 │ ███████████████████▍ │
|
||||
│ 2016-08-01 │ 69654819 │ █████████████████████████ │ 3239957 │ ████████████████▏ │ 63080 │ ███████████████▊ │
|
||||
│ 2016-09-01 │ 67024973 │ █████████████████████████ │ 3190864 │ ███████████████▉ │ 62324 │ ███████████████▌ │
|
||||
│ 2016-10-01 │ 71826553 │ █████████████████████████ │ 3284340 │ ████████████████▍ │ 62549 │ ███████████████▋ │
|
||||
│ 2016-11-01 │ 71022319 │ █████████████████████████ │ 3300822 │ ████████████████▌ │ 69718 │ █████████████████▍ │
|
||||
│ 2016-12-01 │ 72942967 │ █████████████████████████ │ 3430324 │ █████████████████▏ │ 71705 │ █████████████████▉ │
|
||||
│ 2017-01-01 │ 78946585 │ █████████████████████████ │ 3572093 │ █████████████████▊ │ 78198 │ ███████████████████▌ │
|
||||
│ 2017-02-01 │ 70609487 │ █████████████████████████ │ 3421115 │ █████████████████ │ 69823 │ █████████████████▍ │
|
||||
│ 2017-03-01 │ 79723106 │ █████████████████████████ │ 3638122 │ ██████████████████▏ │ 73865 │ ██████████████████▍ │
|
||||
│ 2017-04-01 │ 77478009 │ █████████████████████████ │ 3620591 │ ██████████████████ │ 74387 │ ██████████████████▌ │
|
||||
│ 2017-05-01 │ 79810360 │ █████████████████████████ │ 3650820 │ ██████████████████▎ │ 74356 │ ██████████████████▌ │
|
||||
│ 2017-06-01 │ 79901711 │ █████████████████████████ │ 3737614 │ ██████████████████▋ │ 72114 │ ██████████████████ │
|
||||
│ 2017-07-01 │ 81798725 │ █████████████████████████ │ 3872330 │ ███████████████████▎ │ 76052 │ ███████████████████ │
|
||||
│ 2017-08-01 │ 84658503 │ █████████████████████████ │ 3960093 │ ███████████████████▊ │ 77798 │ ███████████████████▍ │
|
||||
│ 2017-09-01 │ 83165192 │ █████████████████████████ │ 3880501 │ ███████████████████▍ │ 78402 │ ███████████████████▌ │
|
||||
│ 2017-10-01 │ 85828912 │ █████████████████████████ │ 3980335 │ ███████████████████▉ │ 80685 │ ████████████████████▏ │
|
||||
│ 2017-11-01 │ 84965681 │ █████████████████████████ │ 4026749 │ ████████████████████▏ │ 82659 │ ████████████████████▋ │
|
||||
│ 2017-12-01 │ 85973810 │ █████████████████████████ │ 4196354 │ ████████████████████▉ │ 91984 │ ██████████████████████▉ │
|
||||
│ 2018-01-01 │ 91558594 │ █████████████████████████ │ 4364443 │ █████████████████████▊ │ 102577 │ █████████████████████████ │
|
||||
│ 2018-02-01 │ 86467179 │ █████████████████████████ │ 4277899 │ █████████████████████▍ │ 104610 │ █████████████████████████ │
|
||||
│ 2018-03-01 │ 96490262 │ █████████████████████████ │ 4422470 │ ██████████████████████ │ 112559 │ █████████████████████████ │
|
||||
│ 2018-04-01 │ 98101232 │ █████████████████████████ │ 4572434 │ ██████████████████████▊ │ 105284 │ █████████████████████████ │
|
||||
│ 2018-05-01 │ 100109100 │ █████████████████████████ │ 4698908 │ ███████████████████████▍ │ 103910 │ █████████████████████████ │
|
||||
│ 2018-06-01 │ 100009462 │ █████████████████████████ │ 4697426 │ ███████████████████████▍ │ 101107 │ █████████████████████████ │
|
||||
│ 2018-07-01 │ 108151359 │ █████████████████████████ │ 5099492 │ █████████████████████████ │ 106184 │ █████████████████████████ │
|
||||
│ 2018-08-01 │ 107330940 │ █████████████████████████ │ 5084082 │ █████████████████████████ │ 109985 │ █████████████████████████ │
|
||||
│ 2018-09-01 │ 104473929 │ █████████████████████████ │ 5011953 │ █████████████████████████ │ 109710 │ █████████████████████████ │
|
||||
│ 2018-10-01 │ 112346556 │ █████████████████████████ │ 5320405 │ █████████████████████████ │ 112533 │ █████████████████████████ │
|
||||
│ 2018-11-01 │ 112573001 │ █████████████████████████ │ 5353282 │ █████████████████████████ │ 112211 │ █████████████████████████ │
|
||||
│ 2018-12-01 │ 121953600 │ █████████████████████████ │ 5611543 │ █████████████████████████ │ 118291 │ █████████████████████████ │
|
||||
│ 2019-01-01 │ 129386587 │ █████████████████████████ │ 6016687 │ █████████████████████████ │ 125725 │ █████████████████████████ │
|
||||
│ 2019-02-01 │ 120645639 │ █████████████████████████ │ 5974488 │ █████████████████████████ │ 125420 │ █████████████████████████ │
|
||||
│ 2019-03-01 │ 137650471 │ █████████████████████████ │ 6410197 │ █████████████████████████ │ 135924 │ █████████████████████████ │
|
||||
│ 2019-04-01 │ 138473643 │ █████████████████████████ │ 6416384 │ █████████████████████████ │ 139844 │ █████████████████████████ │
|
||||
│ 2019-05-01 │ 142463421 │ █████████████████████████ │ 6574836 │ █████████████████████████ │ 142012 │ █████████████████████████ │
|
||||
│ 2019-06-01 │ 134172939 │ █████████████████████████ │ 6601267 │ █████████████████████████ │ 140997 │ █████████████████████████ │
|
||||
│ 2019-07-01 │ 145965083 │ █████████████████████████ │ 6901822 │ █████████████████████████ │ 147802 │ █████████████████████████ │
|
||||
│ 2019-08-01 │ 146854393 │ █████████████████████████ │ 6993882 │ █████████████████████████ │ 151888 │ █████████████████████████ │
|
||||
│ 2019-09-01 │ 137540219 │ █████████████████████████ │ 7001362 │ █████████████████████████ │ 148839 │ █████████████████████████ │
|
||||
│ 2019-10-01 │ 129771456 │ █████████████████████████ │ 6825690 │ █████████████████████████ │ 144453 │ █████████████████████████ │
|
||||
│ 2019-11-01 │ 107990259 │ █████████████████████████ │ 6368286 │ █████████████████████████ │ 141768 │ █████████████████████████ │
|
||||
│ 2019-12-01 │ 112895934 │ █████████████████████████ │ 6640902 │ █████████████████████████ │ 148277 │ █████████████████████████ │
|
||||
│ 2020-01-01 │ 54354879 │ █████████████████████████ │ 4782339 │ ███████████████████████▉ │ 111658 │ █████████████████████████ │
|
||||
│ 2020-02-01 │ 22696923 │ ███████████▎ │ 3135175 │ ███████████████▋ │ 79521 │ ███████████████████▉ │
|
||||
│ 2020-03-01 │ 3466677 │ █▋ │ 987960 │ ████▉ │ 40901 │ ██████████▏ │
|
||||
└──────────────┴───────────┴───────────────────────────┴─────────┴───────────────────────────┴────────────┴───────────────────────────┘
|
||||
┌─firstOfMonth─┬─────────c─┬─bar_count─────────────────┬──authors─┬─bar_authors───────────────┬─subreddits─┬─bar_subreddits────────────┐
│ 2005-12-01 │ 1075 │ │ 394 │ │ 1 │ │
│ 2006-01-01 │ 3666 │ │ 791 │ │ 2 │ │
│ 2006-02-01 │ 9095 │ │ 1464 │ │ 18 │ │
│ 2006-03-01 │ 13859 │ │ 1958 │ │ 15 │ │
│ 2006-04-01 │ 19090 │ │ 2334 │ │ 21 │ │
│ 2006-05-01 │ 26859 │ │ 2698 │ │ 21 │ │
│ 2006-06-01 │ 29163 │ │ 3043 │ │ 19 │ │
│ 2006-07-01 │ 37031 │ │ 3532 │ │ 22 │ │
│ 2006-08-01 │ 50559 │ │ 4750 │ │ 24 │ │
│ 2006-09-01 │ 50675 │ │ 4908 │ │ 21 │ │
│ 2006-10-01 │ 54148 │ │ 5654 │ │ 31 │ │
│ 2006-11-01 │ 62021 │ │ 6490 │ │ 23 │ │
│ 2006-12-01 │ 61018 │ │ 6707 │ │ 24 │ │
│ 2007-01-01 │ 81341 │ │ 7931 │ │ 23 │ │
│ 2007-02-01 │ 95634 │ │ 9020 │ │ 21 │ │
│ 2007-03-01 │ 112444 │ │ 10842 │ │ 23 │ │
│ 2007-04-01 │ 126773 │ │ 10701 │ │ 26 │ │
│ 2007-05-01 │ 170097 │ │ 11365 │ │ 25 │ │
│ 2007-06-01 │ 178800 │ │ 11267 │ │ 22 │ │
│ 2007-07-01 │ 203319 │ │ 12482 │ │ 25 │ │
│ 2007-08-01 │ 225111 │ │ 14124 │ │ 30 │ │
│ 2007-09-01 │ 259497 │ ▏ │ 15416 │ │ 33 │ │
│ 2007-10-01 │ 274170 │ ▏ │ 15302 │ │ 36 │ │
│ 2007-11-01 │ 372983 │ ▏ │ 15134 │ │ 43 │ │
│ 2007-12-01 │ 363390 │ ▏ │ 15915 │ │ 31 │ │
│ 2008-01-01 │ 452990 │ ▏ │ 18857 │ │ 126 │ │
│ 2008-02-01 │ 441768 │ ▏ │ 18266 │ │ 173 │ │
│ 2008-03-01 │ 463728 │ ▏ │ 18947 │ │ 292 │ │
│ 2008-04-01 │ 468317 │ ▏ │ 18590 │ │ 323 │ │
│ 2008-05-01 │ 536380 │ ▎ │ 20861 │ │ 375 │ │
│ 2008-06-01 │ 577684 │ ▎ │ 22557 │ │ 575 │ ▏ │
│ 2008-07-01 │ 592610 │ ▎ │ 23123 │ │ 657 │ ▏ │
│ 2008-08-01 │ 595959 │ ▎ │ 23729 │ │ 707 │ ▏ │
│ 2008-09-01 │ 680892 │ ▎ │ 26374 │ ▏ │ 801 │ ▏ │
│ 2008-10-01 │ 789874 │ ▍ │ 28970 │ ▏ │ 893 │ ▏ │
│ 2008-11-01 │ 792310 │ ▍ │ 30272 │ ▏ │ 1024 │ ▎ │
│ 2008-12-01 │ 850359 │ ▍ │ 34073 │ ▏ │ 1103 │ ▎ │
│ 2009-01-01 │ 1051649 │ ▌ │ 38978 │ ▏ │ 1316 │ ▎ │
│ 2009-02-01 │ 944711 │ ▍ │ 43390 │ ▏ │ 1132 │ ▎ │
│ 2009-03-01 │ 1048643 │ ▌ │ 46516 │ ▏ │ 1203 │ ▎ │
│ 2009-04-01 │ 1094599 │ ▌ │ 48284 │ ▏ │ 1334 │ ▎ │
│ 2009-05-01 │ 1201257 │ ▌ │ 52512 │ ▎ │ 1395 │ ▎ │
│ 2009-06-01 │ 1258750 │ ▋ │ 57728 │ ▎ │ 1473 │ ▎ │
│ 2009-07-01 │ 1470290 │ ▋ │ 60098 │ ▎ │ 1686 │ ▍ │
│ 2009-08-01 │ 1750688 │ ▉ │ 67347 │ ▎ │ 1777 │ ▍ │
│ 2009-09-01 │ 2032276 │ █ │ 78051 │ ▍ │ 1784 │ ▍ │
│ 2009-10-01 │ 2242017 │ █ │ 93409 │ ▍ │ 2071 │ ▌ │
│ 2009-11-01 │ 2207444 │ █ │ 95940 │ ▍ │ 2141 │ ▌ │
│ 2009-12-01 │ 2560510 │ █▎ │ 104239 │ ▌ │ 2141 │ ▌ │
│ 2010-01-01 │ 2884096 │ █▍ │ 114314 │ ▌ │ 2313 │ ▌ │
│ 2010-02-01 │ 2687779 │ █▎ │ 115683 │ ▌ │ 2522 │ ▋ │
│ 2010-03-01 │ 3228254 │ █▌ │ 125775 │ ▋ │ 2890 │ ▋ │
│ 2010-04-01 │ 3209898 │ █▌ │ 128936 │ ▋ │ 3170 │ ▊ │
│ 2010-05-01 │ 3267363 │ █▋ │ 131851 │ ▋ │ 3166 │ ▊ │
│ 2010-06-01 │ 3532867 │ █▊ │ 139522 │ ▋ │ 3301 │ ▊ │
│ 2010-07-01 │ 806612 │ ▍ │ 76486 │ ▍ │ 1955 │ ▍ │
│ 2010-08-01 │ 4247982 │ ██ │ 164071 │ ▊ │ 3653 │ ▉ │
│ 2010-09-01 │ 4704069 │ ██▎ │ 186613 │ ▉ │ 4009 │ █ │
│ 2010-10-01 │ 5032368 │ ██▌ │ 203800 │ █ │ 4154 │ █ │
│ 2010-11-01 │ 5689002 │ ██▊ │ 226134 │ █▏ │ 4383 │ █ │
│ 2010-12-01 │ 3642690 │ █▊ │ 196847 │ ▉ │ 3914 │ ▉ │
│ 2011-01-01 │ 3924540 │ █▉ │ 215057 │ █ │ 4240 │ █ │
│ 2011-02-01 │ 3859131 │ █▉ │ 223485 │ █ │ 4371 │ █ │
│ 2011-03-01 │ 2877996 │ █▍ │ 208607 │ █ │ 3870 │ ▉ │
│ 2011-04-01 │ 3859131 │ █▉ │ 248931 │ █▏ │ 4881 │ █▏ │
│ 2011-06-01 │ 3859131 │ █▉ │ 267197 │ █▎ │ 5255 │ █▎ │
│ 2011-08-01 │ 2943405 │ █▍ │ 259428 │ █▎ │ 5806 │ █▍ │
│ 2011-10-01 │ 3859131 │ █▉ │ 327342 │ █▋ │ 6958 │ █▋ │
│ 2011-12-01 │ 3728313 │ █▊ │ 354817 │ █▊ │ 7713 │ █▉ │
│ 2012-01-01 │ 16350205 │ ████████▏ │ 696110 │ ███▍ │ 14281 │ ███▌ │
│ 2012-02-01 │ 16015695 │ ████████ │ 722892 │ ███▌ │ 14949 │ ███▋ │
│ 2012-03-01 │ 17881943 │ ████████▉ │ 789664 │ ███▉ │ 15795 │ ███▉ │
│ 2012-04-01 │ 19044534 │ █████████▌ │ 842491 │ ████▏ │ 16440 │ ████ │
│ 2012-05-01 │ 20388260 │ ██████████▏ │ 886176 │ ████▍ │ 16974 │ ████▏ │
│ 2012-06-01 │ 21897913 │ ██████████▉ │ 946798 │ ████▋ │ 17952 │ ████▍ │
│ 2012-07-01 │ 24087517 │ ████████████ │ 1018636 │ █████ │ 19069 │ ████▊ │
│ 2012-08-01 │ 25703326 │ ████████████▊ │ 1094445 │ █████▍ │ 20553 │ █████▏ │
│ 2012-09-01 │ 23419524 │ ███████████▋ │ 1088491 │ █████▍ │ 20831 │ █████▏ │
│ 2012-10-01 │ 24788236 │ ████████████▍ │ 1131885 │ █████▋ │ 21868 │ █████▍ │
│ 2012-11-01 │ 24648302 │ ████████████▎ │ 1167608 │ █████▊ │ 21791 │ █████▍ │
│ 2012-12-01 │ 26080276 │ █████████████ │ 1218402 │ ██████ │ 22622 │ █████▋ │
│ 2013-01-01 │ 30365867 │ ███████████████▏ │ 1341703 │ ██████▋ │ 24696 │ ██████▏ │
│ 2013-02-01 │ 27213960 │ █████████████▌ │ 1304756 │ ██████▌ │ 24514 │ ██████▏ │
│ 2013-03-01 │ 30771274 │ ███████████████▍ │ 1391703 │ ██████▉ │ 25730 │ ██████▍ │
│ 2013-04-01 │ 33259557 │ ████████████████▋ │ 1485971 │ ███████▍ │ 27294 │ ██████▊ │
│ 2013-05-01 │ 33126225 │ ████████████████▌ │ 1506473 │ ███████▌ │ 27299 │ ██████▊ │
│ 2013-06-01 │ 32648247 │ ████████████████▎ │ 1506650 │ ███████▌ │ 27450 │ ██████▊ │
│ 2013-07-01 │ 34922133 │ █████████████████▍ │ 1561771 │ ███████▊ │ 28294 │ ███████ │
│ 2013-08-01 │ 34766579 │ █████████████████▍ │ 1589781 │ ███████▉ │ 28943 │ ███████▏ │
│ 2013-09-01 │ 31990369 │ ███████████████▉ │ 1570342 │ ███████▊ │ 29408 │ ███████▎ │
│ 2013-10-01 │ 35940040 │ █████████████████▉ │ 1683770 │ ████████▍ │ 30273 │ ███████▌ │
│ 2013-11-01 │ 37396497 │ ██████████████████▋ │ 1757467 │ ████████▊ │ 31173 │ ███████▊ │
│ 2013-12-01 │ 39810216 │ ███████████████████▉ │ 1846204 │ █████████▏ │ 32326 │ ████████ │
│ 2014-01-01 │ 42420655 │ █████████████████████▏ │ 1927229 │ █████████▋ │ 35603 │ ████████▉ │
│ 2014-02-01 │ 38703362 │ ███████████████████▎ │ 1874067 │ █████████▎ │ 37007 │ █████████▎ │
│ 2014-03-01 │ 42459956 │ █████████████████████▏ │ 1959888 │ █████████▊ │ 37948 │ █████████▍ │
│ 2014-04-01 │ 42440735 │ █████████████████████▏ │ 1951369 │ █████████▊ │ 38362 │ █████████▌ │
│ 2014-05-01 │ 42514094 │ █████████████████████▎ │ 1970197 │ █████████▊ │ 39078 │ █████████▊ │
│ 2014-06-01 │ 41990650 │ ████████████████████▉ │ 1943850 │ █████████▋ │ 38268 │ █████████▌ │
│ 2014-07-01 │ 46868899 │ ███████████████████████▍ │ 2059346 │ ██████████▎ │ 40634 │ ██████████▏ │
│ 2014-08-01 │ 46990813 │ ███████████████████████▍ │ 2117335 │ ██████████▌ │ 41764 │ ██████████▍ │
│ 2014-09-01 │ 44992201 │ ██████████████████████▍ │ 2124708 │ ██████████▌ │ 41890 │ ██████████▍ │
│ 2014-10-01 │ 47497520 │ ███████████████████████▋ │ 2206535 │ ███████████ │ 43109 │ ██████████▊ │
│ 2014-11-01 │ 46118074 │ ███████████████████████ │ 2239747 │ ███████████▏ │ 43718 │ ██████████▉ │
│ 2014-12-01 │ 48807699 │ ████████████████████████▍ │ 2372945 │ ███████████▊ │ 43823 │ ██████████▉ │
│ 2015-01-01 │ 53851542 │ █████████████████████████ │ 2499536 │ ████████████▍ │ 47172 │ ███████████▊ │
│ 2015-02-01 │ 48342747 │ ████████████████████████▏ │ 2448496 │ ████████████▏ │ 47229 │ ███████████▊ │
│ 2015-03-01 │ 54564441 │ █████████████████████████ │ 2550534 │ ████████████▊ │ 48156 │ ████████████ │
│ 2015-04-01 │ 55005780 │ █████████████████████████ │ 2609443 │ █████████████ │ 49865 │ ████████████▍ │
│ 2015-05-01 │ 54504410 │ █████████████████████████ │ 2585535 │ ████████████▉ │ 50137 │ ████████████▌ │
│ 2015-06-01 │ 54258492 │ █████████████████████████ │ 2595129 │ ████████████▉ │ 49598 │ ████████████▍ │
│ 2015-07-01 │ 58451788 │ █████████████████████████ │ 2720026 │ █████████████▌ │ 55022 │ █████████████▊ │
│ 2015-08-01 │ 58075327 │ █████████████████████████ │ 2743994 │ █████████████▋ │ 55302 │ █████████████▊ │
│ 2015-09-01 │ 55574825 │ █████████████████████████ │ 2672793 │ █████████████▎ │ 53960 │ █████████████▍ │
│ 2015-10-01 │ 59494045 │ █████████████████████████ │ 2816426 │ ██████████████ │ 70210 │ █████████████████▌ │
│ 2015-11-01 │ 57117500 │ █████████████████████████ │ 2847146 │ ██████████████▏ │ 71363 │ █████████████████▊ │
│ 2015-12-01 │ 58523312 │ █████████████████████████ │ 2854840 │ ██████████████▎ │ 94559 │ ███████████████████████▋ │
│ 2016-01-01 │ 61991732 │ █████████████████████████ │ 2920366 │ ██████████████▌ │ 108438 │ █████████████████████████ │
│ 2016-02-01 │ 59189875 │ █████████████████████████ │ 2854683 │ ██████████████▎ │ 109916 │ █████████████████████████ │
│ 2016-03-01 │ 63918864 │ █████████████████████████ │ 2969542 │ ██████████████▊ │ 84787 │ █████████████████████▏ │
│ 2016-04-01 │ 64271256 │ █████████████████████████ │ 2999086 │ ██████████████▉ │ 61647 │ ███████████████▍ │
│ 2016-05-01 │ 65212004 │ █████████████████████████ │ 3034674 │ ███████████████▏ │ 67465 │ ████████████████▊ │
│ 2016-06-01 │ 65867743 │ █████████████████████████ │ 3057604 │ ███████████████▎ │ 75170 │ ██████████████████▊ │
│ 2016-07-01 │ 66974735 │ █████████████████████████ │ 3199374 │ ███████████████▉ │ 77732 │ ███████████████████▍ │
│ 2016-08-01 │ 69654819 │ █████████████████████████ │ 3239957 │ ████████████████▏ │ 63080 │ ███████████████▊ │
│ 2016-09-01 │ 67024973 │ █████████████████████████ │ 3190864 │ ███████████████▉ │ 62324 │ ███████████████▌ │
│ 2016-10-01 │ 71826553 │ █████████████████████████ │ 3284340 │ ████████████████▍ │ 62549 │ ███████████████▋ │
│ 2016-11-01 │ 71022319 │ █████████████████████████ │ 3300822 │ ████████████████▌ │ 69718 │ █████████████████▍ │
│ 2016-12-01 │ 72942967 │ █████████████████████████ │ 3430324 │ █████████████████▏ │ 71705 │ █████████████████▉ │
│ 2017-01-01 │ 78946585 │ █████████████████████████ │ 3572093 │ █████████████████▊ │ 78198 │ ███████████████████▌ │
│ 2017-02-01 │ 70609487 │ █████████████████████████ │ 3421115 │ █████████████████ │ 69823 │ █████████████████▍ │
│ 2017-03-01 │ 79723106 │ █████████████████████████ │ 3638122 │ ██████████████████▏ │ 73865 │ ██████████████████▍ │
│ 2017-04-01 │ 77478009 │ █████████████████████████ │ 3620591 │ ██████████████████ │ 74387 │ ██████████████████▌ │
│ 2017-05-01 │ 79810360 │ █████████████████████████ │ 3650820 │ ██████████████████▎ │ 74356 │ ██████████████████▌ │
│ 2017-06-01 │ 79901711 │ █████████████████████████ │ 3737614 │ ██████████████████▋ │ 72114 │ ██████████████████ │
│ 2017-07-01 │ 81798725 │ █████████████████████████ │ 3872330 │ ███████████████████▎ │ 76052 │ ███████████████████ │
│ 2017-08-01 │ 84658503 │ █████████████████████████ │ 3960093 │ ███████████████████▊ │ 77798 │ ███████████████████▍ │
│ 2017-09-01 │ 83165192 │ █████████████████████████ │ 3880501 │ ███████████████████▍ │ 78402 │ ███████████████████▌ │
│ 2017-10-01 │ 85828912 │ █████████████████████████ │ 3980335 │ ███████████████████▉ │ 80685 │ ████████████████████▏ │
│ 2017-11-01 │ 84965681 │ █████████████████████████ │ 4026749 │ ████████████████████▏ │ 82659 │ ████████████████████▋ │
│ 2017-12-01 │ 85973810 │ █████████████████████████ │ 4196354 │ ████████████████████▉ │ 91984 │ ██████████████████████▉ │
│ 2018-01-01 │ 91558594 │ █████████████████████████ │ 4364443 │ █████████████████████▊ │ 102577 │ █████████████████████████ │
│ 2018-02-01 │ 86467179 │ █████████████████████████ │ 4277899 │ █████████████████████▍ │ 104610 │ █████████████████████████ │
│ 2018-03-01 │ 96490262 │ █████████████████████████ │ 4422470 │ ██████████████████████ │ 112559 │ █████████████████████████ │
│ 2018-04-01 │ 98101232 │ █████████████████████████ │ 4572434 │ ██████████████████████▊ │ 105284 │ █████████████████████████ │
│ 2018-05-01 │ 100109100 │ █████████████████████████ │ 4698908 │ ███████████████████████▍ │ 103910 │ █████████████████████████ │
│ 2018-06-01 │ 100009462 │ █████████████████████████ │ 4697426 │ ███████████████████████▍ │ 101107 │ █████████████████████████ │
│ 2018-07-01 │ 108151359 │ █████████████████████████ │ 5099492 │ █████████████████████████ │ 106184 │ █████████████████████████ │
│ 2018-08-01 │ 107330940 │ █████████████████████████ │ 5084082 │ █████████████████████████ │ 109985 │ █████████████████████████ │
│ 2018-09-01 │ 104473929 │ █████████████████████████ │ 5011953 │ █████████████████████████ │ 109710 │ █████████████████████████ │
│ 2018-10-01 │ 112346556 │ █████████████████████████ │ 5320405 │ █████████████████████████ │ 112533 │ █████████████████████████ │
│ 2018-11-01 │ 112573001 │ █████████████████████████ │ 5353282 │ █████████████████████████ │ 112211 │ █████████████████████████ │
│ 2018-12-01 │ 121953600 │ █████████████████████████ │ 5611543 │ █████████████████████████ │ 118291 │ █████████████████████████ │
│ 2019-01-01 │ 129386587 │ █████████████████████████ │ 6016687 │ █████████████████████████ │ 125725 │ █████████████████████████ │
│ 2019-02-01 │ 120645639 │ █████████████████████████ │ 5974488 │ █████████████████████████ │ 125420 │ █████████████████████████ │
│ 2019-03-01 │ 137650471 │ █████████████████████████ │ 6410197 │ █████████████████████████ │ 135924 │ █████████████████████████ │
│ 2019-04-01 │ 138473643 │ █████████████████████████ │ 6416384 │ █████████████████████████ │ 139844 │ █████████████████████████ │
│ 2019-05-01 │ 142463421 │ █████████████████████████ │ 6574836 │ █████████████████████████ │ 142012 │ █████████████████████████ │
│ 2019-06-01 │ 134172939 │ █████████████████████████ │ 6601267 │ █████████████████████████ │ 140997 │ █████████████████████████ │
│ 2019-07-01 │ 145965083 │ █████████████████████████ │ 6901822 │ █████████████████████████ │ 147802 │ █████████████████████████ │
│ 2019-08-01 │ 146854393 │ █████████████████████████ │ 6993882 │ █████████████████████████ │ 151888 │ █████████████████████████ │
│ 2019-09-01 │ 137540219 │ █████████████████████████ │ 7001362 │ █████████████████████████ │ 148839 │ █████████████████████████ │
│ 2019-10-01 │ 145909884 │ █████████████████████████ │ 7160126 │ █████████████████████████ │ 152075 │ █████████████████████████ │
│ 2019-11-01 │ 138512489 │ █████████████████████████ │ 7098723 │ █████████████████████████ │ 164597 │ █████████████████████████ │
│ 2019-12-01 │ 146012313 │ █████████████████████████ │ 7438261 │ █████████████████████████ │ 166966 │ █████████████████████████ │
│ 2020-01-01 │ 153498208 │ █████████████████████████ │ 7703548 │ █████████████████████████ │ 174390 │ █████████████████████████ │
│ 2020-02-01 │ 148386817 │ █████████████████████████ │ 7582031 │ █████████████████████████ │ 170257 │ █████████████████████████ │
│ 2020-03-01 │ 166266315 │ █████████████████████████ │ 8339049 │ █████████████████████████ │ 192460 │ █████████████████████████ │
│ 2020-04-01 │ 178511581 │ █████████████████████████ │ 8991649 │ █████████████████████████ │ 202334 │ █████████████████████████ │
│ 2020-05-01 │ 189993779 │ █████████████████████████ │ 9331358 │ █████████████████████████ │ 217357 │ █████████████████████████ │
│ 2020-06-01 │ 187914434 │ █████████████████████████ │ 9085003 │ █████████████████████████ │ 223362 │ █████████████████████████ │
│ 2020-07-01 │ 194244994 │ █████████████████████████ │ 9321706 │ █████████████████████████ │ 228222 │ █████████████████████████ │
│ 2020-08-01 │ 196099301 │ █████████████████████████ │ 9368408 │ █████████████████████████ │ 230251 │ █████████████████████████ │
│ 2020-09-01 │ 182549761 │ █████████████████████████ │ 9271571 │ █████████████████████████ │ 227889 │ █████████████████████████ │
│ 2020-10-01 │ 186583890 │ █████████████████████████ │ 9396112 │ █████████████████████████ │ 233715 │ █████████████████████████ │
│ 2020-11-01 │ 186083723 │ █████████████████████████ │ 9623053 │ █████████████████████████ │ 234963 │ █████████████████████████ │
│ 2020-12-01 │ 191317162 │ █████████████████████████ │ 9898168 │ █████████████████████████ │ 249115 │ █████████████████████████ │
│ 2021-01-01 │ 210496207 │ █████████████████████████ │ 10503943 │ █████████████████████████ │ 259805 │ █████████████████████████ │
│ 2021-02-01 │ 193510365 │ █████████████████████████ │ 10215033 │ █████████████████████████ │ 253656 │ █████████████████████████ │
│ 2021-03-01 │ 207454415 │ █████████████████████████ │ 10365629 │ █████████████████████████ │ 267263 │ █████████████████████████ │
│ 2021-04-01 │ 204573086 │ █████████████████████████ │ 10391984 │ █████████████████████████ │ 270543 │ █████████████████████████ │
│ 2021-05-01 │ 217655366 │ █████████████████████████ │ 10648130 │ █████████████████████████ │ 288555 │ █████████████████████████ │
│ 2021-06-01 │ 208027069 │ █████████████████████████ │ 10397311 │ █████████████████████████ │ 291520 │ █████████████████████████ │
│ 2021-07-01 │ 210955954 │ █████████████████████████ │ 10063967 │ █████████████████████████ │ 252061 │ █████████████████████████ │
│ 2021-08-01 │ 225681244 │ █████████████████████████ │ 10383556 │ █████████████████████████ │ 254569 │ █████████████████████████ │
│ 2021-09-01 │ 220086513 │ █████████████████████████ │ 10298344 │ █████████████████████████ │ 256826 │ █████████████████████████ │
│ 2021-10-01 │ 227527379 │ █████████████████████████ │ 10729882 │ █████████████████████████ │ 283328 │ █████████████████████████ │
│ 2021-11-01 │ 228289963 │ █████████████████████████ │ 10995197 │ █████████████████████████ │ 302386 │ █████████████████████████ │
│ 2021-12-01 │ 235807471 │ █████████████████████████ │ 11312798 │ █████████████████████████ │ 313876 │ █████████████████████████ │
│ 2022-01-01 │ 256766679 │ █████████████████████████ │ 12074520 │ █████████████████████████ │ 340407 │ █████████████████████████ │
│ 2022-02-01 │ 219927645 │ █████████████████████████ │ 10846045 │ █████████████████████████ │ 293236 │ █████████████████████████ │
│ 2022-03-01 │ 236554668 │ █████████████████████████ │ 11330285 │ █████████████████████████ │ 302387 │ █████████████████████████ │
│ 2022-04-01 │ 231188077 │ █████████████████████████ │ 11697995 │ █████████████████████████ │ 316303 │ █████████████████████████ │
│ 2022-05-01 │ 230492108 │ █████████████████████████ │ 11448584 │ █████████████████████████ │ 323725 │ █████████████████████████ │
│ 2022-06-01 │ 218842949 │ █████████████████████████ │ 11400399 │ █████████████████████████ │ 324846 │ █████████████████████████ │
│ 2022-07-01 │ 242504279 │ █████████████████████████ │ 12049204 │ █████████████████████████ │ 335621 │ █████████████████████████ │
│ 2022-08-01 │ 247215325 │ █████████████████████████ │ 12189276 │ █████████████████████████ │ 337873 │ █████████████████████████ │
│ 2022-09-01 │ 234131223 │ █████████████████████████ │ 11674079 │ █████████████████████████ │ 326325 │ █████████████████████████ │
│ 2022-10-01 │ 237365072 │ █████████████████████████ │ 11804508 │ █████████████████████████ │ 336063 │ █████████████████████████ │
│ 2022-11-01 │ 229478878 │ █████████████████████████ │ 11543020 │ █████████████████████████ │ 323122 │ █████████████████████████ │
│ 2022-12-01 │ 238862690 │ █████████████████████████ │ 11967451 │ █████████████████████████ │ 331668 │ █████████████████████████ │
│ 2023-01-01 │ 253577512 │ █████████████████████████ │ 12264087 │ █████████████████████████ │ 332711 │ █████████████████████████ │
│ 2023-02-01 │ 221285501 │ █████████████████████████ │ 11537091 │ █████████████████████████ │ 317879 │ █████████████████████████ │
└──────────────┴───────────┴───────────────────────────┴──────────┴───────────────────────────┴────────────┴───────────────────────────┘

172 rows in set. Elapsed: 184.809 sec. Processed 6.74 billion rows, 89.56 GB (36.47 million rows/s., 484.62 MB/s.)
203 rows in set. Elapsed: 48.492 sec. Processed 14.69 billion rows, 213.35 GB (302.91 million rows/s., 4.40 GB/s.)
```

10. Here are the top 10 subreddits of 2022:
@ -450,26 +525,24 @@ ORDER BY count DESC
LIMIT 10;
```

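Only the tail of this query survives in the hunk above. As a hedged reconstruction for readability — the `reddit` table and `created_utc` column come from earlier steps of this guide, so treat this as a sketch rather than the exact original:

```sql
-- Sketch: assumes the `reddit` table and `created_utc` column used earlier in this guide
SELECT
    subreddit,
    count() AS count
FROM reddit
WHERE toYear(created_utc) = 2022
GROUP BY subreddit
ORDER BY count DESC
LIMIT 10;
```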
The response is:

```response
┌─subreddit────────┬───count─┐
│ AskReddit │ 3858203 │
│ politics │ 1356782 │
│ memes │ 1249120 │
│ nfl │ 883667 │
│ worldnews │ 866065 │
│ teenagers │ 777095 │
│ AmItheAsshole │ 752720 │
│ dankmemes │ 657932 │
│ nba │ 514184 │
│ unpopularopinion │ 473649 │
└──────────────────┴─────────┘
┌─subreddit──────┬────count─┐
│ AskReddit │ 72312060 │
│ AmItheAsshole │ 25323210 │
│ teenagers │ 22355960 │
│ worldnews │ 17797707 │
│ FreeKarma4U │ 15652274 │
│ FreeKarma4You │ 14929055 │
│ wallstreetbets │ 14235271 │
│ politics │ 12511136 │
│ memes │ 11610792 │
│ nba │ 11586571 │
└────────────────┴──────────┘

10 rows in set. Elapsed: 27.824 sec. Processed 6.74 billion rows, 53.26 GB (242.22 million rows/s., 1.91 GB/s.)
10 rows in set. Elapsed: 5.956 sec. Processed 14.69 billion rows, 126.19 GB (2.47 billion rows/s., 21.19 GB/s.)
```

11. Let's see which subreddits had the biggest increase in commnents from 2018 to 2019:
11. Let's see which subreddits had the biggest increase in comments from 2018 to 2019:

```sql
SELECT
@ -502,62 +575,62 @@ It looks like memes and teenagers were busy on Reddit in 2019:

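The body of this query is truncated by the hunk above. A hedged sketch of one way to compute such a per-subreddit increase — the `reddit` table and `created_utc` column are assumptions carried over from earlier steps, and this need not match the original query exactly:

```sql
-- Sketch: year-over-year comment growth per subreddit, 2018 vs. 2019
SELECT
    subreddit,
    newcount - oldcount AS diff
FROM
(
    SELECT subreddit, count() AS newcount
    FROM reddit
    WHERE toYear(created_utc) = 2019
    GROUP BY subreddit
) AS new_counts
INNER JOIN
(
    SELECT subreddit, count() AS oldcount
    FROM reddit
    WHERE toYear(created_utc) = 2018
    GROUP BY subreddit
) AS old_counts USING (subreddit)
ORDER BY diff DESC
LIMIT 50;
```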
```response
┌─subreddit────────────┬─────diff─┐
│ memes │ 15368369 │
│ AskReddit │ 14663662 │
│ teenagers │ 12266991 │
│ AmItheAsshole │ 11561538 │
│ dankmemes │ 11305158 │
│ unpopularopinion │ 6332772 │
│ PewdiepieSubmissions │ 5930818 │
│ Market76 │ 5014668 │
│ relationship_advice │ 3776383 │
│ freefolk │ 3169236 │
│ Minecraft │ 3160241 │
│ classicwow │ 2907056 │
│ Animemes │ 2673398 │
│ gameofthrones │ 2402835 │
│ PublicFreakout │ 2267605 │
│ ShitPostCrusaders │ 2207266 │
│ RoastMe │ 2195715 │
│ gonewild │ 2148649 │
│ AnthemTheGame │ 1803818 │
│ entitledparents │ 1706270 │
│ MortalKombat │ 1679508 │
│ Cringetopia │ 1620555 │
│ pokemon │ 1615266 │
│ HistoryMemes │ 1608289 │
│ Brawlstars │ 1574977 │
│ iamatotalpieceofshit │ 1558315 │
│ trashy │ 1518549 │
│ ChapoTrapHouse │ 1505748 │
│ Pikabu │ 1501001 │
│ Showerthoughts │ 1475101 │
│ cursedcomments │ 1465607 │
│ ukpolitics │ 1386043 │
│ wallstreetbets │ 1384431 │
│ interestingasfuck │ 1378900 │
│ wholesomememes │ 1353333 │
│ AskOuija │ 1233263 │
│ borderlands3 │ 1197192 │
│ aww │ 1168257 │
│ insanepeoplefacebook │ 1155473 │
│ FortniteCompetitive │ 1122778 │
│ EpicSeven │ 1117380 │
│ FreeKarma4U │ 1116423 │
│ YangForPresidentHQ │ 1086700 │
│ SquaredCircle │ 1044089 │
│ MurderedByWords │ 1042511 │
│ AskMen │ 1024434 │
│ thedivision │ 1016634 │
│ barstoolsports │ 985032 │
│ nfl │ 978340 │
│ BattlefieldV │ 971408 │
│ AskReddit │ 18765909 │
│ memes │ 16496996 │
│ teenagers │ 13071715 │
│ AmItheAsshole │ 12312663 │
│ dankmemes │ 12016716 │
│ unpopularopinion │ 6809935 │
│ PewdiepieSubmissions │ 6330844 │
│ Market76 │ 5213690 │
│ relationship_advice │ 4060717 │
│ Minecraft │ 3328659 │
│ freefolk │ 3227970 │
│ classicwow │ 3063133 │
│ Animemes │ 2866876 │
│ gonewild │ 2457680 │
│ PublicFreakout │ 2452288 │
│ gameofthrones │ 2411661 │
│ RoastMe │ 2378781 │
│ ShitPostCrusaders │ 2345414 │
│ AnthemTheGame │ 1813152 │
│ nfl │ 1804407 │
│ Showerthoughts │ 1797968 │
│ Cringetopia │ 1764034 │
│ pokemon │ 1763269 │
│ entitledparents │ 1744852 │
│ HistoryMemes │ 1721645 │
│ MortalKombat │ 1718184 │
│ trashy │ 1684357 │
│ ChapoTrapHouse │ 1675363 │
│ Brawlstars │ 1663763 │
│ iamatotalpieceofshit │ 1647381 │
│ ukpolitics │ 1599204 │
│ cursedcomments │ 1590781 │
│ Pikabu │ 1578597 │
│ wallstreetbets │ 1535225 │
│ AskOuija │ 1533214 │
│ interestingasfuck │ 1528910 │
│ aww │ 1439008 │
│ wholesomememes │ 1436566 │
│ SquaredCircle │ 1432172 │
│ insanepeoplefacebook │ 1290686 │
│ borderlands3 │ 1274462 │
│ FreeKarma4U │ 1217769 │
│ YangForPresidentHQ │ 1186918 │
│ FortniteCompetitive │ 1184508 │
│ AskMen │ 1180820 │
│ EpicSeven │ 1172061 │
│ MurderedByWords │ 1112476 │
│ politics │ 1084087 │
│ barstoolsports │ 1068020 │
│ BattlefieldV │ 1053878 │
└──────────────────────┴──────────┘

50 rows in set. Elapsed: 65.954 sec. Processed 13.48 billion rows, 79.67 GB (204.37 million rows/s., 1.21 GB/s.)
50 rows in set. Elapsed: 10.680 sec. Processed 29.38 billion rows, 198.67 GB (2.75 billion rows/s., 18.60 GB/s.)
```

12. One more query: let's compare ClickHouse mentions to other technologies like Snowflake and Postgres. This query is a big one because it has to search all the comments three times for a substring, and unfortunately ClickHouse user are obviously not very active on Reddit yet:
12. One more query: let's compare ClickHouse mentions to other technologies like Snowflake and Postgres. This query is a big one because it has to search all 14.69 billion comments three times for a substring, but the performance is actually quite impressive. (Unfortunately ClickHouse users are not very active on Reddit yet):

```sql
SELECT
@ -571,7 +644,7 @@ ORDER BY quarter ASC;
```

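Only the first line of the query body survives in this hunk. A hedged sketch of what the full query plausibly does — the `body` column name and the use of `countIf`/`positionCaseInsensitive` are assumptions, not taken from the diff:

```sql
-- Sketch: per-quarter counts of comments mentioning each technology
SELECT
    toStartOfQuarter(created_utc) AS quarter,
    countIf(positionCaseInsensitive(body, 'clickhouse') > 0) AS clickhouse,
    countIf(positionCaseInsensitive(body, 'snowflake') > 0) AS snowflake,
    countIf(positionCaseInsensitive(body, 'postgres') > 0) AS postgres
FROM reddit
GROUP BY quarter
ORDER BY quarter ASC;
```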
```response
┌────Quarter─┬─clickhouse─┬─snowflake─┬─postgres─┐
┌────quarter─┬─clickhouse─┬─snowflake─┬─postgres─┐
│ 2005-10-01 │ 0 │ 0 │ 0 │
│ 2006-01-01 │ 0 │ 2 │ 23 │
│ 2006-04-01 │ 0 │ 2 │ 24 │
@ -591,12 +664,12 @@ ORDER BY quarter ASC;
│ 2009-10-01 │ 0 │ 633 │ 589 │
│ 2010-01-01 │ 0 │ 555 │ 501 │
│ 2010-04-01 │ 0 │ 587 │ 469 │
│ 2010-07-01 │ 0 │ 770 │ 821 │
│ 2010-10-01 │ 0 │ 1480 │ 550 │
│ 2011-01-01 │ 0 │ 1482 │ 568 │
│ 2011-04-01 │ 0 │ 1558 │ 406 │
│ 2011-07-01 │ 0 │ 2163 │ 628 │
│ 2011-10-01 │ 0 │ 4064 │ 566 │
│ 2010-07-01 │ 0 │ 601 │ 696 │
│ 2010-10-01 │ 0 │ 1246 │ 505 │
│ 2011-01-01 │ 0 │ 758 │ 247 │
│ 2011-04-01 │ 0 │ 537 │ 113 │
│ 2011-07-01 │ 0 │ 173 │ 64 │
│ 2011-10-01 │ 0 │ 649 │ 96 │
│ 2012-01-01 │ 0 │ 4621 │ 662 │
│ 2012-04-01 │ 0 │ 5737 │ 785 │
│ 2012-07-01 │ 0 │ 6097 │ 1127 │
@ -628,9 +701,20 @@ ORDER BY quarter ASC;
│ 2019-01-01 │ 14 │ 80250 │ 4305 │
│ 2019-04-01 │ 30 │ 70307 │ 3872 │
│ 2019-07-01 │ 33 │ 77149 │ 4164 │
│ 2019-10-01 │ 13 │ 76746 │ 3541 │
│ 2020-01-01 │ 16 │ 54475 │ 846 │
│ 2019-10-01 │ 22 │ 113011 │ 4369 │
│ 2020-01-01 │ 34 │ 238273 │ 5133 │
│ 2020-04-01 │ 52 │ 454467 │ 6100 │
│ 2020-07-01 │ 37 │ 406623 │ 5507 │
│ 2020-10-01 │ 49 │ 212143 │ 5385 │
│ 2021-01-01 │ 56 │ 151262 │ 5749 │
│ 2021-04-01 │ 71 │ 119928 │ 6039 │
│ 2021-07-01 │ 53 │ 110342 │ 5765 │
│ 2021-10-01 │ 92 │ 121144 │ 6401 │
│ 2022-01-01 │ 93 │ 107512 │ 6772 │
│ 2022-04-01 │ 120 │ 91560 │ 6687 │
│ 2022-07-01 │ 183 │ 99764 │ 7377 │
│ 2022-10-01 │ 123 │ 99447 │ 7052 │
│ 2023-01-01 │ 126 │ 58733 │ 4891 │
└────────────┴────────────┴───────────┴──────────┘

58 rows in set. Elapsed: 2663.751 sec. Processed 6.74 billion rows, 1.21 TB (2.53 million rows/s., 454.37 MB/s.)
```
70 rows in set. Elapsed: 325.835 sec. Processed 14.69 billion rows, 2.57 TB (45.08 million rows/s., 7.87 GB/s.)

@ -22,7 +22,7 @@ The steps below will easily work on a local install of ClickHouse too. The only

## Step-by-step instructions

1. Let's see what the data looks like. The `s3cluster` table function returns a table, so we can `DESCRIBE` the reult:
1. Let's see what the data looks like. The `s3cluster` table function returns a table, so we can `DESCRIBE` the result:

```sql
DESCRIBE s3Cluster(
@ -322,7 +322,7 @@ ORDER BY month ASC;
A spike of uploaders [around covid is noticeable](https://www.theverge.com/2020/3/27/21197642/youtube-with-me-style-videos-views-coronavirus-cook-workout-study-home-beauty).


### More subtitiles over time and when
### More subtitles over time and when

With advances in speech recognition, it’s easier than ever to create subtitles for video, with YouTube adding auto-captioning in late 2009 - was there a jump then?

@ -484,4 +484,4 @@ ARRAY JOIN
│ 20th │ 16 │
│ 10th │ 6 │
└────────────┴─────────┘
```
```
@ -467,6 +467,7 @@ The CSV format supports the output of totals and extremes the same way as `TabSe
- [output_format_csv_crlf_end_of_line](/docs/en/operations/settings/settings-formats.md/#output_format_csv_crlf_end_of_line) - if it is set to true, end of line in CSV output format will be `\r\n` instead of `\n`. Default value - `false`.
- [input_format_csv_skip_first_lines](/docs/en/operations/settings/settings-formats.md/#input_format_csv_skip_first_lines) - skip the specified number of lines at the beginning of data. Default value - `0`.
- [input_format_csv_detect_header](/docs/en/operations/settings/settings-formats.md/#input_format_csv_detect_header) - automatically detect header with names and types in CSV format. Default value - `true`.
- [input_format_csv_trim_whitespaces](/docs/en/operations/settings/settings-formats.md/#input_format_csv_trim_whitespaces) - trim spaces and tabs in non-quoted CSV strings. Default value - `true`.
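To make the skip-lines setting from the list above concrete, here is a minimal sketch in the same `clickhouse local` style as the trim-whitespaces examples documented later on this page; the input data is made up:

```bash
# skip one made-up header line before the CSV rows are parsed
echo -e 'header to skip\n1,2\n3,4' | ./clickhouse local -q "select * from table FORMAT CSV" --input-format="CSV" --input_format_csv_skip_first_lines=1
```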

## CSVWithNames {#csvwithnames}

@ -275,9 +275,9 @@ Type: UInt64

Default: 1000

## max_concurrent_insert_queries
## max_concurrent_queries

Limit on total number of concurrent insert queries. Zero means Unlimited.
Limit on total number of concurrently executed queries. Zero means Unlimited. Note that limits on insert and select queries, and on the maximum number of queries for users must also be considered. See also max_concurrent_insert_queries, max_concurrent_select_queries, max_concurrent_queries_for_all_users. Zero means unlimited.
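For illustration, a minimal configuration sketch in the same XML style used for other server settings in this file (the value 200 is arbitrary):

```xml
<!-- cap the total number of concurrently executed queries; 0 would mean unlimited -->
<max_concurrent_queries>200</max_concurrent_queries>
```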

:::note
These settings can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged.
@ -287,9 +287,9 @@ Type: UInt64

Default: 0

## max_concurrent_queries
## max_concurrent_insert_queries

Limit on total number of concurrently executed queries. Zero means Unlimited. Note that limits on insert and select queries, and on the maximum number of queries for users must also be considered. See also max_concurrent_insert_queries, max_concurrent_select_queries, max_concurrent_queries_for_all_users. Zero means unlimited.
Limit on total number of concurrent insert queries. Zero means Unlimited.
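And correspondingly for the insert limit, a configuration sketch (the value is arbitrary):

```xml
<!-- cap concurrent INSERT queries only; 0 would mean unlimited -->
<max_concurrent_insert_queries>100</max_concurrent_insert_queries>
```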

:::note
These settings can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged.
@ -1277,49 +1277,6 @@ For more information, see the section [Creating replicated tables](../../engines
<macros incl="macros" optional="true" />
```


## max_concurrent_queries_for_user {#max-concurrent-queries-for-user}

The maximum number of simultaneously processed queries related to MergeTree table per user.

Possible values:

- Positive integer.
- 0 — No limit.

Default value: `0`.

**Example**

``` xml
<max_concurrent_queries_for_user>5</max_concurrent_queries_for_user>
```

## max_concurrent_queries_for_all_users {#max-concurrent-queries-for-all-users}

Throw exception if the value of this setting is less or equal than the current number of simultaneously processed queries.

Example: `max_concurrent_queries_for_all_users` can be set to 99 for all users and database administrator can set it to 100 for itself to run queries for investigation even when the server is overloaded.

Modifying the setting for one query or user does not affect other queries.

Possible values:

- Positive integer.
- 0 — No limit.

Default value: `0`.

**Example**

``` xml
<max_concurrent_queries_for_all_users>99</max_concurrent_queries_for_all_users>
```

**See Also**

- [max_concurrent_queries](#max-concurrent-queries)

## max_open_files {#max-open-files}

The maximum number of open files.
@ -1947,7 +1904,7 @@ Config fields:
- `regexp` - RE2 compatible regular expression (mandatory)
- `replace` - substitution string for sensitive data (optional, by default - six asterisks)

The masking rules are applied to the whole query (to prevent leaks of sensitive data from malformed / non-parsable queries).
The masking rules are applied to the whole query (to prevent leaks of sensitive data from malformed / non-parseable queries).

The `system.events` table has a counter `QueryMaskingRulesMatch` which holds the overall number of query masking rule matches.

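Putting the config fields above together, a masking rule in the server configuration looks roughly like this — a sketch; the rule name and pattern here are invented for illustration:

```xml
<query_masking_rules>
    <rule>
        <!-- hypothetical rule: hide anything that looks like a US SSN -->
        <name>hide SSN</name>
        <regexp>\b\d{3}-\d{2}-\d{4}\b</regexp>
        <replace>******</replace>
    </rule>
</query_masking_rules>
```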
@ -882,6 +882,38 @@ My NULL
My NULL
```

### input_format_csv_trim_whitespaces {#input_format_csv_trim_whitespaces}

Trims spaces and tabs in non-quoted CSV strings.

Default value: `true`.

**Examples**

Query

```bash
echo ' string ' | ./clickhouse local -q "select * from table FORMAT CSV" --input-format="CSV" --input_format_csv_trim_whitespaces=true
```

Result

```text
"string"
```

Query

```bash
echo ' string ' | ./clickhouse local -q "select * from table FORMAT CSV" --input-format="CSV" --input_format_csv_trim_whitespaces=false
```

Result

```text
" string "
```

## Values format settings {#values-format-settings}

### input_format_values_interpret_expressions {#input_format_values_interpret_expressions}
@ -1182,7 +1214,7 @@ Possible values:

- `bin` - as 16-bytes binary.
- `str` - as a string of 36 bytes.
- `ext` - as extention with ExtType = 2.
- `ext` - as extension with ExtType = 2.

Default value: `ext`.

@ -227,6 +227,89 @@ SELECT * FROM data_01515 WHERE d1 = 0 SETTINGS force_data_skipping_indices='`d1_
SELECT * FROM data_01515 WHERE d1 = 0 AND assumeNotNull(d1_null) = 0 SETTINGS force_data_skipping_indices='`d1_idx`, d1_null_idx'; -- Ok.
```

## ignore_data_skipping_indices {#settings-ignore_data_skipping_indices}

Ignores the specified skipping indexes if they are used by the query.

Consider the following example:

```sql
CREATE TABLE data
(
    key Int,
    x Int,
    y Int,
    INDEX x_idx x TYPE minmax GRANULARITY 1,
    INDEX y_idx y TYPE minmax GRANULARITY 1,
    INDEX xy_idx (x,y) TYPE minmax GRANULARITY 1
)
Engine=MergeTree()
ORDER BY key;

INSERT INTO data VALUES (1, 2, 3);

SELECT * FROM data;
SELECT * FROM data SETTINGS ignore_data_skipping_indices=''; -- query will produce CANNOT_PARSE_TEXT error.
SELECT * FROM data SETTINGS ignore_data_skipping_indices='x_idx'; -- Ok.
SELECT * FROM data SETTINGS ignore_data_skipping_indices='na_idx'; -- Ok.

SELECT * FROM data WHERE x = 1 AND y = 1 SETTINGS ignore_data_skipping_indices='xy_idx',force_data_skipping_indices='xy_idx'; -- query will produce INDEX_NOT_USED error, since xy_idx is explicitly ignored.
SELECT * FROM data WHERE x = 1 AND y = 2 SETTINGS ignore_data_skipping_indices='xy_idx';
```

The query without ignoring any indexes:
```sql
EXPLAIN indexes = 1 SELECT * FROM data WHERE x = 1 AND y = 2;

Expression ((Projection + Before ORDER BY))
  Filter (WHERE)
    ReadFromMergeTree (default.data)
    Indexes:
      PrimaryKey
        Condition: true
        Parts: 1/1
        Granules: 1/1
      Skip
        Name: x_idx
        Description: minmax GRANULARITY 1
        Parts: 0/1
        Granules: 0/1
      Skip
        Name: y_idx
        Description: minmax GRANULARITY 1
        Parts: 0/0
        Granules: 0/0
      Skip
        Name: xy_idx
        Description: minmax GRANULARITY 1
        Parts: 0/0
        Granules: 0/0
```

Ignoring the `xy_idx` index:
```sql
EXPLAIN indexes = 1 SELECT * FROM data WHERE x = 1 AND y = 2 SETTINGS ignore_data_skipping_indices='xy_idx';

Expression ((Projection + Before ORDER BY))
  Filter (WHERE)
    ReadFromMergeTree (default.data)
    Indexes:
      PrimaryKey
        Condition: true
        Parts: 1/1
        Granules: 1/1
      Skip
        Name: x_idx
        Description: minmax GRANULARITY 1
        Parts: 0/1
        Granules: 0/1
      Skip
        Name: y_idx
        Description: minmax GRANULARITY 1
        Parts: 0/0
        Granules: 0/0
```

Works with tables in the MergeTree family.

## convert_query_to_cnf {#convert_query_to_cnf}
@ -646,6 +729,48 @@ Used for the same purpose as `max_block_size`, but it sets the recommended block
However, the block size cannot be more than `max_block_size` rows.
By default: 1,000,000. It only works when reading from MergeTree engines.

## max_concurrent_queries_for_user {#max-concurrent-queries-for-user}

The maximum number of simultaneously processed queries related to MergeTree table per user.

Possible values:

- Positive integer.
- 0 — No limit.

Default value: `0`.

**Example**

``` xml
<max_concurrent_queries_for_user>5</max_concurrent_queries_for_user>
```

## max_concurrent_queries_for_all_users {#max-concurrent-queries-for-all-users}

Throw exception if the value of this setting is less or equal than the current number of simultaneously processed queries.

Example: `max_concurrent_queries_for_all_users` can be set to 99 for all users and database administrator can set it to 100 for itself to run queries for investigation even when the server is overloaded.

Modifying the setting for one query or user does not affect other queries.

Possible values:

- Positive integer.
- 0 — No limit.

Default value: `0`.

**Example**

``` xml
<max_concurrent_queries_for_all_users>99</max_concurrent_queries_for_all_users>
```

**See Also**

- [max_concurrent_queries](/docs/en/operations/server-configuration-parameters/settings.md/#max_concurrent_queries)

## merge_tree_min_rows_for_concurrent_read {#setting-merge-tree-min-rows-for-concurrent-read}

If the number of rows to be read from a file of a [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table exceeds `merge_tree_min_rows_for_concurrent_read` then ClickHouse tries to perform a concurrent reading from this file on several threads.
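Since the settings on this page can be changed per session, a hypothetical illustration (the table name is a placeholder and the value is only an example):

```sql
-- hypothetical: lower the threshold so that even relatively small files
-- are read by several threads
SET merge_tree_min_rows_for_concurrent_read = 20480;
SELECT count() FROM my_merge_tree_table;
```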
@ -1050,6 +1175,12 @@ Timeouts in seconds on the socket used for communicating with the client.

Default value: 10, 300, 300.

## handshake_timeout_ms {#handshake-timeout-ms}

Timeout in milliseconds for receiving Hello packet from replicas during handshake.

Default value: 10000.

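Assuming it can be adjusted per session like the other settings on this page, a hypothetical illustration:

```sql
-- hypothetical: allow a slower handshake, e.g. over a high-latency link
SET handshake_timeout_ms = 30000;
```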
## cancel_http_readonly_queries_on_client_close {#cancel-http-readonly-queries-on-client-close}

Cancels HTTP read-only queries (e.g. SELECT) when a client closes the connection without waiting for the response.
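Settings can be passed as URL parameters on the HTTP interface, so a hedged sketch of enabling this per request (host, port, and query are placeholders):

```bash
# hypothetical: if the client disconnects mid-request, the SELECT is cancelled
curl 'http://localhost:8123/?cancel_http_readonly_queries_on_client_close=1' \
    --data-binary 'SELECT sleep(3)'
```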
@ -1107,7 +1238,7 @@ Default value: `0`.
Could be used for throttling speed when replicating the data to add or replace new nodes.

:::note
60000000 bytes/s approximatly corresponds to 457 Mbps (60000000 / 1024 / 1024 * 8).
60000000 bytes/s approximately corresponds to 457 Mbps (60000000 / 1024 / 1024 * 8).
:::

## max_replicated_sends_network_bandwidth_for_server {#max_replicated_sends_network_bandwidth_for_server}
@ -1128,7 +1259,7 @@ Default value: `0`.
Could be used for throttling speed when replicating the data to add or replace new nodes.

:::note
60000000 bytes/s approximatly corresponds to 457 Mbps (60000000 / 1024 / 1024 * 8).
60000000 bytes/s approximately corresponds to 457 Mbps (60000000 / 1024 / 1024 * 8).
:::

## connect_timeout_with_failover_ms {#connect-timeout-with-failover-ms}
@ -2030,7 +2161,7 @@ FORMAT PrettyCompactMonoBlock

## distributed_push_down_limit {#distributed-push-down-limit}

Enables or disables [LIMIT](#limit) applying on each shard separatelly.
Enables or disables [LIMIT](#limit) applying on each shard separately.

This will allow to avoid:
- Sending extra rows over network;
@ -2431,7 +2562,7 @@ Default value: 0.

## allow_introspection_functions {#settings-allow_introspection_functions}

Enables or disables [introspections functions](../../sql-reference/functions/introspection.md) for query profiling.
Enables or disables [introspection functions](../../sql-reference/functions/introspection.md) for query profiling.

Possible values:

@ -3492,7 +3623,7 @@ Default value: `0`.

## database_replicated_initial_query_timeout_sec {#database_replicated_initial_query_timeout_sec}

Sets how long initial DDL query should wait for Replicated database to precess previous DDL queue entries in seconds.
Sets how long initial DDL query should wait for Replicated database to process previous DDL queue entries in seconds.

Possible values:

@ -3818,8 +3949,8 @@ Result:
## enable_extended_results_for_datetime_functions {#enable-extended-results-for-datetime-functions}

Enables or disables returning results of type:
- `Date32` with extended range (compared to type `Date`) for functions [toStartOfYear](../../sql-reference/functions/date-time-functions.md/#tostartofyear), [toStartOfISOYear](../../sql-reference/functions/date-time-functions.md/#tostartofisoyear), [toStartOfQuarter](../../sql-reference/functions/date-time-functions.md/#tostartofquarter), [toStartOfMonth](../../sql-reference/functions/date-time-functions.md/#tostartofmonth), [toStartOfWeek](../../sql-reference/functions/date-time-functions.md/#tostartofweek), [toMonday](../../sql-reference/functions/date-time-functions.md/#tomonday) and [toLastDayOfMonth](../../sql-reference/functions/date-time-functions.md/#tolastdayofmonth).
- `DateTime64` with extended range (compared to type `DateTime`) for functions [toStartOfDay](../../sql-reference/functions/date-time-functions.md/#tostartofday), [toStartOfHour](../../sql-reference/functions/date-time-functions.md/#tostartofhour), [toStartOfMinute](../../sql-reference/functions/date-time-functions.md/#tostartofminute), [toStartOfFiveMinutes](../../sql-reference/functions/date-time-functions.md/#tostartoffiveminutes), [toStartOfTenMinutes](../../sql-reference/functions/date-time-functions.md/#tostartoftenminutes), [toStartOfFifteenMinutes](../../sql-reference/functions/date-time-functions.md/#tostartoffifteenminutes) and [timeSlot](../../sql-reference/functions/date-time-functions.md/#timeslot).
- `Date32` with extended range (compared to type `Date`) for functions [toStartOfYear](../../sql-reference/functions/date-time-functions.md#tostartofyear), [toStartOfISOYear](../../sql-reference/functions/date-time-functions.md#tostartofisoyear), [toStartOfQuarter](../../sql-reference/functions/date-time-functions.md#tostartofquarter), [toStartOfMonth](../../sql-reference/functions/date-time-functions.md#tostartofmonth), [toLastDayOfMonth](../../sql-reference/functions/date-time-functions.md#tolastdayofmonth), [toStartOfWeek](../../sql-reference/functions/date-time-functions.md#tostartofweek), [toLastDayOfWeek](../../sql-reference/functions/date-time-functions.md#tolastdayofweek) and [toMonday](../../sql-reference/functions/date-time-functions.md#tomonday).
- `DateTime64` with extended range (compared to type `DateTime`) for functions [toStartOfDay](../../sql-reference/functions/date-time-functions.md#tostartofday), [toStartOfHour](../../sql-reference/functions/date-time-functions.md#tostartofhour), [toStartOfMinute](../../sql-reference/functions/date-time-functions.md#tostartofminute), [toStartOfFiveMinutes](../../sql-reference/functions/date-time-functions.md#tostartoffiveminutes), [toStartOfTenMinutes](../../sql-reference/functions/date-time-functions.md#tostartoftenminutes), [toStartOfFifteenMinutes](../../sql-reference/functions/date-time-functions.md#tostartoffifteenminutes) and [timeSlot](../../sql-reference/functions/date-time-functions.md#timeslot).

Possible values:

@ -4181,6 +4312,12 @@ Default value: `2000`
If it's enabled, in hedged requests we can start a new connection until receiving the first data packet, even if we have already made some progress
(but progress hasn't been updated for the `receive_data_timeout` timeout); otherwise we disable changing the replica after the first time we made progress.

## parallel_view_processing

Enables pushing to attached views concurrently instead of sequentially.

Default value: `false`.

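A hypothetical illustration — `events` and `staging_events` are placeholder tables, with materialized views assumed to be attached to `events`:

```sql
-- hypothetical: push the inserted block to all attached views in parallel
SET parallel_view_processing = 1;
INSERT INTO events SELECT * FROM staging_events;
```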
## partial_result_on_first_cancel {#partial_result_on_first_cancel}
When set to `true` and the user wants to interrupt a query (for example using `Ctrl+C` on the client), then the query continues execution only on data that was already read from the table. Afterwards, it will return a partial result of the query for the part of the table that was read. To fully stop the execution of a query without a partial result, the user should send 2 cancel requests.

@ -28,7 +28,7 @@ The `system.columns` table contains the following columns (the column type is sh
- `is_in_sampling_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether the column is in the sampling key expression.
- `compression_codec` ([String](../../sql-reference/data-types/string.md)) — Compression codec name.
- `character_octet_length` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum length in bytes for binary data, character data, or text data and images. In ClickHouse makes sense only for `FixedString` data type. Otherwise, the `NULL` value is returned.
- `numeric_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Accuracy of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse it is bitness for integer types and decimal precision for `Decimal` types. Otherwise, the `NULL` value is returned.
- `numeric_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Accuracy of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse it is bit width for integer types and decimal precision for `Decimal` types. Otherwise, the `NULL` value is returned.
- `numeric_precision_radix` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The base of the number system is the accuracy of approximate numeric data, exact numeric data, integer data or monetary data. In ClickHouse it's 2 for integer types and 10 for `Decimal` types. Otherwise, the `NULL` value is returned.
- `numeric_scale` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The scale of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse makes sense only for `Decimal` types. Otherwise, the `NULL` value is returned.
- `datetime_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Decimal precision of `DateTime64` data type. For other data types, the `NULL` value is returned.

@ -12,7 +12,7 @@ Columns:
- `table` ([String](../../sql-reference/data-types/string.md)) — Table name.
- `uuid` ([UUID](../../sql-reference/data-types/uuid.md)) — Table uuid.
- `engine` ([String](../../sql-reference/data-types/string.md)) — Table engine name.
- `metadata_dropped_path` ([String](../../sql-reference/data-types/string.md)) — Path of table's metadata file in metadate_dropped directory.
- `metadata_dropped_path` ([String](../../sql-reference/data-types/string.md)) — Path of table's metadata file in metadata_dropped directory.
- `table_dropped_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The time when the next attempt to remove table's data is scheduled on. Usually it's the time when the table was dropped plus `database_atomic_delay_before_drop_table_sec`

**Example**

@ -43,7 +43,7 @@ Columns:
- `data_type` ([String](../../sql-reference/data-types/string.md)) — Column type.
- `character_maximum_length` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum length in bytes for binary data, character data, or text data and images. In ClickHouse makes sense only for `FixedString` data type. Otherwise, the `NULL` value is returned.
- `character_octet_length` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum length in bytes for binary data, character data, or text data and images. In ClickHouse makes sense only for `FixedString` data type. Otherwise, the `NULL` value is returned.
- `numeric_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Accuracy of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse it is bitness for integer types and decimal precision for `Decimal` types. Otherwise, the `NULL` value is returned.
- `numeric_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Accuracy of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse it is bit width for integer types and decimal precision for `Decimal` types. Otherwise, the `NULL` value is returned.
- `numeric_precision_radix` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The base of the number system is the accuracy of approximate numeric data, exact numeric data, integer data or monetary data. In ClickHouse it's 2 for integer types and 10 for `Decimal` types. Otherwise, the `NULL` value is returned.
- `numeric_scale` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The scale of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse makes sense only for `Decimal` types. Otherwise, the `NULL` value is returned.
- `datetime_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Decimal precision of `DateTime64` data type. For other data types, the `NULL` value is returned.
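The same metadata is exposed through the standards-compliant view; a hedged sketch (the table name is a placeholder):

```sql
SELECT column_name, data_type, numeric_precision, numeric_scale
FROM information_schema.columns
WHERE table_name = 'hits'
LIMIT 5;
```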
@ -3,7 +3,7 @@ slug: /en/operations/system-tables/licenses
---

# licenses

Сontains licenses of third-party libraries that are located in the [contrib](https://github.com/ClickHouse/ClickHouse/tree/master/contrib) directory of ClickHouse sources.
Contains licenses of third-party libraries that are located in the [contrib](https://github.com/ClickHouse/ClickHouse/tree/master/contrib) directory of ClickHouse sources.

Columns:

@ -100,7 +100,7 @@ Columns:
- `move_ttl_info.expression` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Array of expressions. Each expression defines a [TTL MOVE rule](../../engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-ttl).

:::note
The `move_ttl_info.expression` array is kept mostly for backward compatibility, now the simpliest way to check `TTL MOVE` rule is to use the `move_ttl_info.min` and `move_ttl_info.max` fields.
The `move_ttl_info.expression` array is kept mostly for backward compatibility, now the simplest way to check `TTL MOVE` rule is to use the `move_ttl_info.min` and `move_ttl_info.max` fields.
:::

- `move_ttl_info.min` ([Array](../../sql-reference/data-types/array.md)([DateTime](../../sql-reference/data-types/datetime.md))) — Array of date and time values. Each element describes the minimum key value for a [TTL MOVE rule](../../engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-ttl).
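For instance, a hedged sketch of checking the `TTL MOVE` boundaries per active part (the table name is a placeholder):

```sql
SELECT name, move_ttl_info.min, move_ttl_info.max
FROM system.parts
WHERE table = 'hits' AND active
LIMIT 5;
```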
@ -10,14 +10,14 @@ Columns:
- `user` (String) – The user who made the query. Keep in mind that for distributed processing, queries are sent to remote servers under the `default` user. The field contains the username for a specific query, not for a query that this query initiated.
- `address` (String) – The IP address the request was made from. The same for distributed processing. To track where a distributed query was originally made from, look at `system.processes` on the query requestor server.
- `elapsed` (Float64) – The time in seconds since request execution started.
- `rows_read` (UInt64) – The number of rows read from the table. For distributed processing, on the requestor server, this is the total for all remote servers.
- `bytes_read` (UInt64) – The number of uncompressed bytes read from the table. For distributed processing, on the requestor server, this is the total for all remote servers.
- `read_rows` (UInt64) – The number of rows read from the table. For distributed processing, on the requestor server, this is the total for all remote servers.
- `read_bytes` (UInt64) – The number of uncompressed bytes read from the table. For distributed processing, on the requestor server, this is the total for all remote servers.
- `total_rows_approx` (UInt64) – The approximation of the total number of rows that should be read. For distributed processing, on the requestor server, this is the total for all remote servers. It can be updated during request processing, when new sources to process become known.
- `memory_usage` (UInt64) – Amount of RAM the request uses. It might not include some types of dedicated memory. See the [max_memory_usage](../../operations/settings/query-complexity.md#settings_max_memory_usage) setting.
- `memory_usage` (Int64) – Amount of RAM the request uses. It might not include some types of dedicated memory. See the [max_memory_usage](../../operations/settings/query-complexity.md#settings_max_memory_usage) setting.
- `query` (String) – The query text. For `INSERT`, it does not include the data to insert.
- `query_id` (String) – Query ID, if defined.
- `is_cancelled` (Int8) – Was query cancelled.
- `is_all_data_sent` (Int8) – Was all data sent to the client (in other words query had been finished on the server).
- `is_cancelled` (UInt8) – Was query cancelled.
- `is_all_data_sent` (UInt8) – Was all data sent to the client (in other words query had been finished on the server).

```sql
SELECT * FROM system.processes LIMIT 10 FORMAT Vertical;
@ -14,8 +14,8 @@ Columns:
- `['user_name']` — Connections with the same user name share the same quota.
- `['ip_address']` — Connections from the same IP share the same quota.
- `['client_key']` — Connections with the same key share the same quota. A key must be explicitly provided by a client. When using [clickhouse-client](../../interfaces/cli.md), pass a key value in the `--quota_key` parameter, or use the `quota_key` parameter in the client configuration file. When using HTTP interface, use the `X-ClickHouse-Quota` header.
- `['user_name', 'client_key']` — Connections with the same `client_key` share the same quota. If a key isn’t provided by a client, the qouta is tracked for `user_name`.
- `['client_key', 'ip_address']` — Connections with the same `client_key` share the same quota. If a key isn’t provided by a client, the qouta is tracked for `ip_address`.
- `['user_name', 'client_key']` — Connections with the same `client_key` share the same quota. If a key isn’t provided by a client, the quota is tracked for `user_name`.
- `['client_key', 'ip_address']` — Connections with the same `client_key` share the same quota. If a key isn’t provided by a client, the quota is tracked for `ip_address`.
- `durations` ([Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Time interval lengths in seconds.
- `apply_to_all` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Logical value. It shows which users the quota is applied to. Values:
    - `0` — The quota applies to users specified in the `apply_to_list`.
@ -50,7 +50,7 @@ Columns:
    - [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes)
    - [Distributed](../../engines/table-engines/special/distributed.md#distributed)

- `total_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of rows, if it is possible to quickly determine exact number of rows in the table, otherwise `NULL` (including underying `Buffer` table).
- `total_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of rows, if it is possible to quickly determine exact number of rows in the table, otherwise `NULL` (including underlying `Buffer` table).

- `total_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of bytes, if it is possible to quickly determine exact number of bytes for the table on storage, otherwise `NULL` (does not include any underlying storage).

@ -43,7 +43,7 @@ Columns:

- `event` ([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md)) - For trace type `ProfileEvent` is the name of updated profile event, for other trace types is an empty string.

- `increment` ([UInt64](../../sql-reference/data-types/int-uint.md)) - For trace type `ProfileEvent` is the amount of incremnt of profile event, for other trace types is 0.
- `increment` ([UInt64](../../sql-reference/data-types/int-uint.md)) - For trace type `ProfileEvent` is the amount of increment of profile event, for other trace types is 0.

**Example**

28
docs/en/operations/system-tables/user_processes.md
Normal file
@ -0,0 +1,28 @@
---
slug: /en/operations/system-tables/user_processes
---

# user_processes

This system table can be used to get an overview of memory usage and ProfileEvents of users.

Columns:

- `user` ([String](../../sql-reference/data-types/string.md)) — User name.
- `memory_usage` ([Int64](../../sql-reference/data-types/int-uint#int-ranges)) – Sum of RAM used by all processes of the user. It might not include some types of dedicated memory. See the [max_memory_usage](../../operations/settings/query-complexity.md#settings_max_memory_usage) setting.
- `peak_memory_usage` ([Int64](../../sql-reference/data-types/int-uint#int-ranges)) — The peak of memory usage of the user. It can be reset when no queries are run for the user.
- `ProfileEvents` ([Map(String, UInt64)](../../sql-reference/data-types/map)) – Summary of ProfileEvents that measure different metrics for the user. Their descriptions can be found in the table [system.events](../../operations/system-tables/events.md#system_tables-events).

```sql
SELECT * FROM system.user_processes LIMIT 10 FORMAT Vertical;
```

```response
Row 1:
──────
user:              default
memory_usage:      9832
peak_memory_usage: 9832
ProfileEvents:     {'Query':5,'SelectQuery':5,'QueriesWithSubqueries':38,'SelectQueriesWithSubqueries':38,'QueryTimeMicroseconds':842048,'SelectQueryTimeMicroseconds':842048,'ReadBufferFromFileDescriptorRead':6,'ReadBufferFromFileDescriptorReadBytes':234,'IOBufferAllocs':3,'IOBufferAllocBytes':98493,'ArenaAllocChunks':283,'ArenaAllocBytes':1482752,'FunctionExecute':670,'TableFunctionExecute':16,'DiskReadElapsedMicroseconds':19,'NetworkSendElapsedMicroseconds':684,'NetworkSendBytes':139498,'SelectedRows':6076,'SelectedBytes':685802,'ContextLock':1140,'RWLockAcquiredReadLocks':193,'RWLockReadersWaitMilliseconds':4,'RealTimeMicroseconds':1585163,'UserTimeMicroseconds':889767,'SystemTimeMicroseconds':13630,'SoftPageFaults':1947,'OSCPUWaitMicroseconds':6,'OSCPUVirtualTimeMicroseconds':903251,'OSReadChars':28631,'OSWriteChars':28888,'QueryProfilerRuns':3,'LogTrace':79,'LogDebug':24}

1 row in set. Elapsed: 0.010 sec.
```
@ -33,7 +33,7 @@ Columns with request response parameters:

- `zxid` ([Int64](../../sql-reference/data-types/int-uint.md)) — ZooKeeper transaction ID. The serial number issued by the ZooKeeper server in response to a successfully executed request (`0` if the request was not executed/returned an error/the client does not know whether the request was executed).
- `error` ([Nullable(Enum)](../../sql-reference/data-types/nullable.md)) — Error code. Can have many values, here are just some of them:
    - `ZOK` — The request was executed seccessfully.
    - `ZOK` — The request was executed successfully.
    - `ZCONNECTIONLOSS` — The connection was lost.
    - `ZOPERATIONTIMEOUT` — The request execution timeout has expired.
    - `ZSESSIONEXPIRED` — The session has expired.
@ -43,7 +43,7 @@ Columns with request response parameters:
- `path_created` ([String](../../sql-reference/data-types/string.md)) — The path to the created ZooKeeper node (for responses to the `CREATE` request), may differ from the `path` if the node is created as a `sequential`.
- `stat_czxid` ([Int64](../../sql-reference/data-types/int-uint.md)) — The `zxid` of the change that caused this ZooKeeper node to be created.
- `stat_mzxid` ([Int64](../../sql-reference/data-types/int-uint.md)) — The `zxid` of the change that last modified this ZooKeeper node.
- `stat_pzxid` ([Int64](../../sql-reference/data-types/int-uint.md)) — The transaction ID of the change that last modified childern of this ZooKeeper node.
- `stat_pzxid` ([Int64](../../sql-reference/data-types/int-uint.md)) — The transaction ID of the change that last modified children of this ZooKeeper node.
- `stat_version` ([Int32](../../sql-reference/data-types/int-uint.md)) — The number of changes to the data of this ZooKeeper node.
- `stat_cversion` ([Int32](../../sql-reference/data-types/int-uint.md)) — The number of changes to the children of this ZooKeeper node.
- `stat_dataLength` ([Int32](../../sql-reference/data-types/int-uint.md)) — The length of the data field of this ZooKeeper node.
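For example, a hedged sketch of pulling recent responses that did not complete cleanly:

```sql
SELECT zxid, error, path_created
FROM system.zookeeper_log
WHERE error IS NOT NULL AND error != 'ZOK'
LIMIT 5;
```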
53
docs/en/operations/utilities/clickhouse-keeper-client.md
Normal file
@ -0,0 +1,53 @@
---
slug: /en/operations/utilities/clickhouse-keeper-client
sidebar_label: clickhouse-keeper-client
---

# clickhouse-keeper-client

A client application to interact with clickhouse-keeper by its native protocol.

## Keys {#clickhouse-keeper-client}

- `-q QUERY`, `--query=QUERY` — Query to execute. If this parameter is not passed, `clickhouse-keeper-client` will start in interactive mode.
- `-h HOST`, `--host=HOST` — Server host. Default value: `localhost`.
- `-p N`, `--port=N` — Server port. Default value: `2181`.
- `--connection-timeout=TIMEOUT` — Set connection timeout in seconds. Default value: 10s.
- `--session-timeout=TIMEOUT` — Set session timeout in seconds. Default value: 10s.
- `--operation-timeout=TIMEOUT` — Set operation timeout in seconds. Default value: 10s.
- `--history-file=FILE_PATH` — Set path of history file. Default value: `~/.keeper-client-history`.
- `--help` — Shows the help message.

## Example {#clickhouse-keeper-client-example}

```bash
./clickhouse-keeper-client -h localhost:2181 --connection-timeout 30 --session-timeout 30 --operation-timeout 30
Connected to ZooKeeper at [::1]:2181 with session_id 137
/ :) ls
keeper foo bar
/ :) cd keeper
/keeper :) ls
api_version
/keeper :) cd api_version
/keeper/api_version :) ls

/keeper/api_version :) cd xyz
Path /keeper/api_version/xyz does not exists
/keeper/api_version :) cd ../../
/ :) ls
keeper foo bar
/ :) get keeper/api_version
2
```

## Commands {#clickhouse-keeper-client-commands}

- `ls [path]` -- Lists the nodes for the given path (default: cwd)
- `cd [path]` -- Change the working path (default `.`)
- `set <path> <value> [version]` -- Updates the node's value. Only update if version matches (default: -1)
- `create <path> <value>` -- Creates new node
- `get <path>` -- Returns the node's value
- `remove <path>` -- Remove the node
- `rmr <path>` -- Recursively deletes path. Confirmation required
- `flwc <command>` -- Executes four-letter-word command
- `help` -- Prints this message
@ -24,7 +24,7 @@ It is designed to retain the following properties of data:

Most of the properties above are viable for performance testing:

reading data, filtering, aggregatio, and sorting will work at almost the same speed
reading data, filtering, aggregation, and sorting will work at almost the same speed
as on original data due to saved cardinalities, magnitudes, compression ratios, etc.

It works in a deterministic fashion: you define a seed value and the transformation is determined by input data and by seed.

@ -30,7 +30,34 @@ Example 2: `uniqArray(arr)` – Counts the number of unique elements in all ‘a

The -Map suffix can be appended to any aggregate function. This will create an aggregate function which gets Map type as an argument, and aggregates values of each key of the map separately using the specified aggregate function. The result is also of a Map type.

Examples: `sumMap(map(1,1))`, `avgMap(map('a', 1))`.

**Example**

```sql
CREATE TABLE map_map(
    date Date,
    timeslot DateTime,
    status Map(String, UInt64)
) ENGINE = Log;

INSERT INTO map_map VALUES
    ('2000-01-01', '2000-01-01 00:00:00', (['a', 'b', 'c'], [10, 10, 10])),
    ('2000-01-01', '2000-01-01 00:00:00', (['c', 'd', 'e'], [10, 10, 10])),
    ('2000-01-01', '2000-01-01 00:01:00', (['d', 'e', 'f'], [10, 10, 10])),
    ('2000-01-01', '2000-01-01 00:01:00', (['f', 'g', 'g'], [10, 10, 10]));

SELECT
    timeslot,
    sumMap(status),
    avgMap(status),
    minMap(status)
FROM map_map
GROUP BY timeslot;

┌────────────timeslot─┬─sumMap(status)───────────────────────┬─avgMap(status)───────────────────────┬─minMap(status)───────────────────────┐
│ 2000-01-01 00:00:00 │ {'a':10,'b':10,'c':20,'d':10,'e':10} │ {'a':10,'b':10,'c':10,'d':10,'e':10} │ {'a':10,'b':10,'c':10,'d':10,'e':10} │
│ 2000-01-01 00:01:00 │ {'d':10,'e':10,'f':20,'g':20}        │ {'d':10,'e':10,'f':10,'g':10}        │ {'d':10,'e':10,'f':10,'g':10}        │
└─────────────────────┴──────────────────────────────────────┴──────────────────────────────────────┴──────────────────────────────────────┘
```

## -SimpleState

@ -4,7 +4,7 @@ sidebar_label: Aggregate Functions
sidebar_position: 33
---

# Aggregate Functions

Aggregate functions work in the [normal](http://www.sql-tutorial.com/sql-aggregate-functions-sql-tutorial) way as expected by database experts.

@ -72,3 +72,16 @@ FROM t_null_big
│ 2.3333333333333335 │                 1.4 │
└────────────────────┴─────────────────────┘
```

Also, you can use [Tuple](/docs/en/sql-reference/data-types/tuple.md) to work around the NULL-skipping behavior. A `Tuple` that contains only a `NULL` value is not `NULL`, so the aggregate functions won't skip that row because of that `NULL` value.

```sql
SELECT
    groupArray(y),
    groupArray(tuple(y)).1
FROM t_null_big;

┌─groupArray(y)─┬─tupleElement(groupArray(tuple(y)), 1)─┐
│ [2,2,3]       │ [2,NULL,2,3,NULL]                     │
└───────────────┴───────────────────────────────────────┘
```
@ -356,7 +356,7 @@ Type: `UInt8`.

Let’s consider an example of calculating the `retention` function to determine site traffic.

**1.** Сreate a table to illustrate an example.
**1.** Create a table to illustrate an example.

``` sql
CREATE TABLE retention_test(date Date, uid Int32) ENGINE = Memory;
@ -6,6 +6,7 @@ sidebar_position: 106
# argMax

Calculates the `arg` value for a maximum `val` value. If there are several different values of `arg` for maximum values of `val`, returns the first of these values encountered.
Both parts, the `arg` and the `max`, behave as [aggregate functions](/docs/en/sql-reference/aggregate-functions/index.md); they both [skip `Null`](/docs/en/sql-reference/aggregate-functions/index.md#null-processing) during processing and return non-`Null` values if non-`Null` values are available.

**Syntax**

@ -49,3 +50,60 @@ Result:
│ director             │
└──────────────────────┘
```

**Extended example**

```sql
CREATE TABLE test
(
    a Nullable(String),
    b Nullable(Int64)
)
ENGINE = Memory AS
SELECT *
FROM VALUES(('a', 1), ('b', 2), ('c', 2), (NULL, 3), (NULL, NULL), ('d', NULL));

select * from test;
┌─a────┬────b─┐
│ a    │    1 │
│ b    │    2 │
│ c    │    2 │
│ ᴺᵁᴸᴸ │    3 │
│ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │
│ d    │ ᴺᵁᴸᴸ │
└──────┴──────┘

SELECT argMax(a, b), max(b) FROM test;
┌─argMax(a, b)─┬─max(b)─┐
│ b            │      3 │ -- argMax = 'b' because it is the first non-Null value, max(b) is from another row!
└──────────────┴────────┘

SELECT argMax(tuple(a), b) FROM test;
┌─argMax(tuple(a), b)─┐
│ (NULL)              │ -- a `Tuple` that contains only a `NULL` value is not `NULL`, so the aggregate functions won't skip that row because of that `NULL` value
└─────────────────────┘

SELECT (argMax((a, b), b) as t).1 argMaxA, t.2 argMaxB FROM test;
┌─argMaxA─┬─argMaxB─┐
│ ᴺᵁᴸᴸ    │       3 │ -- you can use `Tuple` and get both (all - tuple(*)) columns for the corresponding max(b)
└─────────┴─────────┘

SELECT argMax(a, b), max(b) FROM test WHERE a IS NULL AND b IS NULL;
┌─argMax(a, b)─┬─max(b)─┐
│ ᴺᵁᴸᴸ         │ ᴺᵁᴸᴸ   │ -- all aggregated rows contain at least one `NULL` value because of the filter, so all rows are skipped, therefore the result will be `NULL`
└──────────────┴────────┘

SELECT argMax(a, (b,a)) FROM test;
┌─argMax(a, tuple(b, a))─┐
│ c                      │ -- there are two rows with b=2; a `Tuple` in `max` allows getting not the first `arg`
└────────────────────────┘

SELECT argMax(a, tuple(b)) FROM test;
┌─argMax(a, tuple(b))─┐
│ b                   │ -- a `Tuple` can be used in `max` so as not to skip `NULL` values in `max`
└─────────────────────┘
```

**See also**

- [Tuple](/docs/en/sql-reference/data-types/tuple.md)

@ -6,6 +6,7 @@ sidebar_position: 105
# argMin

Calculates the `arg` value for a minimum `val` value. If there are several different values of `arg` for minimum values of `val`, returns the first of these values encountered.
Both parts, the `arg` and the `min`, behave as [aggregate functions](/docs/en/sql-reference/aggregate-functions/index.md); they both [skip `Null`](/docs/en/sql-reference/aggregate-functions/index.md#null-processing) during processing and return non-`Null` values if non-`Null` values are available.

**Syntax**

@ -49,3 +50,65 @@ Result:
│ worker               │
└──────────────────────┘
```

**Extended example**

```sql
CREATE TABLE test
(
    a Nullable(String),
    b Nullable(Int64)
)
ENGINE = Memory AS
SELECT *
FROM VALUES((NULL, 0), ('a', 1), ('b', 2), ('c', 2), (NULL, NULL), ('d', NULL));

select * from test;
┌─a────┬────b─┐
│ ᴺᵁᴸᴸ │    0 │
│ a    │    1 │
│ b    │    2 │
│ c    │    2 │
│ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │
│ d    │ ᴺᵁᴸᴸ │
└──────┴──────┘

SELECT argMin(a, b), min(b) FROM test;
┌─argMin(a, b)─┬─min(b)─┐
│ a            │      0 │ -- argMin = 'a' because it is the first non-`NULL` value, min(b) is from another row!
└──────────────┴────────┘

SELECT argMin(tuple(a), b) FROM test;
┌─argMin(tuple(a), b)─┐
│ (NULL)              │ -- a `Tuple` that contains only a `NULL` value is not `NULL`, so the aggregate functions won't skip that row because of that `NULL` value
└─────────────────────┘

SELECT (argMin((a, b), b) as t).1 argMinA, t.2 argMinB from test;
┌─argMinA─┬─argMinB─┐
│ ᴺᵁᴸᴸ    │       0 │ -- you can use `Tuple` and get both (all - tuple(*)) columns for the corresponding min(b)
└─────────┴─────────┘

SELECT argMin(a, b), min(b) FROM test WHERE a IS NULL and b IS NULL;
┌─argMin(a, b)─┬─min(b)─┐
│ ᴺᵁᴸᴸ         │ ᴺᵁᴸᴸ   │ -- all aggregated rows contain at least one `NULL` value because of the filter, so all rows are skipped, therefore the result will be `NULL`
└──────────────┴────────┘

SELECT argMin(a, (b, a)), min(tuple(b, a)) FROM test;
┌─argMin(a, tuple(b, a))─┬─min(tuple(b, a))─┐
│ d                      │ (NULL,NULL)      │ -- 'd' is the first non-`NULL` value for the min
└────────────────────────┴──────────────────┘

SELECT argMin((a, b), (b, a)), min(tuple(b, a)) FROM test;
┌─argMin(tuple(a, b), tuple(b, a))─┬─min(tuple(b, a))─┐
│ (NULL,NULL)                      │ (NULL,NULL)      │ -- argMin returns (NULL,NULL) here because `Tuple` allows not skipping `NULL`, and min(tuple(b, a)) in this case is the minimal value for this dataset
└──────────────────────────────────┴──────────────────┘

SELECT argMin(a, tuple(b)) FROM test;
┌─argMin(a, tuple(b))─┐
│ d                   │ -- `Tuple` can be used in `min` to not skip rows with `NULL` values of `b`.
└─────────────────────┘
```

**See also**

- [Tuple](/docs/en/sql-reference/data-types/tuple.md)

@ -0,0 +1,44 @@
---
slug: /en/sql-reference/aggregate-functions/reference/boundingRatio
sidebar_position: 2
title: boundingRatio
---

Aggregate function that calculates the slope between the leftmost and rightmost points across a group of values.

Example:

Sample data:
```sql
SELECT
    number,
    number * 1.5
FROM numbers(10)
```
```response
┌─number─┬─multiply(number, 1.5)─┐
│      0 │                     0 │
│      1 │                   1.5 │
│      2 │                     3 │
│      3 │                   4.5 │
│      4 │                     6 │
│      5 │                   7.5 │
│      6 │                     9 │
│      7 │                  10.5 │
│      8 │                    12 │
│      9 │                  13.5 │
└────────┴───────────────────────┘
```

The boundingRatio() function returns the slope of the line between the leftmost and rightmost points; in the above data these points are `(0,0)` and `(9,13.5)`.

```sql
SELECT boundingRatio(number, number * 1.5)
FROM numbers(10)
```
```response
┌─boundingRatio(number, multiply(number, 1.5))─┐
│                                          1.5 │
└──────────────────────────────────────────────┘
```
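The arithmetic checks out: the slope is (13.5 - 0) / (9 - 0) = 1.5, which can be verified directly:

```sql
SELECT (13.5 - 0) / (9 - 0) AS slope; -- 1.5, matching boundingRatio above
```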
@ -5,7 +5,7 @@ sidebar_position: 351

# cramersV

[Cramér's V](https://en.wikipedia.org/wiki/Cram%C3%A9r%27s_V) (sometimes referred to as Cramér's phi) is a measure of association between two columns in a table. The result of the `cramersV` function ranges from 0 (corresponding to no association between the variables) to 1 and can reach 1 only when each value is completely determined by the other. It may be viewed as the association between two variables as a percentage of their maximum possible variation.
[Cramer's V](https://en.wikipedia.org/wiki/Cram%C3%A9r%27s_V) (sometimes referred to as Cramer's phi) is a measure of association between two columns in a table. The result of the `cramersV` function ranges from 0 (corresponding to no association between the variables) to 1 and can reach 1 only when each value is completely determined by the other. It may be viewed as the association between two variables as a percentage of their maximum possible variation.

**Syntax**

@ -69,4 +69,4 @@ Result:

┌─────cramersV(a, b)─┐
│ 0.8944271909999159 │
└────────────────────┘
```

@ -6,7 +6,7 @@ sidebar_position: 352
# cramersVBiasCorrected


Cramér's V is a measure of association between two columns in a table. The result of the [`cramersV` function](./cramersv.md) ranges from 0 (corresponding to no association between the variables) to 1 and can reach 1 only when each value is completely determined by the other. The function can be heavily biased, so this version of Cramér's V uses the [bias correction](https://en.wikipedia.org/wiki/Cram%C3%A9r%27s_V#Bias_correction).
Cramer's V is a measure of association between two columns in a table. The result of the [`cramersV` function](./cramersv.md) ranges from 0 (corresponding to no association between the variables) to 1 and can reach 1 only when each value is completely determined by the other. The function can be heavily biased, so this version of Cramer's V uses the [bias correction](https://en.wikipedia.org/wiki/Cram%C3%A9r%27s_V#Bias_correction).


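A minimal, hedged sketch of calling it (the synthetic columns here are placeholders, not from the original page):

```sql
SELECT cramersVBiasCorrected(a, b)
FROM (SELECT number % 3 AS a, number % 5 AS b FROM numbers(150));
```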
@ -6,7 +6,7 @@ sidebar_title: exponentialMovingAverage

## exponentialMovingAverage

Сalculates the exponential moving average of values for the determined time.
Calculates the exponential moving average of values for the determined time.

**Syntax**

@ -27,7 +27,7 @@ Each `value` corresponds to the determinate `timeunit`. The half-life `x` is the

**Returned values**

- Returnes an [exponentially smoothed moving average](https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average) of the values for the past `x` time at the latest point of time.
- Returns an [exponentially smoothed moving average](https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average) of the values for the past `x` time at the latest point of time.

Type: [Float64](../../../sql-reference/data-types/float.md#float32-float64).

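As a quick, hedged illustration of the `exponentialMovingAverage(x)(value, timeunit)` signature, using a synthetic alternating series:

```sql
SELECT exponentialMovingAverage(1)(value, time) AS ema
FROM (SELECT number AS time, number % 2 AS value FROM numbers(10));
```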
@ -6,24 +6,32 @@ sidebar_position: 7
# first_value

Selects the first encountered value, similar to `any`, but it can accept NULL.
Mostly it should be used with [Window Functions](../../window-functions/index.md).
Without Window Functions the result will be random if the source stream is not ordered.

## examples

```sql
insert into test_data (a,b) values (1,null), (2,3), (4, 5), (6,null)
CREATE TABLE test_data
(
    a Int64,
    b Nullable(Int64)
)
ENGINE = Memory;

INSERT INTO test_data (a, b) Values (1,null), (2,3), (4, 5), (6,null);
```

### example1
The NULL value is ignored by default.
```sql
select first_value(b) from test_data
select first_value(b) from test_data;
```

```text
┌─first_value_ignore_nulls(b)─┐
│                           3 │
└─────────────────────────────┘
```

### example2
@ -36,7 +44,6 @@ select first_value(b) ignore nulls from test_data
┌─first_value_ignore_nulls(b)─┐
│                           3 │
└─────────────────────────────┘
```

### example3
@ -46,10 +53,28 @@ select first_value(b) respect nulls from test_data
```

```text
┌─first_value_respect_nulls(b)─┐
│                         ᴺᵁᴸᴸ │
└──────────────────────────────┘
```

### example4
The result can be stabilized by using a sub-query with `ORDER BY`.
```sql
SELECT
    first_value_respect_nulls(b),
    first_value(b)
FROM
(
    SELECT *
    FROM test_data
    ORDER BY a ASC
)
```

```text
┌─first_value_respect_nulls(b)─┬─first_value(b)─┐
│                         ᴺᵁᴸᴸ │              3 │
└──────────────────────────────┴────────────────┘
```

@ -5,7 +5,7 @@ sidebar_position: 125

# groupBitAnd

Applies bitwise `AND` for series of numbers.
Applies bit-wise `AND` for series of numbers.

``` sql
groupBitAnd(expr)
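
-- A hedged sketch (not from the original page): bit-wise AND of 7 (0b0111),
-- 5 (0b0101) and 13 (0b1101) yields 5 (0b0101).
SELECT groupBitAnd(n) FROM (SELECT arrayJoin([7, 5, 13]) AS n);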
@ -5,7 +5,7 @@ sidebar_position: 126

# groupBitOr

Applies bitwise `OR` for series of numbers.
Applies bit-wise `OR` for series of numbers.

``` sql
groupBitOr(expr)
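
-- A hedged sketch (not from the original page): bit-wise OR of 7 (0b0111),
-- 5 (0b0101) and 13 (0b1101) yields 15 (0b1111).
SELECT groupBitOr(n) FROM (SELECT arrayJoin([7, 5, 13]) AS n);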
@ -5,7 +5,7 @@ sidebar_position: 127

# groupBitXor

Applies bitwise `XOR` for series of numbers.
Applies bit-wise `XOR` for series of numbers.

``` sql
groupBitXor(expr)
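
-- A hedged sketch (not from the original page): bit-wise XOR of 7 (0b0111),
-- 5 (0b0101) and 13 (0b1101) yields 15 (0b1111).
SELECT groupBitXor(n) FROM (SELECT arrayJoin([7, 5, 13]) AS n);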
@ -9,74 +9,75 @@ toc_hidden: true

Standard aggregate functions:

- [count](../../../sql-reference/aggregate-functions/reference/count.md)
- [min](../../../sql-reference/aggregate-functions/reference/min.md)
- [max](../../../sql-reference/aggregate-functions/reference/max.md)
- [sum](../../../sql-reference/aggregate-functions/reference/sum.md)
- [avg](../../../sql-reference/aggregate-functions/reference/avg.md)
- [any](../../../sql-reference/aggregate-functions/reference/any.md)
- [stddevPop](../../../sql-reference/aggregate-functions/reference/stddevpop.md)
- [stddevSamp](../../../sql-reference/aggregate-functions/reference/stddevsamp.md)
- [varPop](../../../sql-reference/aggregate-functions/reference/varpop.md)
- [varSamp](../../../sql-reference/aggregate-functions/reference/varsamp.md)
- [covarPop](../../../sql-reference/aggregate-functions/reference/covarpop.md)
- [covarSamp](../../../sql-reference/aggregate-functions/reference/covarsamp.md)
- [count](/docs/en/sql-reference/aggregate-functions/reference/count.md)
- [min](/docs/en/sql-reference/aggregate-functions/reference/min.md)
- [max](/docs/en/sql-reference/aggregate-functions/reference/max.md)
- [sum](/docs/en/sql-reference/aggregate-functions/reference/sum.md)
- [avg](/docs/en/sql-reference/aggregate-functions/reference/avg.md)
- [any](/docs/en/sql-reference/aggregate-functions/reference/any.md)
- [stddevPop](/docs/en/sql-reference/aggregate-functions/reference/stddevpop.md)
- [stddevSamp](/docs/en/sql-reference/aggregate-functions/reference/stddevsamp.md)
- [varPop](/docs/en/sql-reference/aggregate-functions/reference/varpop.md)
- [varSamp](/docs/en/sql-reference/aggregate-functions/reference/varsamp.md)
- [covarPop](/docs/en/sql-reference/aggregate-functions/reference/covarpop.md)
- [covarSamp](/docs/en/sql-reference/aggregate-functions/reference/covarsamp.md)

ClickHouse-specific aggregate functions:

- [anyHeavy](../../../sql-reference/aggregate-functions/reference/anyheavy.md)
- [anyLast](../../../sql-reference/aggregate-functions/reference/anylast.md)
- [first_value](../../../sql-reference/aggregate-functions/reference/first_value.md)
- [last_value](../../../sql-reference/aggregate-functions/reference/last_value.md)
- [argMin](../../../sql-reference/aggregate-functions/reference/argmin.md)
- [argMax](../../../sql-reference/aggregate-functions/reference/argmax.md)
- [avgWeighted](../../../sql-reference/aggregate-functions/reference/avgweighted.md)
- [topK](../../../sql-reference/aggregate-functions/reference/topk.md)
- [topKWeighted](../../../sql-reference/aggregate-functions/reference/topkweighted.md)
- [groupArray](../../../sql-reference/aggregate-functions/reference/grouparray.md)
- [groupArrayLast](../../../sql-reference/aggregate-functions/reference/grouparraylast.md)
- [groupUniqArray](../../../sql-reference/aggregate-functions/reference/groupuniqarray.md)
- [groupArrayInsertAt](../../../sql-reference/aggregate-functions/reference/grouparrayinsertat.md)
- [groupArrayMovingAvg](../../../sql-reference/aggregate-functions/reference/grouparraymovingavg.md)
- [groupArrayMovingSum](../../../sql-reference/aggregate-functions/reference/grouparraymovingsum.md)
- [groupBitAnd](../../../sql-reference/aggregate-functions/reference/groupbitand.md)
- [groupBitOr](../../../sql-reference/aggregate-functions/reference/groupbitor.md)
- [groupBitXor](../../../sql-reference/aggregate-functions/reference/groupbitxor.md)
- [groupBitmap](../../../sql-reference/aggregate-functions/reference/groupbitmap.md)
- [groupBitmapAnd](../../../sql-reference/aggregate-functions/reference/groupbitmapand.md)
- [groupBitmapOr](../../../sql-reference/aggregate-functions/reference/groupbitmapor.md)
- [groupBitmapXor](../../../sql-reference/aggregate-functions/reference/groupbitmapxor.md)
- [sumWithOverflow](../../../sql-reference/aggregate-functions/reference/sumwithoverflow.md)
- [sumMap](../../../sql-reference/aggregate-functions/reference/summap.md)
- [minMap](../../../sql-reference/aggregate-functions/reference/minmap.md)
- [maxMap](../../../sql-reference/aggregate-functions/reference/maxmap.md)
- [skewSamp](../../../sql-reference/aggregate-functions/reference/skewsamp.md)
- [skewPop](../../../sql-reference/aggregate-functions/reference/skewpop.md)
- [kurtSamp](../../../sql-reference/aggregate-functions/reference/kurtsamp.md)
- [kurtPop](../../../sql-reference/aggregate-functions/reference/kurtpop.md)
- [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md)
- [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md)
- [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniqcombined.md)
- [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md)
- [uniqHLL12](../../../sql-reference/aggregate-functions/reference/uniqhll12.md)
- [uniqTheta](../../../sql-reference/aggregate-functions/reference/uniqthetasketch.md)
- [quantile](../../../sql-reference/aggregate-functions/reference/quantile.md)
- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md)
- [quantileExact](../../../sql-reference/aggregate-functions/reference/quantileexact.md)
- [quantileExactLow](../../../sql-reference/aggregate-functions/reference/quantileexact.md#quantileexactlow)
- [quantileExactHigh](../../../sql-reference/aggregate-functions/reference/quantileexact.md#quantileexacthigh)
- [quantileExactWeighted](../../../sql-reference/aggregate-functions/reference/quantileexactweighted.md)
- [quantileTiming](../../../sql-reference/aggregate-functions/reference/quantiletiming.md)
- [quantileTimingWeighted](../../../sql-reference/aggregate-functions/reference/quantiletimingweighted.md)
- [quantileDeterministic](../../../sql-reference/aggregate-functions/reference/quantiledeterministic.md)
- [quantileTDigest](../../../sql-reference/aggregate-functions/reference/quantiletdigest.md)
- [quantileTDigestWeighted](../../../sql-reference/aggregate-functions/reference/quantiletdigestweighted.md)
- [quantileBFloat16](../../../sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16)
- [quantileBFloat16Weighted](../../../sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16weighted)
- [simpleLinearRegression](../../../sql-reference/aggregate-functions/reference/simplelinearregression.md)
- [stochasticLinearRegression](../../../sql-reference/aggregate-functions/reference/stochasticlinearregression.md)
- [stochasticLogisticRegression](../../../sql-reference/aggregate-functions/reference/stochasticlogisticregression.md)
- [categoricalInformationValue](../../../sql-reference/aggregate-functions/reference/categoricalinformationvalue.md)
- [anyHeavy](/docs/en/sql-reference/aggregate-functions/reference/anyheavy.md)
- [anyLast](/docs/en/sql-reference/aggregate-functions/reference/anylast.md)
- [boundingRatio](/docs/en/sql-reference/aggregate-functions/reference/boundrat.md)
- [first_value](/docs/en/sql-reference/aggregate-functions/reference/first_value.md)
- [last_value](/docs/en/sql-reference/aggregate-functions/reference/last_value.md)
- [argMin](/docs/en/sql-reference/aggregate-functions/reference/argmin.md)
- [argMax](/docs/en/sql-reference/aggregate-functions/reference/argmax.md)
- [avgWeighted](/docs/en/sql-reference/aggregate-functions/reference/avgweighted.md)
- [topK](/docs/en/sql-reference/aggregate-functions/reference/topk.md)
- [topKWeighted](/docs/en/sql-reference/aggregate-functions/reference/topkweighted.md)
- [groupArray](/docs/en/sql-reference/aggregate-functions/reference/grouparray.md)
- [groupArrayLast](/docs/en/sql-reference/aggregate-functions/reference/grouparraylast.md)
- [groupUniqArray](/docs/en/sql-reference/aggregate-functions/reference/groupuniqarray.md)
- [groupArrayInsertAt](/docs/en/sql-reference/aggregate-functions/reference/grouparrayinsertat.md)
- [groupArrayMovingAvg](/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md)
- [groupArrayMovingSum](/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md)
- [groupBitAnd](/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md)
- [groupBitOr](/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md)
- [groupBitXor](/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md)
- [groupBitmap](/docs/en/sql-reference/aggregate-functions/reference/groupbitmap.md)
- [groupBitmapAnd](/docs/en/sql-reference/aggregate-functions/reference/groupbitmapand.md)
- [groupBitmapOr](/docs/en/sql-reference/aggregate-functions/reference/groupbitmapor.md)
- [groupBitmapXor](/docs/en/sql-reference/aggregate-functions/reference/groupbitmapxor.md)
- [sumWithOverflow](/docs/en/sql-reference/aggregate-functions/reference/sumwithoverflow.md)
- [sumMap](/docs/en/sql-reference/aggregate-functions/reference/summap.md)
- [minMap](/docs/en/sql-reference/aggregate-functions/reference/minmap.md)
- [maxMap](/docs/en/sql-reference/aggregate-functions/reference/maxmap.md)
- [skewSamp](/docs/en/sql-reference/aggregate-functions/reference/skewsamp.md)
- [skewPop](/docs/en/sql-reference/aggregate-functions/reference/skewpop.md)
- [kurtSamp](/docs/en/sql-reference/aggregate-functions/reference/kurtsamp.md)
- [kurtPop](/docs/en/sql-reference/aggregate-functions/reference/kurtpop.md)
- [uniq](/docs/en/sql-reference/aggregate-functions/reference/uniq.md)
- [uniqExact](/docs/en/sql-reference/aggregate-functions/reference/uniqexact.md)
- [uniqCombined](/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md)
- [uniqCombined64](/docs/en/sql-reference/aggregate-functions/reference/uniqcombined64.md)
- [uniqHLL12](/docs/en/sql-reference/aggregate-functions/reference/uniqhll12.md)
- [uniqTheta](/docs/en/sql-reference/aggregate-functions/reference/uniqthetasketch.md)
- [quantile](/docs/en/sql-reference/aggregate-functions/reference/quantile.md)
- [quantiles](/docs/en/sql-reference/aggregate-functions/reference/quantiles.md)
- [quantileExact](/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md)
- [quantileExactLow](/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md#quantileexactlow)
- [quantileExactHigh](/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md#quantileexacthigh)
- [quantileExactWeighted](/docs/en/sql-reference/aggregate-functions/reference/quantileexactweighted.md)
- [quantileTiming](/docs/en/sql-reference/aggregate-functions/reference/quantiletiming.md)
- [quantileTimingWeighted](/docs/en/sql-reference/aggregate-functions/reference/quantiletimingweighted.md)
- [quantileDeterministic](/docs/en/sql-reference/aggregate-functions/reference/quantiledeterministic.md)
- [quantileTDigest](/docs/en/sql-reference/aggregate-functions/reference/quantiletdigest.md)
- [quantileTDigestWeighted](/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md)
- [quantileBFloat16](/docs/en/sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16)
- [quantileBFloat16Weighted](/docs/en/sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16weighted)
- [simpleLinearRegression](/docs/en/sql-reference/aggregate-functions/reference/simplelinearregression.md)
- [stochasticLinearRegression](/docs/en/sql-reference/aggregate-functions/reference/stochasticlinearregression.md)
- [stochasticLogisticRegression](/docs/en/sql-reference/aggregate-functions/reference/stochasticlogisticregression.md)
- [categoricalInformationValue](/docs/en/sql-reference/aggregate-functions/reference/categoricalinformationvalue.md)
- [contingency](./contingency.md)
- [cramersV](./cramersv.md)
- [cramersVBiasCorrected](./cramersvbiascorrected.md)
@ -30,11 +30,11 @@ Samples must belong to continuous, one-dimensional probability distributions.
    The null hypothesis is that samples come from the same distribution, e.g. F(x) = G(x) for all x.
    And the alternative is that the distributions are not identical.
- `'greater'`
    The null hypothesis is that values in the first sample are *stohastically smaller* than those in the second one,
    The null hypothesis is that values in the first sample are *stochastically smaller* than those in the second one,
    e.g. the CDF of first distribution lies above and hence to the left of that for the second one.
    Which in fact means that F(x) >= G(x) for all x. And the alternative in this case is that F(x) < G(x) for at least one x.
- `'less'`.
    The null hypothesis is that values in the first sample are *stohastically greater* than those in the second one,
    The null hypothesis is that values in the first sample are *stochastically greater* than those in the second one,
    e.g. the CDF of first distribution lies below and hence to the right of that for the second one.
    Which in fact means that F(x) <= G(x) for all x. And the alternative in this case is that F(x) > G(x) for at least one x.
- `computation_method` — the method used to compute p-value. (Optional, default: `'auto'`.) [String](../../../sql-reference/data-types/string.md).
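A hedged sketch of invoking the test on two synthetic samples (`randNormal` is assumed to be available in your server version):

```sql
SELECT kolmogorovSmirnovTest('greater', 'auto')(value, num)
FROM
(
    SELECT randNormal(0, 10) AS value, 0 AS num FROM numbers(10000)
    UNION ALL
    SELECT randNormal(0, 10) AS value, 1 AS num FROM numbers(10000)
);
```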
@ -6,12 +6,20 @@ sidebar_position: 8
# last_value

Selects the last encountered value, similar to `anyLast`, but it can accept NULL.

Mostly it should be used with [Window Functions](../../window-functions/index.md).
Without Window Functions the result will be random if the source stream is not ordered.

## examples

```sql
insert into test_data (a,b) values (1,null), (2,3), (4, 5), (6,null)
CREATE TABLE test_data
(
    a Int64,
    b Nullable(Int64)
)
ENGINE = Memory;

INSERT INTO test_data (a, b) Values (1,null), (2,3), (4, 5), (6,null)
```

### example1
@ -50,4 +58,24 @@ select last_value(b) respect nulls from test_data
└─────────────────────────────┘
```

### example4
The result can be stabilized by using a sub-query with `ORDER BY`.
```sql
SELECT
    last_value_respect_nulls(b),
    last_value(b)
FROM
(
    SELECT *
    FROM test_data
    ORDER BY a ASC
)
```

```text
┌─last_value_respect_nulls(b)─┬─last_value(b)─┐
│                        ᴺᵁᴸᴸ │             5 │
└─────────────────────────────┴───────────────┘
```

@ -14,7 +14,7 @@ The result depends on the order of running the query, and is nondeterministic.
When using multiple `quantile*` functions with different levels in a query, the internal states are not combined (that is, the query works less efficiently than it could). In this case, use the [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) function.

:::note
Using `quantileTDigestWeighted` [is not recommended for tiny data sets](https://github.com/tdunning/t-digest/issues/167#issuecomment-828650275) and can lead to significat error. In this case, consider possibility of using [`quantileTDigest`](../../../sql-reference/aggregate-functions/reference/quantiletdigest.md) instead.
Using `quantileTDigestWeighted` [is not recommended for tiny data sets](https://github.com/tdunning/t-digest/issues/167#issuecomment-828650275) and can lead to significant error. In this case, consider possibility of using [`quantileTDigest`](../../../sql-reference/aggregate-functions/reference/quantiletdigest.md) instead.
:::

**Syntax**
@ -18,7 +18,7 @@ stochasticLinearRegression(1.0, 1.0, 10, 'SGD')
1. `learning rate` is the coefficient on step length, when gradient descent step is performed. Too big a learning rate may cause infinite weights of the model. Default is `0.00001`.
2. `l2 regularization coefficient` which may help to prevent overfitting. Default is `0.1`.
3. `mini-batch size` sets the number of elements whose gradients will be computed and summed to perform one step of gradient descent. Pure stochastic descent uses one element; however, having small batches (about 10 elements) makes gradient steps more stable. Default is `15`.
4. `method for updating weights`, they are: `Adam` (by default), `SGD`, `Momentum`, `Nesterov`. `Momentum` and `Nesterov` require little bit more computations and memory, however they happen to be useful in terms of speed of convergance and stability of stochastic gradient methods.
4. `method for updating weights`, they are: `Adam` (by default), `SGD`, `Momentum`, `Nesterov`. `Momentum` and `Nesterov` require little bit more computations and memory, however they happen to be useful in terms of speed of convergence and stability of stochastic gradient methods.

### Usage

@ -5,7 +5,11 @@ sidebar_position: 141

# sumMap

Syntax: `sumMap(key, value)` or `sumMap(Tuple(key, value))`
Syntax: `sumMap(key <Array>, value <Array>)` [Array type](../../data-types/array.md) or `sumMap(Tuple(key <Array>, value <Array>))` [Tuple type](../../data-types/tuple.md).

Arguments:

Alias: `sumMappedArrays`.

Totals the `value` array according to the keys specified in the `key` array.

@ -27,6 +31,7 @@ CREATE TABLE sum_map(
    ),
    statusMapTuple Tuple(Array(Int32), Array(Int32))
) ENGINE = Log;

INSERT INTO sum_map VALUES
    ('2000-01-01', '2000-01-01 00:00:00', [1, 2, 3], [10, 10, 10], ([1, 2, 3], [10, 10, 10])),
    ('2000-01-01', '2000-01-01 00:00:00', [3, 4, 5], [10, 10, 10], ([3, 4, 5], [10, 10, 10])),
@ -47,3 +52,7 @@ GROUP BY timeslot
│ 2000-01-01 00:01:00 │ ([4,5,6,7,8],[10,10,20,10,10]) │ ([4,5,6,7,8],[10,10,20,10,10]) │
└─────────────────────┴──────────────────────────────────────────────┴────────────────────────────────┘
```

**See Also**

- [-Map combinator for Map datatype](../combinators.md#-map)
@ -22,7 +22,7 @@ Resolution: 1 second.

The point in time is saved as a [Unix timestamp](https://en.wikipedia.org/wiki/Unix_time), regardless of the time zone or daylight saving time. The time zone affects how the `DateTime` type values are displayed in text format and how the values specified as strings are parsed (‘2020-01-01 05:00:01’).

Timezone agnostic unix timestamp is stored in tables, and the timezone is used to transform it to text format or back during data import/export or to make calendar calculations on the values (example: `toDate`, `toHour` functions et cetera). The time zone is not stored in the rows of the table (or in resultset), but is stored in the column metadata.
Timezone agnostic Unix timestamp is stored in tables, and the timezone is used to transform it to text format or back during data import/export or to make calendar calculations on the values (example: `toDate`, `toHour` functions etc.). The time zone is not stored in the rows of the table (or in resultset), but is stored in the column metadata.

A list of supported time zones can be found in the [IANA Time Zone Database](https://www.iana.org/time-zones) and also can be queried by `SELECT * FROM system.time_zones`. [The list](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones) is also available at Wikipedia.

@ -30,7 +30,7 @@ You can explicitly set a time zone for `DateTime`-type columns when creating a t

The [clickhouse-client](../../interfaces/cli.md) applies the server time zone by default if a time zone isn’t explicitly set when initializing the data type. To use the client time zone, run `clickhouse-client` with the `--use_client_time_zone` parameter.

ClickHouse outputs values depending on the value of the [date_time_output_format](../../operations/settings/settings.md#settings-date_time_output_format) setting. `YYYY-MM-DD hh:mm:ss` text format by default. Additionaly you can change the output with the [formatDateTime](../../sql-reference/functions/date-time-functions.md#formatdatetime) function.
ClickHouse outputs values depending on the value of the [date_time_output_format](../../operations/settings/settings.md#settings-date_time_output_format) setting. `YYYY-MM-DD hh:mm:ss` text format by default. Additionally, you can change the output with the [formatDateTime](../../sql-reference/functions/date-time-functions.md#formatdatetime) function.

When inserting data into ClickHouse, you can use different formats of date and time strings, depending on the value of the [date_time_input_format](../../operations/settings/settings.md#settings-date_time_input_format) setting.

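A small, hedged sketch of a column-level time zone in action (the table and values are placeholders):

```sql
CREATE TABLE dt_tz (ts DateTime('Asia/Istanbul')) ENGINE = Memory;
INSERT INTO dt_tz VALUES (1546300800), ('2019-01-01 00:00:00');
SELECT ts, toTypeName(ts) FROM dt_tz; -- both rows render in the Asia/Istanbul time zone
```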
@ -120,9 +120,9 @@ FROM dt
As timezone conversion only changes the metadata, the operation has no computation cost.


## Limitations on timezones support
## Limitations on time zones support

Some timezones may not be supported completely. There are a few cases:
Some time zones may not be supported completely. There are a few cases:

If the offset from UTC is not a multiple of 15 minutes, the calculation of hours and minutes can be incorrect. For example, the time zone in Monrovia, Liberia has offset UTC -0:44:30 before 7 Jan 1972. If you are doing calculations on the historical time in Monrovia timezone, the time processing functions may give incorrect results. The results after 7 Jan 1972 will be correct nevertheless.

@ -63,7 +63,7 @@ SELECT * FROM dt WHERE timestamp = toDateTime64('2019-01-01 00:00:00', 3, 'Asia/
|
||||
|
||||
``` text
|
||||
┌───────────────timestamp─┬─event_id─┐
|
||||
│ 2019-01-01 00:00:00.000 │ 2 │
|
||||
│ 2019-01-01 00:00:00.000 │ 3 │
|
||||
└─────────────────────────┴──────────┘
|
||||
```

@ -75,8 +75,8 @@ SELECT * FROM dt WHERE timestamp = toDateTime64(1546300800.123, 3);

``` text
┌───────────────timestamp─┬─event_id─┐
│ 2019-01-01 00:00:00.123 │        1 │
│ 2019-01-01 00:00:00.123 │        2 │
│ 2019-01-01 03:00:00.123 │        1 │
│ 2019-01-01 03:00:00.123 │        2 │
└─────────────────────────┴──────────┘
```

@ -91,7 +91,7 @@ SELECT toDateTime64(now(), 3, 'Asia/Istanbul') AS column, toTypeName(column) AS

``` text
┌──────────────────column─┬─x──────────────────────────────┐
│ 2019-10-16 04:12:04.000 │ DateTime64(3, 'Asia/Istanbul') │
│ 2023-06-05 00:09:52.000 │ DateTime64(3, 'Asia/Istanbul') │
└─────────────────────────┴────────────────────────────────┘
```

@ -100,13 +100,14 @@ SELECT toDateTime64(now(), 3, 'Asia/Istanbul') AS column, toTypeName(column) AS

``` sql
SELECT
    toDateTime64(timestamp, 3, 'Europe/London') as lon_time,
    toDateTime64(timestamp, 3, 'Asia/Istanbul') as mos_time
    toDateTime64(timestamp, 3, 'Asia/Istanbul') as istanbul_time
FROM dt;
```

``` text
┌───────────────lon_time──┬────────────────mos_time─┐
│ 2019-01-01 00:00:00.000 │ 2019-01-01 03:00:00.000 │
┌────────────────lon_time─┬───────────istanbul_time─┐
│ 2019-01-01 00:00:00.123 │ 2019-01-01 03:00:00.123 │
│ 2019-01-01 00:00:00.123 │ 2019-01-01 03:00:00.123 │
│ 2018-12-31 21:00:00.000 │ 2019-01-01 00:00:00.000 │
└─────────────────────────┴─────────────────────────┘
```
@ -115,10 +116,9 @@ FROM dt;

- [Type conversion functions](../../sql-reference/functions/type-conversion-functions.md)
- [Functions for working with dates and times](../../sql-reference/functions/date-time-functions.md)
- [Functions for working with arrays](../../sql-reference/functions/array-functions.md)
- [The `date_time_input_format` setting](../../operations/settings/settings.md#settings-date_time_input_format)
- [The `date_time_output_format` setting](../../operations/settings/settings.md#settings-date_time_output_format)
- [The `date_time_input_format` setting](../../operations/settings/settings-formats.md#date_time_input_format)
- [The `date_time_output_format` setting](../../operations/settings/settings-formats.md#date_time_output_format)
- [The `timezone` server configuration parameter](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone)
- [Operators for working with dates and times](../../sql-reference/operators/index.md#operators-datetime)
- [Operators for working with dates and times](../../sql-reference/operators/index.md#operators-for-working-with-dates-and-times)
- [`Date` data type](../../sql-reference/data-types/date.md)
- [`DateTime` data type](../../sql-reference/data-types/datetime.md)

@ -27,7 +27,7 @@ ClickHouse data types include:

- **Aggregation function types**: use [`SimpleAggregateFunction`](./simpleaggregatefunction.md) and [`AggregateFunction`](./aggregatefunction.md) for storing the intermediate status of aggregate function results
- **Nested data structures**: A [`Nested` data structure](./nested-data-structures/index.md) is like a table inside a cell
- **Tuples**: A [`Tuple` of elements](./tuple.md), each having an individual type.
- **Nullable**: [`Nullable`](./nullable.md) allows you to store a value as `NULL` when a value is "missing" (instead of the column gettings its default value for the data type)
- **Nullable**: [`Nullable`](./nullable.md) allows you to store a value as `NULL` when a value is "missing" (instead of the column getting its default value for the data type)
- **IP addresses**: use [`IPv4`](./domains/ipv4.md) and [`IPv6`](./domains/ipv6.md) to efficiently store IP addresses
- **Geo types**: for [geographical data](./geo.md), including `Point`, `Ring`, `Polygon` and `MultiPolygon`
- **Special data types**: including [`Expression`](./special-data-types/expression.md), [`Set`](./special-data-types/set.md), [`Nothing`](./special-data-types/nothing.md) and [`Interval`](./special-data-types/interval.md)

@ -108,6 +108,7 @@ Result:

- [map()](../../sql-reference/functions/tuple-map-functions.md#function-map) function
- [CAST()](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) function
- [-Map combinator for Map datatype](../aggregate-functions/combinators.md#-map)

## Related content

@ -247,7 +247,7 @@ LAYOUT(FLAT(INITIAL_ARRAY_SIZE 50000 MAX_ARRAY_SIZE 5000000))

### hashed

The dictionary is completely stored in memory in the form of a hash table. The dictionary can contain any number of elements with any identifiers In practice, the number of keys can reach tens of millions of items.
The dictionary is completely stored in memory in the form of a hash table. The dictionary can contain any number of elements with any identifiers. In practice, the number of keys can reach tens of millions of items.

The dictionary key has the [UInt64](../../sql-reference/data-types/int-uint.md) type.
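
As an illustrative sketch (the dictionary and source table names here are hypothetical, not taken from the surrounding text), a DDL definition using this layout might look like:

``` sql
-- Hypothetical example: an in-memory hash table over UInt64 keys.
CREATE DICTIONARY products_dict
(
    id UInt64,
    name String
)
PRIMARY KEY id
SOURCE(CLICKHOUSE(TABLE 'products'))
LAYOUT(HASHED())
LIFETIME(MIN 300 MAX 600)
```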

@ -984,7 +984,7 @@ SOURCE(ODBC(... invalidate_query 'SELECT update_time FROM dictionary_source wher

...
```

For `Cache`, `ComplexKeyCache`, `SSDCache`, and `SSDComplexKeyCache` dictionaries both synchronious and asynchronous updates are supported.
For `Cache`, `ComplexKeyCache`, `SSDCache`, and `SSDComplexKeyCache` dictionaries both synchronous and asynchronous updates are supported.

It is also possible for `Flat`, `Hashed`, `ComplexKeyHashed` dictionaries to only request data that was changed after the previous update. If `update_field` is specified as part of the dictionary source configuration, the value of the previous update time in seconds will be added to the data request. Depending on the source type (Executable, HTTP, MySQL, PostgreSQL, ClickHouse, or ODBC), different logic will be applied to `update_field` before requesting data from an external source.
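
A hedged sketch of such a source (the dictionary, table, and column names are hypothetical): on each refresh, the dictionary requests only rows whose `updated_at` is newer than the previous update time.

``` sql
CREATE DICTIONARY incremental_dict
(
    id UInt64,
    name String
)
PRIMARY KEY id
-- UPDATE_FIELD is appended to the source query as a filter on the previous update time.
SOURCE(CLICKHOUSE(TABLE 'products' UPDATE_FIELD 'updated_at'))
LAYOUT(HASHED())
LIFETIME(MIN 300 MAX 600)
```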

@ -1243,8 +1243,8 @@ Setting fields:

- `password` – Password required for the authentication.
- `headers` – All custom HTTP headers entries used for the HTTP request. Optional parameter.
    - `header` – Single HTTP header entry.
        - `name` – Identifiant name used for the header send on the request.
        - `value` – Value set for a specific identifiant name.
        - `name` – Identifier name used for the header sent on the request.
        - `value` – Value set for a specific identifier name.

When creating a dictionary using the DDL command (`CREATE DICTIONARY ...`), remote hosts for HTTP dictionaries are checked against the contents of the `remote_url_allow_hosts` section from the config to prevent database users from accessing arbitrary HTTP servers.

@ -2280,7 +2280,7 @@ This config consists of a list of regular expression tree nodes. Each node has t

- The value of an attribute may contain **back references**, referring to capture groups of the matched regular expression. In the example, the value of attribute `version` in the first node consists of a back-reference `\1` to capture group `(\d+[\.\d]*)` in the regular expression. Back-reference numbers range from 1 to 9 and are written as `$1` or `\1` (for number 1). The back reference is replaced by the matched capture group during query execution.
- **child nodes**: a list of children of a regexp tree node, each of which has its own attributes and (potentially) children nodes. String matching proceeds in a depth-first fashion. If a string matches a regexp node, the dictionary checks if it also matches the node's child nodes. If that is the case, the attributes of the deepest matching node are assigned. Attributes of a child node overwrite equally named attributes of parent nodes. The name of child nodes in YAML files can be arbitrary, e.g. `versions` in the above example.

Regexp tree dictionaries only allow access using the functions `dictGet` and `dictGetOrDefault`.
Regexp tree dictionaries only allow access using the functions `dictGet`, `dictGetOrDefault`, and `dictGetAll`.

Example:

@ -2300,6 +2300,67 @@ In this case, we first match the regular expression `\d+/tclwebkit(?:\d+[\.\d]*)

With a powerful YAML configuration file, we can use a regexp tree dictionary as a user agent string parser. We support [uap-core](https://github.com/ua-parser/uap-core) and demonstrate how to use it in the functional test [02504_regexp_dictionary_ua_parser](https://github.com/ClickHouse/ClickHouse/blob/master/tests/queries/0_stateless/02504_regexp_dictionary_ua_parser.sh).

#### Collecting Attribute Values

Sometimes it is useful to return values from multiple regular expressions that matched, rather than just the value of a leaf node. In these cases, the specialized [`dictGetAll`](../../sql-reference/functions/ext-dict-functions.md#dictgetall) function can be used. If a node has an attribute value of type `T`, `dictGetAll` will return an `Array(T)` containing zero or more values.

By default, the number of matches returned per key is unbounded. A bound can be passed as an optional fourth argument to `dictGetAll`. The array is populated in _topological order_, meaning that child nodes come before parent nodes, and sibling nodes follow the ordering in the source.

Example:

```sql
CREATE DICTIONARY regexp_dict
(
    regexp String,
    tag String,
    topological_index Int64,
    captured Nullable(String),
    parent String
)
PRIMARY KEY(regexp)
SOURCE(YAMLRegExpTree(PATH '/var/lib/clickhouse/user_files/regexp_tree.yaml'))
LAYOUT(regexp_tree)
LIFETIME(0)
```

```yaml
# /var/lib/clickhouse/user_files/regexp_tree.yaml
- regexp: 'clickhouse\.com'
  tag: 'ClickHouse'
  topological_index: 1
  paths:
    - regexp: 'clickhouse\.com/docs(.*)'
      tag: 'ClickHouse Documentation'
      topological_index: 0
      captured: '\1'
      parent: 'ClickHouse'

- regexp: '/docs(/|$)'
  tag: 'Documentation'
  topological_index: 2

- regexp: 'github.com'
  tag: 'GitHub'
  topological_index: 3
  captured: 'NULL'
```

```sql
CREATE TABLE urls (url String) ENGINE=MergeTree ORDER BY url;
INSERT INTO urls VALUES ('clickhouse.com'), ('clickhouse.com/docs/en'), ('github.com/clickhouse/tree/master/docs');
SELECT url, dictGetAll('regexp_dict', ('tag', 'topological_index', 'captured', 'parent'), url, 2) FROM urls;
```

Result:

```text
┌─url────────────────────────────────────┬─dictGetAll('regexp_dict', ('tag', 'topological_index', 'captured', 'parent'), url, 2)─┐
│ clickhouse.com                         │ (['ClickHouse'],[1],[],[])                                                             │
│ clickhouse.com/docs/en                 │ (['ClickHouse Documentation','ClickHouse'],[0,1],['/en'],['ClickHouse'])               │
│ github.com/clickhouse/tree/master/docs │ (['Documentation','GitHub'],[2,3],[NULL],[])                                           │
└────────────────────────────────────────┴───────────────────────────────────────────────────────────────────────────────────────┘
```

### Use Regular Expression Tree Dictionary in ClickHouse Cloud

The `YAMLRegExpTree` source used above works in ClickHouse Open Source but not in ClickHouse Cloud. To use regexp tree dictionaries in ClickHouse Cloud, first create a regexp tree dictionary from a YAML file locally in ClickHouse Open Source, then dump this dictionary into a CSV file using the `dictionary` table function and the [INTO OUTFILE](../statements/select/into-outfile.md) clause.
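
A sketch of that dump step (the output file name is arbitrary), using the `dictionary` table function with `INTO OUTFILE`:

``` sql
-- Run locally in ClickHouse Open Source, after the dictionary has been created.
SELECT * FROM dictionary('regexp_dict')
INTO OUTFILE 'regexp_dict.csv'
FORMAT CSV;
```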

@ -140,7 +140,7 @@ range([start, ] end [, step])

**Implementation details**

- All arguments `start`, `end`, `step` must be below data types: `UInt8`, `UInt16`, `UInt32`, `UInt64`,`Int8`, `Int16`, `Int32`, `Int64`, as well as elements of the returned array, which's type is a super type of all arguments's.
- All arguments `start`, `end`, `step` must be one of the following data types: `UInt8`, `UInt16`, `UInt32`, `UInt64`, `Int8`, `Int16`, `Int32`, `Int64`. The elements of the returned array are of the supertype of all arguments.
- An exception is thrown if the query results in arrays with a total length of more than the number of elements specified by the [function_range_max_elements_in_block](../../operations/settings/settings.md#settings-function_range_max_elements_in_block) setting.

**Examples**

@ -1236,7 +1236,7 @@ arrayAUC(arr_scores, arr_labels)

**Arguments**

- `arr_scores` — scores the prediction model gives.
- `arr_labels` — labels of samples, usually 1 for positive sample and 0 for negtive sample.
- `arr_labels` — labels of samples, usually 1 for positive sample and 0 for negative sample.

**Returned value**

@ -226,7 +226,7 @@ Result:

Returns the result of [logical conjunction](https://en.wikipedia.org/wiki/Logical_conjunction) (AND operator) of all bits at given positions. The count starts from 0, from right to left.

The conjuction for bitwise operations:
The conjunction for bit-wise operations:

0 AND 0 = 0

@ -291,7 +291,7 @@ Result:

Returns the result of [logical disjunction](https://en.wikipedia.org/wiki/Logical_disjunction) (OR operator) of all bits at given positions. The count starts from 0, from right to left.

The disjunction for bitwise operations:
The disjunction for bit-wise operations:

0 OR 0 = 0

@ -403,12 +403,14 @@ from_date32: 1509840000

```

:::note
The return type of `toStartOf*`, `toLastDayOfMonth`, `toMonday`, `timeSlot` functions described below is determined by the configuration parameter [enable_extended_results_for_datetime_functions](../../operations/settings/settings.md#enable-extended-results-for-datetime-functions) which is `0` by default.
The return type of `toStartOf*`, `toLastDayOf*`, `toMonday`, `timeSlot` functions described below is determined by the configuration parameter [enable_extended_results_for_datetime_functions](../../operations/settings/settings.md#enable-extended-results-for-datetime-functions) which is `0` by default.

Behavior for
* `enable_extended_results_for_datetime_functions = 0`: Functions `toStartOfYear`, `toStartOfISOYear`, `toStartOfQuarter`, `toStartOfMonth`, `toStartOfWeek`, `toLastDayOfMonth`, `toMonday` return `Date` or `DateTime`. Functions `toStartOfDay`, `toStartOfHour`, `toStartOfFifteenMinutes`, `toStartOfTenMinutes`, `toStartOfFiveMinutes`, `toStartOfMinute`, `timeSlot` return `DateTime`. Though these functions can take values of the extended types `Date32` and `DateTime64` as an argument, passing them a time outside the normal range (year 1970 to 2149 for `Date` / 2106 for `DateTime`) will produce wrong results.
* `enable_extended_results_for_datetime_functions = 0`:
  * Functions `toStartOfYear`, `toStartOfISOYear`, `toStartOfQuarter`, `toStartOfMonth`, `toStartOfWeek`, `toLastDayOfWeek`, `toLastDayOfMonth`, `toMonday` return `Date` or `DateTime`.
  * Functions `toStartOfDay`, `toStartOfHour`, `toStartOfFifteenMinutes`, `toStartOfTenMinutes`, `toStartOfFiveMinutes`, `toStartOfMinute`, `timeSlot` return `DateTime`. Though these functions can take values of the extended types `Date32` and `DateTime64` as an argument, passing them a time outside the normal range (year 1970 to 2149 for `Date` / 2106 for `DateTime`) will produce wrong results.
* `enable_extended_results_for_datetime_functions = 1`:
  * Functions `toStartOfYear`, `toStartOfISOYear`, `toStartOfQuarter`, `toStartOfMonth`, `toStartOfWeek`, `toLastDayOfMonth`, `toMonday` return `Date` or `DateTime` if their argument is a `Date` or `DateTime`, and they return `Date32` or `DateTime64` if their argument is a `Date32` or `DateTime64`.
  * Functions `toStartOfYear`, `toStartOfISOYear`, `toStartOfQuarter`, `toStartOfMonth`, `toStartOfWeek`, `toLastDayOfWeek`, `toLastDayOfMonth`, `toMonday` return `Date` or `DateTime` if their argument is a `Date` or `DateTime`, and they return `Date32` or `DateTime64` if their argument is a `Date32` or `DateTime64`.
  * Functions `toStartOfDay`, `toStartOfHour`, `toStartOfFifteenMinutes`, `toStartOfTenMinutes`, `toStartOfFiveMinutes`, `toStartOfMinute`, `timeSlot` return `DateTime` if their argument is a `Date` or `DateTime`, and they return `DateTime64` if their argument is a `Date32` or `DateTime64`.
:::
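
A minimal sketch of the difference (return types only; the exact values depend on your data):

``` sql
-- Default: the result collapses to the normal-range type.
SET enable_extended_results_for_datetime_functions = 0;
SELECT toTypeName(toStartOfMonth(toDateTime64('2023-06-15 10:00:00', 3)));  -- Date

-- Enabled: the extended type is preserved.
SET enable_extended_results_for_datetime_functions = 1;
SELECT toTypeName(toStartOfMonth(toDateTime64('2023-06-15 10:00:00', 3)));  -- Date32
```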

@ -463,6 +465,18 @@ The mode argument works exactly like the mode argument in function `toWeek()`. I

toStartOfWeek(t[, mode[, timezone]])
```

## toLastDayOfWeek

Rounds a date or date with time up to the nearest Saturday or Sunday.
Returns the date.
The mode argument works exactly like the mode argument in function `toWeek()`. If no mode is specified, mode is assumed as 0.

**Syntax**

``` sql
toLastDayOfWeek(t[, mode[, timezone]])
```
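
A usage sketch: with the default mode `0`, weeks run Sunday through Saturday, so a mid-week date rounds up to the following Saturday.

``` sql
SELECT
    toLastDayOfWeek(toDate('2023-06-07')) AS last_day,   -- 2023-06-10 (Saturday)
    toStartOfWeek(toDate('2023-06-07')) AS first_day;    -- 2023-06-04 (Sunday)
```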

## toStartOfDay

Rounds down a date with time to the start of the day.

@ -487,7 +487,7 @@ cosineDistance(vector1, vector2)

**Returned value**

- Cosine of the angle between two vectors substracted from one.
- Cosine of the angle between two vectors subtracted from one.

Type: [Float](../../sql-reference/data-types/float.md).

@ -31,9 +31,9 @@ encrypt('mode', 'plaintext', 'key' [, iv, aad])

**Arguments**

- `mode` — Encryption mode. [String](../../sql-reference/data-types/string.md#string).
- `plaintext` — Text thats need to be encrypted. [String](../../sql-reference/data-types/string.md#string).
- `plaintext` — Text that needs to be encrypted. [String](../../sql-reference/data-types/string.md#string).
- `key` — Encryption key. [String](../../sql-reference/data-types/string.md#string).
- `iv` — Initialization vector. Required for `-gcm` modes, optinal for others. [String](../../sql-reference/data-types/string.md#string).
- `iv` — Initialization vector. Required for `-gcm` modes, optional for others. [String](../../sql-reference/data-types/string.md#string).
- `aad` — Additional authenticated data. It isn't encrypted, but it affects decryption. Works only in `-gcm` modes; for others an exception is thrown. [String](../../sql-reference/data-types/string.md#string).

**Returned value**

@ -165,7 +165,7 @@ Received exception from server (version 22.6.1):

Code: 36. DB::Exception: Received from localhost:9000. DB::Exception: Invalid key size: 33 expected 32: While processing encrypt('aes-256-ofb', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123').
```

While `aes_encrypt_mysql` produces MySQL-compatitalbe output:
While `aes_encrypt_mysql` produces MySQL-compatible output:

Query:

@ -233,7 +233,7 @@ decrypt('mode', 'ciphertext', 'key' [, iv, aad])

- `mode` — Decryption mode. [String](../../sql-reference/data-types/string.md#string).
- `ciphertext` — Encrypted text that needs to be decrypted. [String](../../sql-reference/data-types/string.md#string).
- `key` — Decryption key. [String](../../sql-reference/data-types/string.md#string).
- `iv` — Initialization vector. Required for `-gcm` modes, optinal for others. [String](../../sql-reference/data-types/string.md#string).
- `iv` — Initialization vector. Required for `-gcm` modes, optional for others. [String](../../sql-reference/data-types/string.md#string).
- `aad` — Additional authenticated data. Won't decrypt if this value is incorrect. Works only in `-gcm` modes; for others an exception is thrown. [String](../../sql-reference/data-types/string.md#string).

**Returned value**

@ -364,7 +364,7 @@ aes_decrypt_mysql('mode', 'ciphertext', 'key' [, iv])

- `mode` — Decryption mode. [String](../../sql-reference/data-types/string.md#string).
- `ciphertext` — Encrypted text that needs to be decrypted. [String](../../sql-reference/data-types/string.md#string).
- `key` — Decryption key. [String](../../sql-reference/data-types/string.md#string).
- `iv` — Initialization vector. Optinal. [String](../../sql-reference/data-types/string.md#string).
- `iv` — Initialization vector. Optional. [String](../../sql-reference/data-types/string.md#string).

**Returned value**

@ -403,6 +403,84 @@ SELECT dictGetDescendants('hierarchy_flat_dictionary', number, 1) FROM system.nu

└────────────────────────────────────────────────────────────┘
```

## dictGetAll

Retrieves the attribute values of all nodes that matched each key in a [regular expression tree dictionary](../../sql-reference/dictionaries/index.md#regexp-tree-dictionary).

Besides returning values of type `Array(T)` instead of `T`, this function behaves similarly to [`dictGet`](#dictget-dictgetordefault-dictgetornull).

**Syntax**

``` sql
dictGetAll('dict_name', attr_names, id_expr[, limit])
```

**Arguments**

- `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal).
- `attr_names` — Name of the column of the dictionary, [String literal](../../sql-reference/syntax.md#syntax-string-literal), or tuple of column names, [Tuple](../../sql-reference/data-types/tuple.md)([String literal](../../sql-reference/syntax.md#syntax-string-literal)).
- `id_expr` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning array of dictionary key-type value or [Tuple](../../sql-reference/data-types/tuple.md)-type value depending on the dictionary configuration.
- `limit` — Maximum length for each value array returned. When truncating, child nodes are given precedence over parent nodes, and otherwise the defined list order for the regexp tree dictionary is respected. If unspecified, array length is unlimited.

**Returned value**

- If ClickHouse parses the attribute successfully in the attribute’s data type as defined in the dictionary, returns an array of dictionary attribute values that correspond to `id_expr` for each attribute specified by `attr_names`.

- If there is no key corresponding to `id_expr` in the dictionary, then an empty array is returned.

ClickHouse throws an exception if it cannot parse the value of the attribute or the value does not match the attribute data type.

**Example**

Consider the following regexp tree dictionary:

```sql
CREATE DICTIONARY regexp_dict
(
    regexp String,
    tag String
)
PRIMARY KEY(regexp)
SOURCE(YAMLRegExpTree(PATH '/var/lib/clickhouse/user_files/regexp_tree.yaml'))
LAYOUT(regexp_tree)
...
```

```yaml
# /var/lib/clickhouse/user_files/regexp_tree.yaml
- regexp: 'foo'
  tag: 'foo_attr'
- regexp: 'bar'
  tag: 'bar_attr'
- regexp: 'baz'
  tag: 'baz_attr'
```

Get all matching values:

```sql
SELECT dictGetAll('regexp_dict', 'tag', 'foobarbaz');
```

```text
┌─dictGetAll('regexp_dict', 'tag', 'foobarbaz')─┐
│ ['foo_attr','bar_attr','baz_attr']            │
└───────────────────────────────────────────────┘
```

Get up to 2 matching values:

```sql
SELECT dictGetAll('regexp_dict', 'tag', 'foobarbaz', 2);
```

```text
┌─dictGetAll('regexp_dict', 'tag', 'foobarbaz', 2)─┐
│ ['foo_attr','bar_attr']                          │
└──────────────────────────────────────────────────┘
```

## Other Functions

ClickHouse supports specialized functions that convert dictionary attribute values to a specific data type regardless of the dictionary configuration.
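
For example, `dictGetString` always returns a `String`. A minimal sketch, assuming a hypothetical dictionary `my_dict` with `UInt64` keys and a `String` attribute `name` (regexp tree dictionaries, as noted above, are not accessible this way):

``` sql
SELECT dictGetString('my_dict', 'name', toUInt64(1));
```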

@ -6,7 +6,7 @@ sidebar_label: Files

## file

Reads file as string and loads the data into the specified column. The actual file content is not interpreted.
Reads a file as string and loads the data into the specified column. The file content is not interpreted.

Also see table function [file](../table-functions/file.md).

@ -18,15 +18,13 @@ file(path[, default])

**Arguments**

- `path` — The path of the file relative to [user_files_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Supports the following wildcards: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` are numbers and `'abc', 'def'` are strings.
- `default` — The value that will be returned in the case the file does not exist or cannot be accessed. Supported data types: [String](../../sql-reference/data-types/string.md) and [NULL](../../sql-reference/syntax.md#null-literal).
- `path` — The path of the file relative to [user_files_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Supports wildcards `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` are numbers and `'abc', 'def'` are strings.
- `default` — The value returned if the file does not exist or cannot be accessed. Supported data types: [String](../../sql-reference/data-types/string.md) and [NULL](../../sql-reference/syntax.md#null-literal).

**Example**

Inserting data from files a.txt and b.txt into a table as strings:

Query:

``` sql
INSERT INTO table SELECT file('a.txt'), file('b.txt');
```
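
The `default` argument is not covered by the example above; a hedged sketch (the file name is arbitrary):

``` sql
-- Returns the file contents if missing.txt exists, otherwise the fallback string.
SELECT file('missing.txt', 'file not found');
```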

@ -8,7 +8,7 @@ sidebar_label: Nullable

## isNull

Checks whether the argument is [NULL](../../sql-reference/syntax.md#null-literal).
Returns whether the argument is [NULL](../../sql-reference/syntax.md#null-literal).

``` sql
isNull(x)

@ -18,7 +18,7 @@ Alias: `ISNULL`.

**Arguments**

- `x` — A value with a non-compound data type.
- `x` — A value of non-compound data type.

**Returned value**

@ -27,7 +27,7 @@ Alias: `ISNULL`.

**Example**

Input table
Table:

``` text
┌─x─┬────y─┐

@ -36,12 +36,14 @@ Input table

└───┴──────┘
```

Query
Query:

``` sql
SELECT x FROM t_null WHERE isNull(y);
```

Result:

``` text
┌─x─┐
│ 1 │

@ -50,7 +52,7 @@ SELECT x FROM t_null WHERE isNull(y);

## isNotNull

Checks whether the argument is [NULL](../../sql-reference/syntax.md#null-literal).
Returns whether the argument is not [NULL](../../sql-reference/syntax.md#null-literal).

``` sql
isNotNull(x)

@ -58,16 +60,16 @@ isNotNull(x)

**Arguments:**

- `x` — A value with a non-compound data type.
- `x` — A value of non-compound data type.

**Returned value**

- `0` if `x` is `NULL`.
- `1` if `x` is not `NULL`.
- `0` if `x` is `NULL`.

**Example**

Input table
Table:

``` text
┌─x─┬────y─┐

@ -76,12 +78,14 @@ Input table

└───┴──────┘
```

Query
Query:

``` sql
SELECT x FROM t_null WHERE isNotNull(y);
```

Result:

``` text
┌─x─┐
│ 2 │

@ -90,7 +94,7 @@ SELECT x FROM t_null WHERE isNotNull(y);

## coalesce

Checks from left to right whether `NULL` arguments were passed and returns the first non-`NULL` argument.
Returns the leftmost non-`NULL` argument.

``` sql
coalesce(x,...)

@ -98,11 +102,11 @@ coalesce(x,...)

**Arguments:**

- Any number of parameters of a non-compound type. All parameters must be compatible by data type.
- Any number of parameters of non-compound type. All parameters must be of mutually compatible data types.

**Returned values**

- The first non-`NULL` argument.
- The first non-`NULL` argument
- `NULL`, if all arguments are `NULL`.

**Example**

@ -110,10 +114,10 @@ coalesce(x,...)

Consider a list of contacts that may specify multiple ways to contact a customer.

``` text
┌─name─────┬─mail─┬─phone─────┬──icq─┐
│ client 1 │ ᴺᵁᴸᴸ │ 123-45-67 │  123 │
│ client 2 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ      │ ᴺᵁᴸᴸ │
└──────────┴──────┴───────────┴──────┘
┌─name─────┬─mail─┬─phone─────┬──telegram─┐
│ client 1 │ ᴺᵁᴸᴸ │ 123-45-67 │       123 │
│ client 2 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ      │      ᴺᵁᴸᴸ │
└──────────┴──────┴───────────┴───────────┘
```

The `mail` and `phone` fields are of type String, but the `icq` field is `UInt32`, so it needs to be converted to `String`.

@ -121,22 +125,22 @@ The `mail` and `phone` fields are of type String, but the `icq` field is `UInt32

Get the first available contact method for the customer from the contact list:

``` sql
SELECT name, coalesce(mail, phone, CAST(icq,'Nullable(String)')) FROM aBook;
SELECT name, coalesce(mail, phone, CAST(telegram,'Nullable(String)')) FROM aBook;
```

``` text
┌─name─────┬─coalesce(mail, phone, CAST(icq, 'Nullable(String)'))─┐
│ client 1 │ 123-45-67                                            │
│ client 2 │ ᴺᵁᴸᴸ                                                 │
└──────────┴──────────────────────────────────────────────────────┘
┌─name─────┬─coalesce(mail, phone, CAST(telegram, 'Nullable(String)'))─┐
│ client 1 │ 123-45-67                                                 │
│ client 2 │ ᴺᵁᴸᴸ                                                      │
└──────────┴───────────────────────────────────────────────────────────┘
```

## ifNull

Returns an alternative value if the main argument is `NULL`.
Returns an alternative value if the argument is `NULL`.

``` sql
ifNull(x,alt)
ifNull(x, alt)
```

**Arguments:**

@ -146,25 +150,33 @@ ifNull(x,alt)

**Returned values**

- The value `x`, if `x` is not `NULL`.
- The value `alt`, if `x` is `NULL`.
- `x` if `x` is not `NULL`.
- `alt` if `x` is `NULL`.

**Example**

Query:

``` sql
SELECT ifNull('a', 'b');
```

Result:

``` text
┌─ifNull('a', 'b')─┐
│ a                │
└──────────────────┘
```

Query:

``` sql
SELECT ifNull(NULL, 'b');
```

Result:

``` text
┌─ifNull(NULL, 'b')─┐
│ b                 │

@ -173,7 +185,7 @@ SELECT ifNull(NULL, 'b');

## nullIf

Returns `NULL` if the arguments are equal.
Returns `NULL` if both arguments are equal.

``` sql
nullIf(x, y)

@ -181,29 +193,37 @@ nullIf(x, y)

**Arguments:**

`x`, `y` — Values for comparison. They must be compatible types, or ClickHouse will generate an exception.
`x`, `y` — Values to compare. Must be of compatible types.

**Returned values**

- `NULL`, if the arguments are equal.
- The `x` value, if the arguments are not equal.
- `NULL` if the arguments are equal.
- `x` if the arguments are not equal.

**Example**

Query:

``` sql
SELECT nullIf(1, 1);
```

Result:

``` text
┌─nullIf(1, 1)─┐
│         ᴺᵁᴸᴸ │
└──────────────┘
```

Query:

``` sql
SELECT nullIf(1, 2);
```

Result:

``` text
┌─nullIf(1, 2)─┐
│            1 │

@ -212,7 +232,7 @@ SELECT nullIf(1, 2);

## assumeNotNull

Results in an equivalent non-`Nullable` value for a [Nullable](../../sql-reference/data-types/nullable.md) type. In case the original value is `NULL` the result is undetermined. See also `ifNull` and `coalesce` functions.
Returns the corresponding non-`Nullable` value for a value of [Nullable](../../sql-reference/data-types/nullable.md) type. If the original value is `NULL`, an arbitrary result can be returned. See also functions `ifNull` and `coalesce`.

``` sql
assumeNotNull(x)

@ -224,36 +244,29 @@ assumeNotNull(x)

**Returned values**

- The original value from the non-`Nullable` type, if it is not `NULL`.
- Implementation specific result if the original value was `NULL`.
- The input value as non-`Nullable` type, if it is not `NULL`.
- An arbitrary value, if the input value is `NULL`.

**Example**

Consider the `t_null` table.

``` sql
SHOW CREATE TABLE t_null;
```
Table:

``` text
┌─statement─────────────────────────────────────────────────────────────────┐
│ CREATE TABLE default.t_null ( x Int8, y Nullable(Int8)) ENGINE = TinyLog │
└───────────────────────────────────────────────────────────────────────────┘
```

``` text
┌─x─┬────y─┐
│ 1 │ ᴺᵁᴸᴸ │
│ 2 │    3 │
└───┴──────┘
```

Apply the `assumeNotNull` function to the `y` column.
Query:

``` sql
SELECT assumeNotNull(y) FROM t_null;
SELECT assumeNotNull(y) FROM table;
```

Result:

``` text
┌─assumeNotNull(y)─┐
│                0 │

@ -261,10 +274,14 @@ SELECT assumeNotNull(y) FROM t_null;

└──────────────────┘
```

Query:

``` sql
SELECT toTypeName(assumeNotNull(y)) FROM t_null;
```

Result:

``` text
┌─toTypeName(assumeNotNull(y))─┐
│ Int8                         │

@ -282,28 +299,36 @@ toNullable(x)

**Arguments:**

- `x` — The value of any non-compound type.
- `x` — A value of non-compound type.

**Returned value**

- The input value with a `Nullable` type.
- The input value but of `Nullable` type.

**Example**

Query:

``` sql
SELECT toTypeName(10);
```

Result:

``` text
┌─toTypeName(10)─┐
│ UInt8          │
└────────────────┘
```

Query:

``` sql
SELECT toTypeName(toNullable(10));
```

Result:

``` text
┌─toTypeName(toNullable(10))─┐
│ Nullable(UInt8)            │

@ -12,7 +12,7 @@ A latitude and longitude pair can be transformed to a 64-bit H3 index, identifyi

The H3 index is used primarily for bucketing locations and other geospatial manipulations.

The full description of the H3 system is available at [the Uber Engeneering site](https://eng.uber.com/h3/).
The full description of the H3 system is available at [the Uber Engineering site](https://eng.uber.com/h3/).

## h3IsValid

@ -249,7 +249,7 @@ s2RectAdd(s2pointLow, s2pointHigh, s2Point)

**Returned values**

- `s2PointLow` — Low S2 cell id corresponding to the grown rectangle. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2PointHigh` — Hight S2 cell id corresponding to the grown rectangle. Type: [UInt64](../../../sql-reference/data-types/float.md).
- `s2PointHigh` — High S2 cell id corresponding to the grown rectangle. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).

**Example**

52
docs/en/sql-reference/functions/geo/svg.md
Normal file
@ -0,0 +1,52 @@

---
slug: /en/sql-reference/functions/geo/svg
sidebar_label: SVG
title: "Functions for Generating SVG images from Geo data"
---

## Syntax

``` sql
SVG(geometry, [style])
```

### Parameters

- `geometry` — Geo data
- `style` — Optional style name

### Returned value

- The SVG representation of the geometry:
    - SVG circle
    - SVG polygon
    - SVG path

Type: String

## Examples

### Circle
```sql
SELECT SVG((0., 0.))
```
```response
<circle cx="0" cy="0" r="5" style=""/>
```

### Polygon
```sql
SELECT SVG([(0., 0.), (10, 0), (10, 10), (0, 10)])
```
```response
<polygon points="0,0 0,10 10,10 10,0 0,0" style=""/>
```

### Path
```sql
SELECT SVG([[(0., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 5)]])
```
```response
<g fill-rule="evenodd"><path d="M 0,0 L 0,10 L 10,10 L 10,0 L 0,0M 4,4 L 5,4 L 5,5 L 4,5 L 4,4 z " style=""/></g>
```
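
The optional `style` argument is presumably inserted verbatim into the `style` attribute of the generated element; a hedged sketch:

``` sql
-- Expected shape of the output: <circle cx="0" cy="0" r="5" style="stroke:red"/>
SELECT SVG((0., 0.), 'stroke:red')
```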

@ -560,77 +560,6 @@ Result:

└───────────────────────────┘
```

## Entropy-learned hashing (experimental)

Entropy-learned hashing is disabled by default, to enable: `SET allow_experimental_hash_functions=1`.

Entropy-learned hashing is not a standalone hash function like `metroHash64`, `cityHash64`, `sipHash64` etc. Instead, it aims to preprocess
the data to be hashed in a way that a standalone hash function can be computed more efficiently while not compromising the hash quality,
i.e. the randomness of the hashes. For that, entropy-based hashing chooses a subset of the bytes in a training data set of Strings which has
the same randomness (entropy) as the original Strings. For example, if the Strings are on average 100 bytes long, and we pick a subset of 5
bytes, then a hash function will be 95% less expensive to evaluate. For details of the method, refer to [Entropy-Learned Hashing: Constant
Time Hashing with Controllable Uniformity](https://doi.org/10.1145/3514221.3517894).

Entropy-learned hashing has two phases:

1. A training phase on a representative but typically small set of Strings to be hashed. Training consists of two steps:

   - Function `prepareTrainEntropyLearnedHash(data, id)` caches the training data in a global state under a given `id`. It returns dummy
     value `0` on every row.
   - Function `trainEntropyLearnedHash(id)` computes a minimal partial sub-key of the training data stored under `id` in the global
     state. The cached training data in the global state is replaced by the partial key. Dummy value `0` is returned on every row.

2. An evaluation phase where hashes are computed using the previously calculated partial sub-keys. Function `entropyLearnedHash(data, id)`
   hashes `data` using the partial subkey stored as `id`. CityHash64 is used as hash function.

The reason that the training phase comprises two steps is that ClickHouse processes data at chunk granularity but entropy-learned hashing
needs to process the entire training set at once.

Since functions `prepareTrainEntropyLearnedHash()` and `trainEntropyLearnedHash()` access global state, they should not be called in
parallel with the same `id`.

**Syntax**

``` sql
prepareTrainEntropyLearnedHash(data, id);
trainEntropyLearnedHash(id);
entropyLearnedHash(data, id);
```

**Example**

```sql
SET allow_experimental_hash_functions=1;
CREATE TABLE tab (col String) ENGINE=Memory;
INSERT INTO tab VALUES ('aa'), ('ba'), ('ca');

SELECT prepareTrainEntropyLearnedHash(col, 'id1') AS prepared FROM tab;
SELECT trainEntropyLearnedHash('id1') AS trained FROM tab;
SELECT entropyLearnedHash(col, 'id1') as hashes FROM tab;
```

Result:

``` response
┌─prepared─┐
│        0 │
│        0 │
│        0 │
└──────────┘

┌─trained─┐
│       0 │
│       0 │
│       0 │
└─────────┘

┌───────────────hashes─┐
│  2603192927274642682 │
│  4947675599669400333 │
│ 10783339242466472992 │
└──────────────────────┘
```

## metroHash64

Produces a 64-bit [MetroHash](http://www.jandrewrogers.com/2015/05/27/metrohash/) hash value.

@ -697,7 +626,7 @@ SELECT murmurHash2_64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:

## gccMurmurHash

Calculates a 64-bit [MurmurHash2](https://github.com/aappleby/smhasher) hash value using the same hash seed as [gcc](https://github.com/gcc-mirror/gcc/blob/41d6b10e96a1de98e90a7c0378437c3255814b16/libstdc%2B%2B-v3/include/bits/functional_hash.h#L191). It is portable between CLang and GCC builds.
Calculates a 64-bit [MurmurHash2](https://github.com/aappleby/smhasher) hash value using the same hash seed as [gcc](https://github.com/gcc-mirror/gcc/blob/41d6b10e96a1de98e90a7c0378437c3255814b16/libstdc%2B%2B-v3/include/bits/functional_hash.h#L191). It is portable between Clang and GCC builds.

**Syntax**

@ -1161,7 +1090,7 @@ wordShingleSimHashUTF8(string[, shinglesize])

**Arguments**

- `string` — String. [String](/docs/en/sql-reference/data-types/string.md).
- `shinglesize` — The size of a word shingle. Optinal. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md).
- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md).

**Returned value**
Some files were not shown because too many files have changed in this diff.