Merge branch 'master' of github.com:ClickHouse/ClickHouse into hedged-requests

This commit is contained in:
Pavel Kruglov 2021-01-29 21:08:47 +03:00
commit 25e85d71ee
890 changed files with 27103 additions and 17811 deletions

View File

@ -12,6 +12,9 @@ assignees: ''
**Describe the bug**
A clear and concise description of what works not as it is supposed to.
**Does it reproduce on recent release?**
[The list of releases](https://github.com/ClickHouse/ClickHouse/blob/master/utils/list-versions/version_date.tsv)
**How to reproduce**
* Which ClickHouse server version to use
* Which interface to use, if matters

View File

@ -0,0 +1,19 @@
---
name: Sanitizer alert
about: Potential issue has been found by special code instrumentation
title: ''
labels: testing
assignees: ''
---
(you don't have to strictly follow this form)
**Describe the bug**
A link to the report
**How to reproduce**
Try to reproduce the report and copy the tables and queries involved.
**Error message and/or stacktrace**
You can find additional information in server logs.

2
.gitmodules vendored
View File

@ -84,7 +84,7 @@
url = https://github.com/google/brotli.git
[submodule "contrib/h3"]
path = contrib/h3
url = https://github.com/uber/h3
url = https://github.com/ClickHouse-Extras/h3
[submodule "contrib/hyperscan"]
path = contrib/hyperscan
url = https://github.com/ClickHouse-Extras/hyperscan.git

45
.pylintrc Normal file
View File

@ -0,0 +1,45 @@
# vim: ft=config
[BASIC]
max-module-lines=2000
# due to SQL
max-line-length=200
# Drop/decrease them one day:
max-branches=50
max-nested-blocks=10
max-statements=200
[FORMAT]
ignore-long-lines = (# )?<?https?://\S+>?$
[MESSAGES CONTROL]
disable = bad-continuation,
missing-docstring,
bad-whitespace,
too-few-public-methods,
invalid-name,
too-many-arguments,
keyword-arg-before-vararg,
too-many-locals,
too-many-instance-attributes,
cell-var-from-loop,
fixme,
too-many-public-methods,
wildcard-import,
unused-wildcard-import,
singleton-comparison,
# pytest.mark.parametrize is not callable (not-callable)
not-callable,
# https://github.com/PyCQA/pylint/issues/3882
# [Python 3.9] Value 'Optional' is unsubscriptable (unsubscriptable-object) (also Union)
unsubscriptable-object,
# Drop them one day:
redefined-outer-name,
broad-except,
bare-except,
no-else-return,
global-statement
[SIMILARITIES]
# due to SQL
min-similarity-lines=1000

File diff suppressed because it is too large Load Diff

View File

@ -1,4 +1,4 @@
Copyright 2016-2020 Yandex LLC
Copyright 2016-2021 Yandex LLC
Apache License
Version 2.0, January 2004
@ -188,7 +188,7 @@ Copyright 2016-2020 Yandex LLC
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright 2016-2020 Yandex LLC
Copyright 2016-2021 Yandex LLC
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.

View File

@ -16,4 +16,4 @@ ClickHouse® is an open-source column-oriented database management system that a
* You can also [fill this form](https://clickhouse.tech/#meet) to meet Yandex ClickHouse team in person.
## Upcoming Events
* [SF Bay Area ClickHouse Virtual Office Hours (online)](https://www.meetup.com/San-Francisco-Bay-Area-ClickHouse-Meetup/events/274273549/) on 20 January 2020.
* [Chinese ClickHouse Meetup (online)](http://hdxu.cn/8KxZE) on 6 February 2021.

View File

@ -15,9 +15,13 @@ currently being supported with security updates:
| 20.4 | :x: |
| 20.5 | :x: |
| 20.6 | :x: |
| 20.7 | :white_check_mark: |
| 20.7 | :x: |
| 20.8 | :white_check_mark: |
| 20.9 | :white_check_mark: |
| 20.9 | :x: |
| 20.10 | :x: |
| 20.11 | :white_check_mark: |
| 20.12 | :white_check_mark: |
| 21.1 | :white_check_mark: |
## Reporting a Vulnerability

View File

@ -229,8 +229,12 @@ public:
inline UInt8 daysInMonth(UInt16 year, UInt8 month) const
{
UInt16 idx = year - DATE_LUT_MIN_YEAR;
if (unlikely(idx >= DATE_LUT_YEARS))
return 31; /// Implementation specific behaviour on overflow.
/// 32 makes arithmetic more simple.
DayNum any_day_of_month = DayNum(years_lut[year - DATE_LUT_MIN_YEAR] + 32 * (month - 1));
DayNum any_day_of_month = DayNum(years_lut[idx] + 32 * (month - 1));
return lut[any_day_of_month].days_in_month;
}
@ -767,7 +771,7 @@ public:
/// Adding calendar intervals.
/// Implementation specific behaviour when delta is too big.
inline time_t addDays(time_t t, Int64 delta) const
inline NO_SANITIZE_UNDEFINED time_t addDays(time_t t, Int64 delta) const
{
DayNum index = findIndex(t);
time_t time_offset = toHour(t) * 3600 + toMinute(t) * 60 + toSecond(t);
@ -780,7 +784,7 @@ public:
return lut[index].date + time_offset;
}
inline time_t addWeeks(time_t t, Int64 delta) const
inline NO_SANITIZE_UNDEFINED time_t addWeeks(time_t t, Int64 delta) const
{
return addDays(t, delta * 7);
}
@ -812,7 +816,7 @@ public:
return lut[result_day].date + time_offset;
}
inline DayNum addMonths(DayNum d, Int64 delta) const
inline NO_SANITIZE_UNDEFINED DayNum addMonths(DayNum d, Int64 delta) const
{
const Values & values = lut[d];
@ -836,18 +840,18 @@ public:
}
}
inline time_t addQuarters(time_t t, Int64 delta) const
inline NO_SANITIZE_UNDEFINED time_t addQuarters(time_t t, Int64 delta) const
{
return addMonths(t, delta * 3);
}
inline DayNum addQuarters(DayNum d, Int64 delta) const
inline NO_SANITIZE_UNDEFINED DayNum addQuarters(DayNum d, Int64 delta) const
{
return addMonths(d, delta * 3);
}
/// Saturation can occur if 29 Feb is mapped to non-leap year.
inline time_t addYears(time_t t, Int64 delta) const
inline NO_SANITIZE_UNDEFINED time_t addYears(time_t t, Int64 delta) const
{
DayNum result_day = addYears(toDayNum(t), delta);
@ -859,7 +863,7 @@ public:
return lut[result_day].date + time_offset;
}
inline DayNum addYears(DayNum d, Int64 delta) const
inline NO_SANITIZE_UNDEFINED DayNum addYears(DayNum d, Int64 delta) const
{
const Values & values = lut[d];

View File

@ -84,10 +84,12 @@
# define NO_SANITIZE_UNDEFINED __attribute__((__no_sanitize__("undefined")))
# define NO_SANITIZE_ADDRESS __attribute__((__no_sanitize__("address")))
# define NO_SANITIZE_THREAD __attribute__((__no_sanitize__("thread")))
# define ALWAYS_INLINE_NO_SANITIZE_UNDEFINED __attribute__((__always_inline__, __no_sanitize__("undefined")))
#else /// It does not work in GCC. GCC 7 cannot recognize this attribute and GCC 8 simply ignores it.
# define NO_SANITIZE_UNDEFINED
# define NO_SANITIZE_ADDRESS
# define NO_SANITIZE_THREAD
# define ALWAYS_INLINE_NO_SANITIZE_UNDEFINED ALWAYS_INLINE
#endif
/// A template function for suppressing warnings about unused variables or function results.

View File

@ -104,8 +104,3 @@ template <> struct is_big_int<wUInt256> { static constexpr bool value = true; };
template <typename T>
inline constexpr bool is_big_int_v = is_big_int<T>::value;
template <typename To, typename From>
inline To bigint_cast(const From & x [[maybe_unused]])
{
return static_cast<To>(x);
}

View File

@ -27,9 +27,12 @@ if (GLIBC_COMPATIBILITY)
list(APPEND glibc_compatibility_sources musl/getentropy.c)
endif()
add_library (clickhouse_memcpy OBJECT
${ClickHouse_SOURCE_DIR}/contrib/FastMemcpy/memcpy_wrapper.c
)
if (NOT ARCH_ARM)
# clickhouse_memcpy don't support ARCH_ARM, see https://github.com/ClickHouse/ClickHouse/issues/18951
add_library (clickhouse_memcpy OBJECT
${ClickHouse_SOURCE_DIR}/contrib/FastMemcpy/memcpy_wrapper.c
)
endif()
# Need to omit frame pointers to match the performance of glibc
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fomit-frame-pointer")

View File

@ -4,6 +4,12 @@
#include "syscall.h"
#include "atomic.h"
#if defined(__has_feature)
#if __has_feature(memory_sanitizer)
#include <sanitizer/msan_interface.h>
#endif
#endif
#ifdef VDSO_GETCPU_SYM
static void *volatile vdso_func;
@ -37,6 +43,13 @@ int sched_getcpu(void)
#endif
r = __syscall(SYS_getcpu, &cpu, 0, 0);
if (!r) return cpu;
if (!r) {
#if defined(__has_feature)
#if __has_feature(memory_sanitizer)
__msan_unpoison(&cpu, sizeof(cpu));
#endif
#endif
return cpu;
}
return __syscall_ret(r);
}

View File

@ -118,7 +118,9 @@ TRAP(logout)
TRAP(logwtmp)
TRAP(lrand48)
TRAP(mallinfo)
TRAP(mallopt)
#if !defined(SANITIZER)
TRAP(mallopt) // Used by tsan
#endif
TRAP(mblen)
TRAP(mbrlen)
TRAP(mbrtowc)
@ -193,7 +195,9 @@ TRAP(dbm_nextkey)
TRAP(dbm_open)
TRAP(dbm_store)
TRAP(dirname)
TRAP(dlerror)
#if !defined(SANITIZER)
TRAP(dlerror) // Used by tsan
#endif
TRAP(ftw)
TRAP(getc_unlocked)
//TRAP(getenv) // Ok at program startup

View File

@ -1,9 +1,9 @@
# This strings autochanged from release_lib.sh:
SET(VERSION_REVISION 54445)
SET(VERSION_REVISION 54447)
SET(VERSION_MAJOR 21)
SET(VERSION_MINOR 1)
SET(VERSION_MINOR 2)
SET(VERSION_PATCH 1)
SET(VERSION_GITHASH 667dd0cf0ccecdaa6f334177b7ece2f53bd196a1)
SET(VERSION_DESCRIBE v21.1.1.5646-prestable)
SET(VERSION_STRING 21.1.1.5646)
SET(VERSION_GITHASH 53d0c9fa7255aa1dc48991d19f4246ff71cc2fd7)
SET(VERSION_DESCRIBE v21.2.1.1-prestable)
SET(VERSION_STRING 21.2.1.1)
# end of autochange

View File

@ -1,10 +1,4 @@
if (NOT ARCH_ARM AND OPENSSL_FOUND)
option (ENABLE_RDKAFKA "Enable kafka" ${ENABLE_LIBRARIES})
elseif(ENABLE_RDKAFKA AND NOT OPENSSL_FOUND)
message (${RECONFIGURE_MESSAGE_LEVEL} "Can't use librdkafka without SSL")
elseif(ENABLE_RDKAFKA)
message (${RECONFIGURE_MESSAGE_LEVEL} "librdafka is not supported on ARM and on FreeBSD")
endif ()
option (ENABLE_RDKAFKA "Enable kafka" ${ENABLE_LIBRARIES})
if (NOT ENABLE_RDKAFKA)
if (USE_INTERNAL_RDKAFKA_LIBRARY)
@ -13,11 +7,7 @@ if (NOT ENABLE_RDKAFKA)
return()
endif()
if (NOT ARCH_ARM)
option (USE_INTERNAL_RDKAFKA_LIBRARY "Set to FALSE to use system librdkafka instead of the bundled" ${NOT_UNBUNDLED})
elseif(USE_INTERNAL_RDKAFKA_LIBRARY)
message (${RECONFIGURE_MESSAGE_LEVEL} "Can't use internal librdkafka with ARCH_ARM=${ARCH_ARM}")
endif ()
option (USE_INTERNAL_RDKAFKA_LIBRARY "Set to FALSE to use system librdkafka instead of the bundled" ${NOT_UNBUNDLED})
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/cppkafka/CMakeLists.txt")
if(USE_INTERNAL_RDKAFKA_LIBRARY)
@ -67,14 +57,12 @@ if (RDKAFKA_LIB AND RDKAFKA_INCLUDE_DIR)
if (LZ4_LIBRARY)
list (APPEND RDKAFKA_LIBRARY ${LZ4_LIBRARY})
endif ()
elseif (NOT MISSING_INTERNAL_RDKAFKA_LIBRARY AND NOT MISSING_INTERNAL_CPPKAFKA_LIBRARY AND NOT ARCH_ARM)
elseif (NOT MISSING_INTERNAL_RDKAFKA_LIBRARY AND NOT MISSING_INTERNAL_CPPKAFKA_LIBRARY)
set (USE_INTERNAL_RDKAFKA_LIBRARY 1)
set (RDKAFKA_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/librdkafka/src")
set (RDKAFKA_LIBRARY rdkafka)
set (CPPKAFKA_LIBRARY cppkafka)
set (USE_RDKAFKA 1)
elseif(ARCH_ARM)
message (${RECONFIGURE_MESSAGE_LEVEL} "Using internal rdkafka on ARM is not supported")
endif ()
message (STATUS "Using librdkafka=${USE_RDKAFKA}: ${RDKAFKA_INCLUDE_DIR} : ${RDKAFKA_LIBRARY} ${CPPKAFKA_LIBRARY}")

View File

@ -35,6 +35,7 @@ if (NOT ZLIB_FOUND AND NOT MISSING_INTERNAL_ZLIB_LIBRARY)
set (ZLIB_INCLUDE_DIRECTORIES ${ZLIB_INCLUDE_DIR}) # for protobuf
set (ZLIB_FOUND 1) # for poco
set (ZLIB_LIBRARIES zlib CACHE INTERNAL "")
set (ZLIB_LIBRARY_NAME ${ZLIB_LIBRARIES}) # for cassandra
set (ZLIB_NAME "${INTERNAL_ZLIB_NAME}")
endif ()

View File

@ -119,12 +119,6 @@ if (USE_INTERNAL_LDAP_LIBRARY)
add_subdirectory (openldap-cmake)
endif ()
# Should go before:
# - mariadb-connector-c
# - aws-s3-cmake
# - sentry-native
add_subdirectory (curl-cmake)
function(mysql_support)
set(CLIENT_PLUGIN_CACHING_SHA2_PASSWORD STATIC)
set(CLIENT_PLUGIN_SHA256_PASSWORD STATIC)
@ -142,6 +136,7 @@ function(mysql_support)
set(ZLIB_LIBRARY ${ZLIB_LIBRARIES})
set(WITH_EXTERNAL_ZLIB ON)
endif()
set(WITH_CURL OFF)
add_subdirectory (mariadb-connector-c)
endfunction()
if (ENABLE_MYSQL AND USE_INTERNAL_MYSQL_LIBRARY)
@ -288,6 +283,10 @@ if (USE_CASSANDRA)
add_subdirectory (cassandra)
endif()
# Should go before:
# - sentry-native
add_subdirectory (curl-cmake)
if (USE_SENTRY)
add_subdirectory (sentry-native)
endif()

2
contrib/aws vendored

@ -1 +1 @@
Subproject commit a220591e335923ce1c19bbf9eb925787f7ab6c13
Subproject commit 7d48b2c8193679cc4516e5bd68ae4a64b94dae7d

2
contrib/cassandra vendored

@ -1 +1 @@
Subproject commit d10187efb25b26da391def077edf3c6f2f3a23dd
Subproject commit b446d7eb68e6962f431e2b3771313bfe9a2bbd93

2
contrib/dragonbox vendored

@ -1 +1 @@
Subproject commit b2751c65c0592c0239aec3becd53d0ea2fde9329
Subproject commit 923705af6fd953aa948fc175f6020b15f7359838

2
contrib/h3 vendored

@ -1 +1 @@
Subproject commit 6cfd649e8c0d3ed913e8aae928a669fc3b8a2365
Subproject commit e209086ae1b5477307f545a0f6111780edc59940

View File

@ -16,6 +16,7 @@ ${H3_SOURCE_DIR}/lib/mathExtensions.c
${H3_SOURCE_DIR}/lib/polygon.c
${H3_SOURCE_DIR}/lib/vec2d.c
${H3_SOURCE_DIR}/lib/vec3d.c
${H3_SOURCE_DIR}/lib/vertex.c
${H3_SOURCE_DIR}/lib/vertexGraph.c
)

2
contrib/krb5 vendored

@ -1 +1 @@
Subproject commit 90ff6f4f8c695d6bf1aaba78a9b8942be92141c2
Subproject commit 5149dea4e2be0f67707383d2682b897c14631374

2
contrib/libpq vendored

@ -1 +1 @@
Subproject commit 8e7e905854714a7fbb49c124dbc45c7bd4b98e07
Subproject commit 1f9c286dba60809edb64e384d6727d80d269b6cf

2
contrib/librdkafka vendored

@ -1 +1 @@
Subproject commit f2f6616419d567c9198aef0d1133a2e9b4f02276
Subproject commit cf11d0aa36d4738f2c9bf4377807661660f1be76

View File

@ -2,26 +2,25 @@ set(RDKAFKA_SOURCE_DIR ${ClickHouse_SOURCE_DIR}/contrib/librdkafka/src)
set(SRCS
${RDKAFKA_SOURCE_DIR}/crc32c.c
${RDKAFKA_SOURCE_DIR}/rdkafka_zstd.c
# ${RDKAFKA_SOURCE_DIR}/lz4.c
# ${RDKAFKA_SOURCE_DIR}/lz4frame.c
# ${RDKAFKA_SOURCE_DIR}/lz4hc.c
${RDKAFKA_SOURCE_DIR}/rdxxhash.c
# ${RDKAFKA_SOURCE_DIR}/regexp.c
${RDKAFKA_SOURCE_DIR}/rdaddr.c
${RDKAFKA_SOURCE_DIR}/rdavl.c
${RDKAFKA_SOURCE_DIR}/rdbuf.c
${RDKAFKA_SOURCE_DIR}/rdcrc32.c
${RDKAFKA_SOURCE_DIR}/rddl.c
${RDKAFKA_SOURCE_DIR}/rdfnv1a.c
${RDKAFKA_SOURCE_DIR}/rdgz.c
${RDKAFKA_SOURCE_DIR}/rdhdrhistogram.c
${RDKAFKA_SOURCE_DIR}/rdkafka.c
${RDKAFKA_SOURCE_DIR}/rdkafka_admin.c # looks optional
${RDKAFKA_SOURCE_DIR}/rdkafka_assignment.c
${RDKAFKA_SOURCE_DIR}/rdkafka_assignor.c
${RDKAFKA_SOURCE_DIR}/rdkafka_aux.c # looks optional
${RDKAFKA_SOURCE_DIR}/rdkafka_background.c
${RDKAFKA_SOURCE_DIR}/rdkafka_broker.c
${RDKAFKA_SOURCE_DIR}/rdkafka_buf.c
${RDKAFKA_SOURCE_DIR}/rdkafka.c
${RDKAFKA_SOURCE_DIR}/rdkafka_cert.c
${RDKAFKA_SOURCE_DIR}/rdkafka_cgrp.c
${RDKAFKA_SOURCE_DIR}/rdkafka_conf.c
@ -29,7 +28,9 @@ set(SRCS
${RDKAFKA_SOURCE_DIR}/rdkafka_error.c
${RDKAFKA_SOURCE_DIR}/rdkafka_event.c
${RDKAFKA_SOURCE_DIR}/rdkafka_feature.c
${RDKAFKA_SOURCE_DIR}/rdkafka_header.c
${RDKAFKA_SOURCE_DIR}/rdkafka_idempotence.c
${RDKAFKA_SOURCE_DIR}/rdkafka_interceptor.c
${RDKAFKA_SOURCE_DIR}/rdkafka_lz4.c
${RDKAFKA_SOURCE_DIR}/rdkafka_metadata.c
${RDKAFKA_SOURCE_DIR}/rdkafka_metadata_cache.c
@ -49,20 +50,22 @@ set(SRCS
${RDKAFKA_SOURCE_DIR}/rdkafka_request.c
${RDKAFKA_SOURCE_DIR}/rdkafka_roundrobin_assignor.c
${RDKAFKA_SOURCE_DIR}/rdkafka_sasl.c
${RDKAFKA_SOURCE_DIR}/rdkafka_sasl_oauthbearer.c
# ${RDKAFKA_SOURCE_DIR}/rdkafka_sasl_cyrus.c # optionally included below
# ${RDKAFKA_SOURCE_DIR}/rdkafka_sasl_oauthbearer.c # optionally included below
${RDKAFKA_SOURCE_DIR}/rdkafka_sasl_plain.c
${RDKAFKA_SOURCE_DIR}/rdkafka_sasl_scram.c
# ${RDKAFKA_SOURCE_DIR}/rdkafka_sasl_scram.c # optionally included below
# ${RDKAFKA_SOURCE_DIR}/rdkafka_sasl_win32.c
${RDKAFKA_SOURCE_DIR}/rdkafka_ssl.c
# ${RDKAFKA_SOURCE_DIR}/rdkafka_ssl.c # optionally included below
${RDKAFKA_SOURCE_DIR}/rdkafka_sticky_assignor.c
${RDKAFKA_SOURCE_DIR}/rdkafka_subscription.c
${RDKAFKA_SOURCE_DIR}/rdkafka_timer.c
${RDKAFKA_SOURCE_DIR}/rdkafka_topic.c
${RDKAFKA_SOURCE_DIR}/rdkafka_transport.c
${RDKAFKA_SOURCE_DIR}/rdkafka_interceptor.c
${RDKAFKA_SOURCE_DIR}/rdkafka_header.c
${RDKAFKA_SOURCE_DIR}/rdkafka_txnmgr.c
${RDKAFKA_SOURCE_DIR}/rdkafka_zstd.c
${RDKAFKA_SOURCE_DIR}/rdlist.c
${RDKAFKA_SOURCE_DIR}/rdlog.c
${RDKAFKA_SOURCE_DIR}/rdmap.c
${RDKAFKA_SOURCE_DIR}/rdmurmur2.c
${RDKAFKA_SOURCE_DIR}/rdports.c
${RDKAFKA_SOURCE_DIR}/rdrand.c
@ -70,18 +73,42 @@ set(SRCS
${RDKAFKA_SOURCE_DIR}/rdstring.c
${RDKAFKA_SOURCE_DIR}/rdunittest.c
${RDKAFKA_SOURCE_DIR}/rdvarint.c
${RDKAFKA_SOURCE_DIR}/rdxxhash.c
# ${RDKAFKA_SOURCE_DIR}/regexp.c
${RDKAFKA_SOURCE_DIR}/snappy.c
${RDKAFKA_SOURCE_DIR}/tinycthread.c
${RDKAFKA_SOURCE_DIR}/tinycthread_extra.c
${RDKAFKA_SOURCE_DIR}/rdgz.c
)
if(${ENABLE_CYRUS_SASL})
message (STATUS "librdkafka with SASL support")
set(SRCS
${SRCS}
${RDKAFKA_SOURCE_DIR}/rdkafka_sasl_cyrus.c # needed to support Kerberos, requires cyrus-sasl
)
set(WITH_SASL_CYRUS 1)
endif()
if(OPENSSL_FOUND)
message (STATUS "librdkafka with SSL support")
set(WITH_SSL 1)
if(${ENABLE_CYRUS_SASL})
set(WITH_SASL_SCRAM 1)
set(WITH_SASL_OAUTHBEARER 1)
endif()
endif()
if(WITH_SSL)
list(APPEND SRCS ${RDKAFKA_SOURCE_DIR}/rdkafka_ssl.c)
endif()
if(WITH_SASL_CYRUS)
list(APPEND SRCS ${RDKAFKA_SOURCE_DIR}/rdkafka_sasl_cyrus.c) # needed to support Kerberos, requires cyrus-sasl
endif()
if(WITH_SASL_SCRAM)
list(APPEND SRCS ${RDKAFKA_SOURCE_DIR}/rdkafka_sasl_scram.c)
endif()
if(WITH_SASL_OAUTHBEARER)
list(APPEND SRCS ${RDKAFKA_SOURCE_DIR}/rdkafka_sasl_oauthbearer.c)
endif()
add_library(rdkafka ${SRCS})
@ -97,7 +124,6 @@ if(OPENSSL_SSL_LIBRARY AND OPENSSL_CRYPTO_LIBRARY)
endif()
if(${ENABLE_CYRUS_SASL})
target_link_libraries(rdkafka PRIVATE ${CYRUS_SASL_LIBRARY})
set(WITH_SASL_CYRUS 1)
endif()
file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/auxdir)

View File

@ -1,7 +1,6 @@
// Originally generated by ./configure
#ifndef _CONFIG_H_
#define _CONFIG_H_
#define ARCH "x86_64"
#define BUILT_WITH "GCC GXX PKGCONFIG OSXLD LIBDL PLUGINS ZLIB SSL SASL_CYRUS ZSTD HDRHISTOGRAM LZ4_EXT SNAPPY SOCKEM SASL_SCRAM CRC32C_HW"
#define CPU "generic"
@ -44,9 +43,9 @@
// atomic_64
#define ATOMIC_OP(OP1,OP2,PTR,VAL) __atomic_ ## OP1 ## _ ## OP2(PTR, VAL, __ATOMIC_SEQ_CST)
// parseversion
#define RDKAFKA_VERSION_STR "0.11.4"
#define RDKAFKA_VERSION_STR "1.6.0"
// parseversion
#define MKL_APP_VERSION "0.11.4"
#define MKL_APP_VERSION "1.6.0"
// libdl
#define WITH_LIBDL 1
// WITH_PLUGINS
@ -60,14 +59,14 @@
// WITH_SOCKEM
#define WITH_SOCKEM 1
// libssl
#define WITH_SSL 1
#cmakedefine WITH_SSL 1
// WITH_SASL_SCRAM
#define WITH_SASL_SCRAM 1
#cmakedefine WITH_SASL_SCRAM 1
// WITH_SASL_OAUTHBEARER
#define WITH_SASL_OAUTHBEARER 1
#cmakedefine WITH_SASL_OAUTHBEARER 1
#cmakedefine WITH_SASL_CYRUS 1
// crc32chw
#if !defined(__PPC__)
#if !defined(__PPC__) && (!defined(__aarch64__) || defined(__ARM_FEATURE_CRC32))
#define WITH_CRC32C_HW 1
#endif
// regex

2
contrib/libuv vendored

@ -1 +1 @@
Subproject commit 84438304f41d8ea6670ee5409f4d6c63ca784f28
Subproject commit e2e9b7e9f978ce8a1367b5fe781d97d1ce9f94ab

4
debian/changelog vendored
View File

@ -1,5 +1,5 @@
clickhouse (21.1.0) unstable; urgency=low
clickhouse (21.2.1.1) unstable; urgency=low
* Modified source code
-- Alexey Milovidov <milovidov@yandex-team.ru> Mon, 11 Jan 2021 03:51:08 +0300
-- clickhouse-release <clickhouse-release@yandex-team.ru> Mon, 11 Jan 2021 11:12:08 +0300

View File

@ -1,7 +1,7 @@
FROM ubuntu:18.04
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=21.1.0
ARG version=21.2.1.*
RUN apt-get update \
&& apt-get install --yes --no-install-recommends \

View File

@ -137,7 +137,8 @@
"docker/test/stateless",
"docker/test/stateless_unbundled",
"docker/test/stateless_pytest",
"docker/test/integration/base"
"docker/test/integration/base",
"docker/test/fuzzer"
]
},
"docker/packager/unbundled": {

View File

@ -1,7 +1,7 @@
FROM ubuntu:20.04
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=21.1.0
ARG version=21.2.1.*
ARG gosu_ver=1.10
# user/group precreated explicitly with fixed uid/gid on purpose.
@ -10,7 +10,6 @@ ARG gosu_ver=1.10
# We do that in advance at the begining of Dockerfile before any packages will be
# installed to prevent picking those uid / gid by some unrelated software.
# The same uid / gid (101) is used both for alpine and ubuntu.
# Number 101 is used by default in openshift
RUN groupadd -r clickhouse --gid=101 \
&& useradd -r -g clickhouse --uid=101 --home-dir=/var/lib/clickhouse --shell=/bin/bash clickhouse \
@ -37,7 +36,12 @@ RUN groupadd -r clickhouse --gid=101 \
/var/lib/apt/lists/* \
/var/cache/debconf \
/tmp/* \
&& apt-get clean
&& apt-get clean \
&& mkdir -p /var/lib/clickhouse /var/log/clickhouse-server /etc/clickhouse-server /etc/clickhouse-client \
&& chmod ugo+Xrw -R /var/lib/clickhouse /var/log/clickhouse-server /etc/clickhouse-server /etc/clickhouse-client
# we need to allow "others" access to clickhouse folder, because docker container
# can be started with arbitrary uid (openshift usecase)
ADD https://github.com/tianon/gosu/releases/download/$gosu_ver/gosu-amd64 /bin/gosu

View File

@ -14,16 +14,18 @@ COPY alpine-root/ /
# We do that in advance at the begining of Dockerfile before any packages will be
# installed to prevent picking those uid / gid by some unrelated software.
# The same uid / gid (101) is used both for alpine and ubuntu.
# Number 101 is used by default in openshift
RUN addgroup -S -g 101 clickhouse \
&& adduser -S -h /var/lib/clickhouse -s /bin/bash -G clickhouse -g "ClickHouse server" -u 101 clickhouse \
&& mkdir -p /var/lib/clickhouse /var/log/clickhouse-server /etc/clickhouse-server /etc/clickhouse-client \
&& chown clickhouse:clickhouse /var/lib/clickhouse \
&& chmod 700 /var/lib/clickhouse \
&& chown root:clickhouse /var/log/clickhouse-server \
&& chmod 775 /var/log/clickhouse-server \
&& chmod +x /entrypoint.sh \
&& apk add --no-cache su-exec bash
&& apk add --no-cache su-exec bash \
&& chmod ugo+Xrw -R /var/lib/clickhouse /var/log/clickhouse-server /etc/clickhouse-server /etc/clickhouse-client
# we need to allow "others" access to clickhouse folder, because docker container
# can be started with arbitrary uid (openshift usecase)
EXPOSE 9000 8123 9009

View File

@ -1,7 +1,7 @@
FROM ubuntu:18.04
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=21.1.0
ARG version=21.2.1.*
RUN apt-get update && \
apt-get install -y apt-transport-https dirmngr && \

View File

@ -43,6 +43,7 @@ RUN apt-get update \
clang-tidy-${LLVM_VERSION} \
cmake \
curl \
lsof \
expect \
fakeroot \
git \

View File

@ -269,6 +269,8 @@ function run_tests
01281_group_by_limit_memory_tracking # max_memory_usage_for_user can interfere another queries running concurrently
01318_encrypt # Depends on OpenSSL
01318_decrypt # Depends on OpenSSL
01663_aes_msan # Depends on OpenSSL
01667_aes_args_check # Depends on OpenSSL
01281_unsucceeded_insert_select_queries_counter
01292_create_user
01294_lazy_database_concurrent
@ -330,12 +332,15 @@ function run_tests
# nc - command not found
01601_proxy_protocol
01622_defaults_for_url_engine
# JSON functions
01666_blns
)
time clickhouse-test -j 8 --order=random --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" -- "$FASTTEST_FOCUS" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt"
time clickhouse-test --hung-check -j 8 --order=random --use-skip-list --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" -- "$FASTTEST_FOCUS" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt"
# substr is to remove semicolon after test name
readarray -t FAILED_TESTS < <(awk '/FAIL|TIMEOUT|ERROR/ { print substr($3, 1, length($3)-1) }' "$FASTTEST_OUTPUT/test_log.txt" | tee "$FASTTEST_OUTPUT/failed-parallel-tests.txt")
readarray -t FAILED_TESTS < <(awk '/\[ FAIL|TIMEOUT|ERROR \]/ { print substr($3, 1, length($3)-1) }' "$FASTTEST_OUTPUT/test_log.txt" | tee "$FASTTEST_OUTPUT/failed-parallel-tests.txt")
# We will rerun sequentially any tests that have failed during parallel run.
# They might have failed because there was some interference from other tests
@ -355,7 +360,7 @@ function run_tests
echo "Going to run again: ${FAILED_TESTS[*]}"
clickhouse-test --order=random --no-long --testname --shard --zookeeper "${FAILED_TESTS[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee -a "$FASTTEST_OUTPUT/test_log.txt"
clickhouse-test --hung-check --order=random --no-long --testname --shard --zookeeper "${FAILED_TESTS[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee -a "$FASTTEST_OUTPUT/test_log.txt"
else
echo "No failed tests"
fi

View File

@ -1,5 +1,5 @@
# docker build -t yandex/clickhouse-fuzzer .
FROM ubuntu:18.04
FROM yandex/clickhouse-test-base
ENV LANG=C.UTF-8
ENV TZ=Europe/Moscow
@ -7,11 +7,7 @@ RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
RUN apt-get update \
&& DEBIAN_FRONTEND=noninteractive apt-get install --yes --no-install-recommends \
bash \
ca-certificates \
curl \
gdb \
git \
libc6-dbg \
moreutils \
ncdu \

View File

@ -11,6 +11,10 @@
<max>10</max>
</max_execution_time>
<max_memory_usage>
<max>10G</max>
</max_memory_usage>
<!-- Not ready for production -->
<compile_expressions>
<readonly />

View File

@ -1,4 +1,6 @@
#!/bin/bash
# shellcheck disable=SC2086
set -eux
set -o pipefail
trap "exit" INT TERM
@ -19,12 +21,16 @@ function clone
git init
git remote add origin https://github.com/ClickHouse/ClickHouse
git fetch --depth=1 origin "$SHA_TO_TEST"
# Network is unreliable. GitHub neither.
for _ in {1..100}; do git fetch --depth=100 origin "$SHA_TO_TEST" && break; sleep 1; done
# Used to obtain the list of modified or added tests
for _ in {1..100}; do git fetch --depth=100 origin master && break; sleep 1; done
# If not master, try to fetch pull/.../{head,merge}
if [ "$PR_TO_TEST" != "0" ]
then
git fetch --depth=1 origin "refs/pull/$PR_TO_TEST/*:refs/heads/pull/$PR_TO_TEST/*"
for _ in {1..100}; do git fetch --depth=100 origin "refs/pull/$PR_TO_TEST/*:refs/heads/pull/$PR_TO_TEST/*" && break; sleep 1; done
fi
git checkout "$SHA_TO_TEST"
@ -33,9 +39,6 @@ function clone
function download
{
# wget -O- -nv -nd -c "https://clickhouse-builds.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/performance/performance.tgz" \
# | tar --strip-components=1 -zxv
wget -nv -nd -c "https://clickhouse-builds.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/$BINARY_TO_DOWNLOAD/clickhouse"
chmod +x clickhouse
ln -s ./clickhouse ./clickhouse-server
@ -73,7 +76,19 @@ function watchdog
function fuzz
{
# Obtain the list of newly added tests. They will be fuzzed in more extreme way than other tests.
cd ch
NEW_TESTS=$(git diff --name-only "$(git merge-base origin/master "$SHA_TO_TEST"~)" "$SHA_TO_TEST" | grep -P 'tests/queries/0_stateless/.*\.sql' | sed -r -e 's!^!ch/!' | sort -R)
cd ..
if [[ -n "$NEW_TESTS" ]]
then
NEW_TESTS_OPT="--interleave-queries-file ${NEW_TESTS}"
else
NEW_TESTS_OPT=""
fi
./clickhouse-server --config-file db/config.xml -- --path db 2>&1 | tail -100000 > server.log &
server_pid=$!
kill -0 $server_pid
while ! ./clickhouse-client --query "select 1" && kill -0 $server_pid ; do echo . ; sleep 1 ; done
@ -81,11 +96,22 @@ function fuzz
kill -0 $server_pid
echo Server started
echo "
handle all noprint
handle SIGSEGV stop print
handle SIGBUS stop print
continue
thread apply all backtrace
continue
" > script.gdb
gdb -batch -command script.gdb -p "$(pidof clickhouse-server)" &
fuzzer_exit_code=0
# SC2012: Use find instead of ls to better handle non-alphanumeric filenames. They are all alphanumeric.
# SC2046: Quote this to prevent word splitting. Actually I need word splitting.
# shellcheck disable=SC2012,SC2046
./clickhouse-client --query-fuzzer-runs=1000 --queries-file $(ls -1 ch/tests/queries/0_stateless/*.sql | sort -R) \
./clickhouse-client --query-fuzzer-runs=1000 --queries-file $(ls -1 ch/tests/queries/0_stateless/*.sql | sort -R) $NEW_TESTS_OPT \
> >(tail -n 100000 > fuzzer.log) \
2>&1 \
|| fuzzer_exit_code=$?
@ -107,7 +133,7 @@ function fuzz
case "$stage" in
"")
;&
;& # Did you know? This is "fallthrough" in bash. https://stackoverflow.com/questions/12010686/case-statement-fallthrough
"clone")
time clone
if [ -v FUZZ_LOCAL_SCRIPT ]
@ -164,16 +190,16 @@ case "$stage" in
# Lost connection to the server. This probably means that the server died
# with abort.
echo "failure" > status.txt
if ! grep -ao "Received signal.*\|Logical error.*\|Assertion.*failed\|Failed assertion.*" server.log > description.txt
if ! grep -ao "Received signal.*\|Logical error.*\|Assertion.*failed\|Failed assertion.*\|.*runtime error: .*\|.*is located.*\|SUMMARY: MemorySanitizer:.*\|SUMMARY: ThreadSanitizer:.*" server.log > description.txt
then
echo "Lost connection to server. See the logs" > description.txt
echo "Lost connection to server. See the logs." > description.txt
fi
else
# Something different -- maybe the fuzzer itself died? Don't grep the
# server log in this case, because we will find a message about normal
# server termination (Received signal 15), which is confusing.
echo "failure" > status.txt
echo "Fuzzer failed ($fuzzer_exit_code). See the logs" > description.txt
echo "Fuzzer failed ($fuzzer_exit_code). See the logs." > description.txt
fi
;&
"report")

View File

@ -62,6 +62,7 @@ RUN python3 -m pip install \
avro \
cassandra-driver \
confluent-kafka \
dict2xml \
dicttoxml \
docker \
docker-compose==1.22.0 \

View File

@ -1,4 +1,4 @@
#!/usr/bin/python3
#!/usr/bin/env python3
import argparse
import clickhouse_driver

View File

@ -10,6 +10,23 @@ dpkg -i package_folder/clickhouse-client_*.deb
service clickhouse-server start && sleep 5
cd /sqlancer/sqlancer-master
CLICKHOUSE_AVAILABLE=true mvn -Dtest=TestClickHouse test
cp /sqlancer/sqlancer-master/target/surefire-reports/TEST-sqlancer.dbms.TestClickHouse.xml /test_output/result.xml
export TIMEOUT=60
export NUM_QUERIES=1000
( java -jar target/sqlancer-*.jar --num-threads 10 --timeout-seconds $TIMEOUT --num-queries $NUM_QUERIES --username default --password "" clickhouse --oracle TLPWhere | tee /test_output/TLPWhere.out ) 3>&1 1>&2 2>&3 | tee /test_output/TLPWhere.err
( java -jar target/sqlancer-*.jar --num-threads 10 --timeout-seconds $TIMEOUT --num-queries $NUM_QUERIES --username default --password "" clickhouse --oracle TLPGroupBy | tee /test_output/TLPGroupBy.out ) 3>&1 1>&2 2>&3 | tee /test_output/TLPGroupBy.err
( java -jar target/sqlancer-*.jar --num-threads 10 --timeout-seconds $TIMEOUT --num-queries $NUM_QUERIES --username default --password "" clickhouse --oracle TLPHaving | tee /test_output/TLPHaving.out ) 3>&1 1>&2 2>&3 | tee /test_output/TLPHaving.err
( java -jar target/sqlancer-*.jar --num-threads 10 --timeout-seconds $TIMEOUT --num-queries $NUM_QUERIES --username default --password "" clickhouse --oracle TLPWhere --oracle TLPGroupBy | tee /test_output/TLPWhereGroupBy.out ) 3>&1 1>&2 2>&3 | tee /test_output/TLPWhereGroupBy.err
( java -jar target/sqlancer-*.jar --num-threads 10 --timeout-seconds $TIMEOUT --num-queries $NUM_QUERIES --username default --password "" clickhouse --oracle TLPDistinct | tee /test_output/TLPDistinct.out ) 3>&1 1>&2 2>&3 | tee /test_output/TLPDistinct.err
( java -jar target/sqlancer-*.jar --num-threads 10 --timeout-seconds $TIMEOUT --num-queries $NUM_QUERIES --username default --password "" clickhouse --oracle TLPAggregate | tee /test_output/TLPAggregate.out ) 3>&1 1>&2 2>&3 | tee /test_output/TLPAggregate.err
service clickhouse-server stop && sleep 10
ls /var/log/clickhouse-server/
tar czf /test_output/logs.tar.gz -C /var/log/clickhouse-server/ .
tail -n 1000 /var/log/clickhouse-server/stderr.log > /test_output/stderr.log
tail -n 1000 /var/log/clickhouse-server/stdout.log > /test_output/stdout.log
tail -n 1000 /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log
ls /test_output

View File

@ -53,14 +53,15 @@ function run_tests()
if [ "$NUM_TRIES" -gt "1" ]; then
ADDITIONAL_OPTIONS+=('--skip')
ADDITIONAL_OPTIONS+=('00000_no_tests_to_skip')
ADDITIONAL_OPTIONS+=('--jobs')
ADDITIONAL_OPTIONS+=('4')
fi
for _ in $(seq 1 "$NUM_TRIES"); do
clickhouse-test --testname --shard --zookeeper --hung-check --print-time "$SKIP_LIST_OPT" "${ADDITIONAL_OPTIONS[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee -a test_output/test_result.txt
if [ "${PIPESTATUS[0]}" -ne "0" ]; then
break;
fi
done
clickhouse-test --testname --shard --zookeeper --hung-check --print-time \
--test-runs "$NUM_TRIES" \
"$SKIP_LIST_OPT" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \
| ts '%Y-%m-%d %H:%M:%S' \
| tee -a test_output/test_result.txt
}
export -f run_tests

View File

@ -1,7 +1,7 @@
# docker build -t yandex/clickhouse-style-test .
FROM ubuntu:20.04
RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes shellcheck libxml2-utils git python3-pip && pip3 install codespell
RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes shellcheck libxml2-utils git python3-pip python3-pytest && pip3 install codespell
CMD cd /ClickHouse/utils/check-style && \

View File

@ -16,7 +16,7 @@ $ /usr/bin/ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/inst
## Install Required Compilers, Tools, and Libraries {#install-required-compilers-tools-and-libraries}
``` bash
$ brew install cmake ninja libtool gettext
$ brew install cmake ninja libtool gettext llvm
```
## Checkout ClickHouse Sources {#checkout-clickhouse-sources}

View File

@ -0,0 +1,17 @@
---
toc_priority: 32
toc_title: Atomic
---
# Atomic {#atomic}
It is supports non-blocking `DROP` and `RENAME TABLE` queries and atomic `EXCHANGE TABLES t1 AND t2` queries. Atomic database engine is used by default.
## Creating a Database {#creating-a-database}
```sql
CREATE DATABASE test ENGINE = Atomic;
```
[Original article](https://clickhouse.tech/docs/en/engines/database_engines/atomic/) <!--hide-->

View File

@ -8,14 +8,14 @@ toc_title: Introduction
Database engines allow you to work with tables.
By default, ClickHouse uses its native database engine, which provides configurable [table engines](../../engines/table-engines/index.md) and an [SQL dialect](../../sql-reference/syntax.md).
By default, ClickHouse uses database engine [Atomic](../../engines/database-engines/atomic.md). It is provides configurable [table engines](../../engines/table-engines/index.md) and an [SQL dialect](../../sql-reference/syntax.md).
You can also use the following database engines:
- [MySQL](../../engines/database-engines/mysql.md)
- [Lazy](../../engines/database-engines/lazy.md)
- [MaterializeMySQL](../../engines/database-engines/materialize-mysql.md)
- [Lazy](../../engines/database-engines/lazy.md)
[Original article](https://clickhouse.tech/docs/en/database_engines/) <!--hide-->

View File

@ -5,9 +5,11 @@ toc_title: MaterializeMySQL
# MaterializeMySQL {#materialize-mysql}
Creates ClickHouse database with all the tables existing in MySQL, and all the data in those tables.
Creates ClickHouse database with all the tables existing in MySQL, and all the data in those tables.
ClickHouse server works as MySQL replica. It reads binlog and performs DDL and DML queries.
ClickHouse server works as MySQL replica. It reads binlog and performs DDL and DML queries.
This feature is experimental.
## Creating a Database {#creating-a-database}
@ -25,12 +27,12 @@ ENGINE = MaterializeMySQL('host:port', ['database' | database], 'user', 'passwor
## Virtual columns {#virtual-columns}
When working with the `MaterializeMySQL` database engine, [ReplacingMergeTree](../../engines/table-engines/mergetree-family/replacingmergetree.md) tables are used with virtual `_sign` and `_version` columns.
When working with the `MaterializeMySQL` database engine, [ReplacingMergeTree](../../engines/table-engines/mergetree-family/replacingmergetree.md) tables are used with virtual `_sign` and `_version` columns.
- `_version` — Transaction counter. Type [UInt64](../../sql-reference/data-types/int-uint.md).
- `_sign` — Deletion mark. Type [Int8](../../sql-reference/data-types/int-uint.md). Possible values:
- `1` — Row is not deleted,
- `-1` — Row is deleted.
- `_version` — Transaction counter. Type [UInt64](../../sql-reference/data-types/int-uint.md).
- `_sign` — Deletion mark. Type [Int8](../../sql-reference/data-types/int-uint.md). Possible values:
- `1` — Row is not deleted,
- `-1` — Row is deleted.
## Data Types Support {#data_types-support}
@ -61,9 +63,9 @@ Other types are not supported. If MySQL table contains a column of such type, Cl
MySQL DDL queries are converted into the corresponding ClickHouse DDL queries ([ALTER](../../sql-reference/statements/alter/index.md), [CREATE](../../sql-reference/statements/create/index.md), [DROP](../../sql-reference/statements/drop.md), [RENAME](../../sql-reference/statements/rename.md)). If ClickHouse cannot parse some DDL query, the query is ignored.
### Data Replication {#data-replication}
### Data Replication {#data-replication}
MaterializeMySQL does not support direct `INSERT`, `DELETE` and `UPDATE` queries. However, they are supported in terms of data replication:
`MaterializeMySQL` does not support direct `INSERT`, `DELETE` and `UPDATE` queries. However, they are supported in terms of data replication:
- MySQL `INSERT` query is converted into `INSERT` with `_sign=1`.
@ -73,11 +75,11 @@ MaterializeMySQL does not support direct `INSERT`, `DELETE` and `UPDATE` queries
### Selecting from MaterializeMySQL Tables {#select}
`SELECT` query form MaterializeMySQL tables has some specifics:
`SELECT` query from `MaterializeMySQL` tables has some specifics:
- If `_version` is not specified in the `SELECT` query, [FINAL](../../sql-reference/statements/select/from.md#select-from-final) modifier is used. So only rows with `MAX(_version)` are selected.
- If `_sign` is not specified in the `SELECT` query, `WHERE _sign=1` is used by default, so the deleted rows are not included into the result set.
- If `_sign` is not specified in the `SELECT` query, `WHERE _sign=1` is used by default. So the deleted rows are not included into the result set.
### Index Conversion {#index-conversion}
@ -85,12 +87,12 @@ MySQL `PRIMARY KEY` and `INDEX` clauses are converted into `ORDER BY` tuples in
ClickHouse has only one physical order, which is determined by `ORDER BY` clause. To create a new physical order, use [materialized views](../../sql-reference/statements/create/view.md#materialized).
**Notes**
**Notes**
- Rows with `_sign=-1` are not deleted physically from the tables.
- Cascade `UPDATE/DELETE` queries are not supported by the `MaterializeMySQL` engine.
- Replication can be easily broken.
- Manual operations on database and tables are forbidden.
- Rows with `_sign=-1` are not deleted physically from the tables.
- Cascade `UPDATE/DELETE` queries are not supported by the `MaterializeMySQL` engine.
- Replication can be easily broken.
- Manual operations on database and tables are forbidden.
## Examples of Use {#examples-of-use}
@ -105,6 +107,7 @@ mysql> ALTER TABLE db.test ADD COLUMN c VARCHAR(16);
mysql> UPDATE db.test SET c='Wow!', b=222;
mysql> SELECT * FROM test;
```
```text
+---+------+------+
| a | b | c |

View File

@ -51,6 +51,23 @@ All other MySQL data types are converted into [String](../../sql-reference/data-
[Nullable](../../sql-reference/data-types/nullable.md) is supported.
## Global Variables Support {#global-variables-support}
For better compatibility you may address global variables in MySQL style, as `@@identifier`.
These variables are supported:
- `version`
- `max_allowed_packet`
!!! warning "Warning"
By now these variables are stubs and don't correspond to anything.
Example:
``` sql
SELECT @@version;
```
## Examples of Use {#examples-of-use}
Table in MySQL:

View File

@ -7,8 +7,6 @@ toc_title: EmbeddedRocksDB
This engine allows integrating ClickHouse with [rocksdb](http://rocksdb.org/).
`EmbeddedRocksDB` lets you:
## Creating a Table {#table_engine-EmbeddedRocksDB-creating-a-table}
``` sql
@ -23,6 +21,9 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
Required parameters:
- `primary_key_name` any column name in the column list.
- `primary key` must be specified, it supports only one column in the primary key. The primary key will be serialized in binary as a `rocksdb key`.
- columns other than the primary key will be serialized in binary as `rocksdb` value in corresponding order.
- queries with key `equals` or `in` filtering will be optimized to multi keys lookup from `rocksdb`.
Example:
@ -38,8 +39,4 @@ ENGINE = EmbeddedRocksDB
PRIMARY KEY key
```
## Description {#description}
- `primary key` must be specified, it only supports one column in primary key. The primary key will serialized in binary as rocksdb key.
- columns other than the primary key will be serialized in binary as rocksdb value in corresponding order.
- queries with key `equals` or `in` filtering will be optimized to multi keys lookup from rocksdb.
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/embedded-rocksdb/) <!--hide-->

View File

@ -114,6 +114,10 @@ CREATE TABLE big_table (name String, value UInt32) ENGINE = S3('https://storage.
- `_path` — Path to the file.
- `_file` — Name of the file.
**See Also**
- [Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns)
## S3-related settings {#settings}
The following settings can be set before query execution or placed into configuration file.
@ -124,8 +128,29 @@ The following settings can be set before query execution or placed into configur
Security consideration: if malicious user can specify arbitrary S3 URLs, `s3_max_redirects` must be set to zero to avoid [SSRF](https://en.wikipedia.org/wiki/Server-side_request_forgery) attacks; or alternatively, `remote_host_filter` must be specified in server configuration.
**See Also**
### Endpoint-based settings {#endpointsettings}
- [Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns)
The following settings can be specified in configuration file for given endpoint (which will be matched by exact prefix of a URL):
- `endpoint` — Mandatory. Specifies prefix of an endpoint.
- `access_key_id` and `secret_access_key` — Optional. Specifies credentials to use with given endpoint.
- `use_environment_credentials` — Optional, default value is `false`. If set to `true`, S3 client will try to obtain credentials from environment variables and Amazon EC2 metadata for given endpoint.
- `header` — Optional, can be speficied multiple times. Adds specified HTTP header to a request to given endpoint.
This configuration also applies to S3 disks in `MergeTree` table engine family.
Example:
```
<s3>
<endpoint-name>
<endpoint>https://storage.yandexcloud.net/my-test-bucket-768/</endpoint>
<!-- <access_key_id>ACCESS_KEY_ID</access_key_id> -->
<!-- <secret_access_key>SECRET_ACCESS_KEY</secret_access_key> -->
<!-- <use_environment_credentials>false</use_environment_credentials> -->
<!-- <header>Authorization: Bearer SOME-TOKEN</header> -->
</endpoint-name>
</s3>
```
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/s3/) <!--hide-->

View File

@ -657,6 +657,96 @@ The `default` storage policy implies using only one volume, which consists of on
The number of threads performing background moves of data parts can be changed by [background_move_pool_size](../../../operations/settings/settings.md#background_move_pool_size) setting.
## Using S3 for Data Storage {#table_engine-mergetree-s3}
`MergeTree` family table engines is able to store data to [S3](https://aws.amazon.com/s3/) using a disk with type `s3`.
Configuration markup:
``` xml
<storage_configuration>
...
<disks>
<s3>
<type>s3</type>
<endpoint>https://storage.yandexcloud.net/my-bucket/root-path/</endpoint>
<access_key_id>your_access_key_id</access_key_id>
<secret_access_key>your_secret_access_key</secret_access_key>
<proxy>
<uri>http://proxy1</uri>
<uri>http://proxy2</uri>
</proxy>
<connect_timeout_ms>10000</connect_timeout_ms>
<request_timeout_ms>5000</request_timeout_ms>
<max_connections>100</max_connections>
<retry_attempts>10</retry_attempts>
<min_bytes_for_seek>1000</min_bytes_for_seek>
<metadata_path>/var/lib/clickhouse/disks/s3/</metadata_path>
<cache_enabled>true</cache_enabled>
<cache_path>/var/lib/clickhouse/disks/s3/cache/</cache_path>
<skip_access_check>false</skip_access_check>
</s3>
</disks>
...
</storage_configuration>
```
Required parameters:
- `endpoint` — S3 endpoint url in `path` or `virtual hosted` [styles](https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html). Endpoint url should contain bucket and root path to store data.
- `access_key_id` — S3 access key id.
- `secret_access_key` — S3 secret access key.
Optional parameters:
- `use_environment_credentials` — Reads AWS credentials from the Environment variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY and AWS_SESSION_TOKEN if they exist. Default value is `false`.
- `proxy` — Proxy configuration for S3 endpoint. Each `uri` element inside `proxy` block should contain a proxy URL.
- `connect_timeout_ms` — Socket connect timeout in milliseconds. Default value is `10 seconds`.
- `request_timeout_ms` — Request timeout in milliseconds. Default value is `5 seconds`.
- `max_connections` — S3 connections pool size. Default value is `100`.
- `retry_attempts` — Number of retry attempts in case of failed request. Default value is `10`.
- `min_bytes_for_seek` — Minimal number of bytes to use seek operation instead of sequential read. Default value is `1 Mb`.
- `metadata_path` — Path on local FS to store metadata files for S3. Default value is `/var/lib/clickhouse/disks/<disk_name>/`.
- `cache_enabled` — Allows to cache mark and index files on local FS. Default value is `true`.
- `cache_path` — Path on local FS where to store cached mark and index files. Default value is `/var/lib/clickhouse/disks/<disk_name>/cache/`.
- `skip_access_check` — If true disk access checks will not be performed on disk start-up. Default value is `false`.
S3 disk can be configured as `main` or `cold` storage:
``` xml
<storage_configuration>
...
<disks>
<s3>
<type>s3</type>
<endpoint>https://storage.yandexcloud.net/my-bucket/root-path/</endpoint>
<access_key_id>your_access_key_id</access_key_id>
<secret_access_key>your_secret_access_key</secret_access_key>
</s3>
</disks>
<policies>
<s3_main>
<volumes>
<main>
<disk>s3</disk>
</main>
</volumes>
</s3_main>
<s3_cold>
<volumes>
<main>
<disk>default</disk>
</main>
<external>
<disk>s3</disk>
</external>
</volumes>
<move_factor>0.2</move_factor>
</s3_cold>
</policies>
...
</storage_configuration>
```
In case of `cold` option a data can be moved to S3 if local disk free size will be smaller than `move_factor * disk_size` or by TTL move rule.
### Details {#details}
In the case of `MergeTree` tables, data is getting to disk in different ways:

View File

@ -254,7 +254,6 @@ ENGINE = MergeTree()
PARTITION BY toYYYYMM(EventDate)
ORDER BY (CounterID, EventDate, intHash32(UserID))
SAMPLE BY intHash32(UserID)
SETTINGS index_granularity = 8192
```
``` sql
@ -450,7 +449,6 @@ ENGINE = CollapsingMergeTree(Sign)
PARTITION BY toYYYYMM(StartDate)
ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID)
SAMPLE BY intHash32(UserID)
SETTINGS index_granularity = 8192
```
You can execute those queries using the interactive mode of `clickhouse-client` (just launch it in a terminal without specifying a query in advance) or try some [alternative interface](../interfaces/index.md) if you want.

View File

@ -110,17 +110,17 @@ You can pass parameters to `clickhouse-client` (all parameters have a default va
### Command Line Options {#command-line-options}
- `--host, -h` - The server name, localhost by default. You can use either the name or the IPv4 or IPv6 address.
- `--host, -h` The server name, localhost by default. You can use either the name or the IPv4 or IPv6 address.
- `--port` The port to connect to. Default value: 9000. Note that the HTTP interface and the native interface use different ports.
- `--user, -u` The username. Default value: default.
- `--password` The password. Default value: empty string.
- `--query, -q` The query to process when using non-interactive mode. You must specify either `query` or `queries-file` option.
- `--queries-file, -qf` - file path with queries to execute. You must specify either `query` or `queries-file` option.
- `--queries-file, -qf` file path with queries to execute. You must specify either `query` or `queries-file` option.
- `--database, -d` Select the current default database. Default value: the current database from the server settings (default by default).
- `--multiline, -m` If specified, allow multiline queries (do not send the query on Enter).
- `--multiquery, -n` If specified, allow processing multiple queries separated by semicolons.
- `--format, -f` Use the specified default format to output the result.
- `--vertical, -E` If specified, use the Vertical format by default to output the result. This is the same as format=Vertical. In this format, each value is printed on a separate line, which is helpful when displaying wide tables.
- `--vertical, -E` If specified, use the [Vertical format](../interfaces/formats.md#vertical) by default to output the result. This is the same as `format=Vertical`. In this format, each value is printed on a separate line, which is helpful when displaying wide tables.
- `--time, -t` If specified, print the query execution time to stderr in non-interactive mode.
- `--stacktrace` If specified, also print the stack trace if an exception occurs.
- `--config-file` The name of the configuration file.

View File

@ -515,9 +515,9 @@ Example:
## JSONAsString {#jsonasstring}
In this format, a single JSON object is interpreted as a single value. If input has several JSON objects (comma separated) they will be interpreted as a sepatate rows.
In this format, a single JSON object is interpreted as a single value. If the input has several JSON objects (comma separated) they will be interpreted as separate rows.
This format can only be parsed for table with a single field of type [String](../sql-reference/data-types/string.md). The remaining columns must be set to [DEFAULT](../sql-reference/statements/create/table.md#default) or [MATERIALIZED](../sql-reference/statements/create/table.md#materialized), or omitted. Once you collect whole JSON object to string you can use [JSON functions](../sql-reference/functions/json-functions.md) to process it.
This format can only be parsed for table with a single field of type [String](../sql-reference/data-types/string.md). The remaining columns must be set to [DEFAULT](../sql-reference/statements/create/table.md#default) or [MATERIALIZED](../sql-reference/statements/create/table.md#materialized), or omitted. Once you collect whole JSON object to string you can use [JSON functions](../sql-reference/functions/json-functions.md) to process it.
**Example**
@ -526,7 +526,7 @@ Query:
``` sql
DROP TABLE IF EXISTS json_as_string;
CREATE TABLE json_as_string (json String) ENGINE = Memory;
INSERT INTO json_as_string FORMAT JSONAsString {"foo":{"bar":{"x":"y"},"baz":1}},{},{"any json stucture":1}
INSERT INTO json_as_string (json) FORMAT JSONAsString {"foo":{"bar":{"x":"y"},"baz":1}},{},{"any json stucture":1}
SELECT * FROM json_as_string;
```
@ -540,7 +540,6 @@ Result:
└───────────────────────────────────┘
```
## JSONCompact {#jsoncompact}
## JSONCompactString {#jsoncompactstring}

View File

@ -107,6 +107,10 @@ Features:
[xeus-clickhouse](https://github.com/wangfenjin/xeus-clickhouse) is a Jupyter kernal for ClickHouse, which supports query CH data using SQL in Jupyter.
### MindsDB Studio {#mindsdb}
[MindsDB](https://mindsdb.com/) is an open-source AI layer for databases including ClickHouse that allows you to effortlessly develop, train and deploy state-of-the-art machine learning models. MindsDB Studio(GUI) allows you to train new models from database, interpret predictions made by the model, identify potential data biases, and evaluate and visualize model accuracy using the Explainable AI function to adapt and tune your Machine Learning models faster.
## Commercial {#commercial}
### DataGrip {#datagrip}

View File

@ -69,6 +69,9 @@ toc_title: Integrations
- Geo
- [MaxMind](https://dev.maxmind.com/geoip/)
- [clickhouse-maxmind-geoip](https://github.com/AlexeyKupershtokh/clickhouse-maxmind-geoip)
- AutoML
- [MindsDB](https://mindsdb.com/)
- [MindsDB](https://github.com/mindsdb/mindsdb) - Predictive AI layer for ClickHouse database.
## Programming Language Ecosystems {#programming-language-ecosystems}

View File

@ -120,5 +120,6 @@ toc_title: Adopters
| <a href="https://htc-cs.ru/" class="favicon">ЦВТ</a> | Software Development | Metrics, Logging | — | — | [Blog Post, March 2019, in Russian](https://vc.ru/dev/62715-kak-my-stroili-monitoring-na-prometheus-clickhouse-i-elk) |
| <a href="https://mkb.ru/" class="favicon">МКБ</a> | Bank | Web-system monitoring | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/mkb.pdf) |
| <a href="https://cft.ru/" class="favicon">ЦФТ</a> | Banking, Financial products, Payments | — | — | — | [Meetup in Russian, April 2020](https://team.cft.ru/events/162) |
| <a href="https://www.kakaocorp.com/" class="favicon">kakaocorp</a> | Internet company | — | — | — | [if(kakao)2020 conference](https://if.kakao.com/session/117) |
[Original article](https://clickhouse.tech/docs/en/introduction/adopters/) <!--hide-->

View File

@ -27,6 +27,8 @@ We recommend using SQL-driven workflow. Both of the configuration methods work s
!!! note "Warning"
You cant manage the same access entity by both configuration methods simultaneously.
To see all users, roles, profiles, etc. and all their grants use [SHOW ACCESS](../sql-reference/statements/show.md#show-access-statement) statement.
## Usage {#access-control-usage}
By default, the ClickHouse server provides the `default` user account which is not allowed using SQL-driven access control and account management but has all the rights and permissions. The `default` user account is used in any cases when the username is not defined, for example, at login from client or in distributed queries. In distributed query processing a default user account is used, if the configuration of the server or cluster doesnt specify the [user and password](../engines/table-engines/special/distributed.md) properties.

View File

@ -0,0 +1,26 @@
---
toc_priority: 65
toc_title: Caches
---
# Cache Types {#cache-types}
When performing queries, ClichHouse uses different caches.
Main cache types:
- `mark_cache` — Cache of marks used by table engines of the [MergeTree](../engines/table-engines/mergetree-family/mergetree.md) family.
- `uncompressed_cache` — Cache of uncompressed data used by table engines of the [MergeTree](../engines/table-engines/mergetree-family/mergetree.md) family.
Additional cache types:
- DNS cache
- [regexp](../interfaces/formats.md#data-format-regexp) cache
- compiled expressions cache
- [Avro format](../interfaces/formats.md#data-format-avro) schemas cache
- [dictionaries data cache](../sql-reference/dictionaries/index.md)
Indirectly used:
- OS page cache
To drop cache, use [SYSTEM DROP ... CACHE](../sql-reference/statements/system.md) statements.
[Original article](https://clickhouse.tech/docs/en/operations/caches/) <!--hide-->

View File

@ -25,6 +25,7 @@ Ways to configure settings, in order of priority:
- When starting the ClickHouse console client in non-interactive mode, set the startup parameter `--setting=value`.
- When using the HTTP API, pass CGI parameters (`URL?setting_1=value&setting_2=value...`).
- Make settings in the [SETTINGS](../../sql-reference/statements/select/index.md#settings-in-select) clause of the SELECT query. The setting value is applied only to that query and is reset to default or previous value after the query is executed.
Settings that can only be made in the server config file are not covered in this section.

View File

@ -0,0 +1,190 @@
# MergeTree tables settings {#merge-tree-settings}
The values of `merge_tree` settings (for all MergeTree tables) can be viewed in the table `system.merge_tree_settings`, they can be overridden in `config.xml` in the `merge_tree` section, or set in the `SETTINGS` section of each table.
Override example in `config.xml`:
``` text
<merge_tree>
<max_suspicious_broken_parts>5</max_suspicious_broken_parts>
</merge_tree>
```
An example to set in `SETTINGS` for a particular table:
``` sql
CREATE TABLE foo
(
`A` Int64
)
ENGINE = MergeTree
ORDER BY tuple()
SETTINGS max_suspicious_broken_parts = 500;
```
An example of changing the settings for a specific table with the `ALTER TABLE ... MODIFY SETTING` command:
``` sql
ALTER TABLE foo
MODIFY SETTING max_suspicious_broken_parts = 100;
```
## parts_to_throw_insert {#parts-to-throw-insert}
If the number of active parts in a single partition exceeds the `parts_to_throw_insert` value, `INSERT` is interrupted with the `Too many parts (N). Merges are processing significantly slower than inserts` exception.
Possible values:
- Any positive integer.
Default value: 300.
To achieve maximum performance of `SELECT` queries, it is necessary to minimize the number of parts processed, see [Merge Tree](../../development/architecture.md#merge-tree).
You can set a larger value to 600 (1200), this will reduce the probability of the `Too many parts` error, but at the same time `SELECT` performance might degrade. Also in case of a merge issue (for example, due to insufficient disk space) you will notice it later than it could be with the original 300.
## parts_to_delay_insert {#parts-to-delay-insert}
If the number of active parts in a single partition exceeds the `parts_to_delay_insert` value, an `INSERT` artificially slows down.
Possible values:
- Any positive integer.
Default value: 150.
ClickHouse artificially executes `INSERT` longer (adds sleep) so that the background merge process can merge parts faster than they are added.
## max_delay_to_insert {#max-delay-to-insert}
The value in seconds, which is used to calculate the `INSERT` delay, if the number of active parts in a single partition exceeds the [parts_to_delay_insert](#parts-to-delay-insert) value.
Possible values:
- Any positive integer.
Default value: 1.
The delay (in milliseconds) for `INSERT` is calculated by the formula:
```code
max_k = parts_to_throw_insert - parts_to_delay_insert
k = 1 + parts_count_in_partition - parts_to_delay_insert
delay_milliseconds = pow(max_delay_to_insert * 1000, k / max_k)
```
For example if a partition has 299 active parts and parts_to_throw_insert = 300, parts_to_delay_insert = 150, max_delay_to_insert = 1, `INSERT` is delayed for `pow( 1 * 1000, (1 + 299 - 150) / (300 - 150) ) = 1000` milliseconds.
## max_parts_in_total {#max-parts-in-total}
If the total number of active parts in all partitions of a table exceeds the `max_parts_in_total` value `INSERT` is interrupted with the `Too many parts (N)` exception.
Possible values:
- Any positive integer.
Default value: 100000.
A large number of parts in a table reduces performance of ClickHouse queries and increases ClickHouse boot time. Most often this is a consequence of an incorrect design (mistakes when choosing a partitioning strategy - too small partitions).
## replicated_deduplication_window {#replicated-deduplication-window}
The number of most recently inserted blocks for which Zookeeper stores hash sums to check for duplicates.
Possible values:
- Any positive integer.
- 0 (disable deduplication)
Default value: 100.
The `Insert` command creates one or more blocks (parts). When inserting into Replicated tables, ClickHouse for [insert deduplication](../../engines/table-engines/mergetree-family/replication/) writes the hash sums of the created parts into Zookeeper. Hash sums are stored only for the most recent `replicated_deduplication_window` blocks. The oldest hash sums are removed from Zookeeper.
A large number of `replicated_deduplication_window` slows down `Inserts` because it needs to compare more entries.
The hash sum is calculated from the composition of the field names and types and the data of the inserted part (stream of bytes).
## replicated_deduplication_window_seconds {#replicated-deduplication-window-seconds}
The number of seconds after which the hash sums of the inserted blocks are removed from Zookeeper.
Possible values:
- Any positive integer.
Default value: 604800 (1 week).
Similar to [replicated_deduplication_window](#replicated-deduplication-window), `replicated_deduplication_window_seconds` specifies how long to store hash sums of blocks for insert deduplication. Hash sums older than `replicated_deduplication_window_seconds` are removed from Zookeeper, even if they are less than ` replicated_deduplication_window`.
## old_parts_lifetime {#old-parts-lifetime}
The time (in seconds) of storing inactive parts to protect against data loss during spontaneous server reboots.
Possible values:
- Any positive integer.
Default value: 480.
`fsync` is not called for new parts, so for some time new parts exist only in the server's RAM (OS cache). If the server is rebooted spontaneously, new parts can be lost or damaged.
To protect data parts created by merges source parts are not deleted immediately. After merging several parts into a new part, ClickHouse marks the original parts as inactive and deletes them only after `old_parts_lifetime` seconds.
Inactive parts are removed if they are not used by current queries, i.e. if the `refcount` of the part is zero.
During startup ClickHouse checks the integrity of the parts.
If the merged part is damaged ClickHouse returns the inactive parts to the active list, and later merges them again. Then the damaged part is renamed (the `broken_` prefix is added) and moved to the `detached` folder.
If the merged part is not damaged, then the original inactive parts are renamed (the `ignored_` prefix is added) and moved to the `detached` folder.
The default `dirty_expire_centisecs` value (a Linux kernel setting) is 30 seconds (the maximum time that written data is stored only in RAM), but under heavy loads on the disk system data can be written much later. Experimentally, a value of 480 seconds was chosen for `old_parts_lifetime`, during which a new part is guaranteed to be written to disk.
## max_bytes_to_merge_at_max_space_in_pool {#max-bytes-to-merge-at-max-space-in-pool}
The maximum total parts size (in bytes) to be merged into one part, if there are enough resources available.
`max_bytes_to_merge_at_max_space_in_pool` -- roughly corresponds to the maximum possible part size created by an automatic background merge.
Possible values:
- Any positive integer.
Default value: 161061273600 (150 GB).
The merge scheduler periodically analyzes the sizes and number of parts in partitions, and if there is enough free resources in the pool, it starts background merges. Merges occur until the total size of the source parts is less than `max_bytes_to_merge_at_max_space_in_pool`.
Merges initiated by `optimize final` ignore `max_bytes_to_merge_at_max_space_in_pool` and merge parts only taking into account available resources (free disk's space) until one part remains in the partition.
## max_bytes_to_merge_at_min_space_in_pool {#max-bytes-to-merge-at-min-space-in-pool}
The maximum total part size (in bytes) to be merged into one part, with the minimum available resources in the background pool.
Possible values:
- Any positive integer.
Default value: 1048576 (1 MB)
`max_bytes_to_merge_at_min_space_in_pool` defines the maximum total size of parts which can be merged despite the lack of available disk space (in pool). This is necessary to reduce the number of small parts and the chance of `Too many parts` errors.
Merges book disk space by doubling the total merged parts sizes. Thus, with a small amount of free disk space, a situation may happen that there is free space, but this space is already booked by ongoing large merges, so other merges unable to start, and the number of small parts grows with every insert.
## merge_max_block_size {#merge-max-block-size}
The number of rows that are read from the merged parts into memory.
Possible values:
- Any positive integer.
Default value: 8192
Merge reads rows from parts in blocks of `merge_max_block_size` rows, then merges and writes the result into a new part. The read block is placed in RAM, so `merge_max_block_size` affects the size of the RAM required for the merge. Thus, merges can consume a large amount of RAM for tables with very wide rows (if the average row size is 100kb, then when merging 10 parts, (100kb * 10 * 8192) = ~ 8GB of RAM). By decreasing `merge_max_block_size`, you can reduce the amount of RAM required for a merge but slow down a merge.
## max_part_loading_threads {#max-part-loading-threads}
The maximum number of threads that read parts when ClickHouse starts.
Possible values:
- Any positive integer.
Default value: auto (number of CPU cores).
During startup ClickHouse reads all parts of all tables (reads files with metadata of parts) to build a list of all parts in memory. In some systems with a large number of parts this process can take a long time, and this time might be shortened by increasing `max_part_loading_threads` (if this process is not CPU and disk I/O bound).
[Original article](https://clickhouse.tech/docs/en/operations/settings/merge_tree_settings/) <!--hide-->

View File

@ -428,7 +428,7 @@ Possible values:
- `'basic'` — Use basic parser.
ClickHouse can parse only the basic `YYYY-MM-DD HH:MM:SS` or `YYYY-MM-DD` format. For example, `'2019-08-20 10:18:56'` or `2019-08-20`.
ClickHouse can parse only the basic `YYYY-MM-DD HH:MM:SS` or `YYYY-MM-DD` format. For example, `2019-08-20 10:18:56` or `2019-08-20`.
Default value: `'basic'`.
@ -443,19 +443,19 @@ Allows choosing different output formats of the text representation of date and
Possible values:
- `'simple'` - Simple output format.
- `simple` - Simple output format.
Clickhouse output date and time `YYYY-MM-DD hh:mm:ss` format. For example, `'2019-08-20 10:18:56'`. Calculation is performed according to the data type's time zone (if present) or server time zone.
Clickhouse output date and time `YYYY-MM-DD hh:mm:ss` format. For example, `2019-08-20 10:18:56`. The calculation is performed according to the data type's time zone (if present) or server time zone.
- `'iso'` - ISO output format.
- `iso` - ISO output format.
Clickhouse output date and time in [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) `YYYY-MM-DDThh:mm:ssZ` format. For example, `'2019-08-20T10:18:56Z'`. Note that output is in UTC (`Z` means UTC).
Clickhouse output date and time in [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) `YYYY-MM-DDThh:mm:ssZ` format. For example, `2019-08-20T10:18:56Z`. Note that output is in UTC (`Z` means UTC).
- `'unix_timestamp'` - Unix timestamp output format.
- `unix_timestamp` - Unix timestamp output format.
Clickhouse output date and time in [Unix timestamp](https://en.wikipedia.org/wiki/Unix_time) format. For example `'1566285536'`.
Clickhouse output date and time in [Unix timestamp](https://en.wikipedia.org/wiki/Unix_time) format. For example `1566285536`.
Default value: `'simple'`.
Default value: `simple`.
See also:
@ -2134,6 +2134,21 @@ Default value: `1`.
- [ORDER BY Clause](../../sql-reference/statements/select/order-by.md#optimize_read_in_order)
## optimize_aggregation_in_order {#optimize_aggregation_in_order}
Enables [GROUP BY](../../sql-reference/statements/select/group-by.md) optimization in [SELECT](../../sql-reference/statements/select/index.md) queries for aggregating data in corresponding order in [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) tables.
Possible values:
- 0 — `GROUP BY` optimization is disabled.
- 1 — `GROUP BY` optimization is enabled.
Default value: `0`.
**See Also**
- [GROUP BY optimization](../../sql-reference/statements/select/group-by.md#aggregation-in-order)
## mutations_sync {#mutations_sync}
Allows to execute `ALTER TABLE ... UPDATE|DELETE` queries ([mutations](../../sql-reference/statements/alter/index.md#mutations)) synchronously.
@ -2474,7 +2489,6 @@ Possible values:
Default value: `0`.
## aggregate_functions_null_for_empty {#aggregate_functions_null_for_empty}
Enables or disables rewriting all aggregate functions in a query, adding [-OrNull](../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-ornull) suffix to them. Enable it for SQL standard compatibility.
@ -2491,11 +2505,7 @@ Default value: 0.
Consider the following query with aggregate functions:
```sql
SELECT
SUM(-1),
MAX(0)
FROM system.one
WHERE 0
SELECT SUM(-1), MAX(0) FROM system.one WHERE 0;
```
With `aggregate_functions_null_for_empty = 0` it would produce:
@ -2512,7 +2522,6 @@ With `aggregate_functions_null_for_empty = 1` the result would be:
└───────────────┴──────────────┘
```
## union_default_mode {#union-default-mode}
Sets a mode for combining `SELECT` query results. The setting is only used when shared with [UNION](../../sql-reference/statements/select/union.md) without explicitly specifying the `UNION ALL` or `UNION DISTINCT`.
@ -2527,7 +2536,6 @@ Default value: `''`.
See examples in [UNION](../../sql-reference/statements/select/union.md).
## data_type_default_nullable {#data_type_default_nullable}
Allows data types without explicit modifiers [NULL or NOT NULL](../../sql-reference/statements/create/table.md#null-modifiers) in column definition will be [Nullable](../../sql-reference/data-types/nullable.md#data_type-nullable).
@ -2539,7 +2547,6 @@ Possible values:
Default value: `0`.
## execute_merges_on_single_replica_time_threshold {#execute-merges-on-single-replica-time-threshold}
Enables special logic to perform merges on replicas.
@ -2559,4 +2566,15 @@ High values for that threshold may lead to replication delays.
It can be useful when merges are CPU bounded not IO bounded (performing heavy data compression, calculating aggregate functions or default expressions that require a large amount of calculations, or just very high number of tiny merges).
## max_final_threads {#max-final-threads}
Sets the maximum number of parallel threads for the `SELECT` query data read phase with the [FINAL](../../sql-reference/statements/select/from.md#select-from-final) modifier.
Possible values:
- Positive integer.
- 0 or 1 — Disabled. `SELECT` queries are executed in a single thread.
Default value: `16`.
[Original article](https://clickhouse.tech/docs/en/operations/settings/settings/) <!-- hide -->

View File

@ -7,16 +7,16 @@ Columns:
- `id` ([UUID](../../sql-reference/data-types/uuid.md)) — Quota ID.
- `storage`([String](../../sql-reference/data-types/string.md)) — Storage of quotas. Possible value: “users.xml” if a quota configured in the users.xml file, “disk” if a quota configured by an SQL-query.
- `keys` ([Array](../../sql-reference/data-types/array.md)([Enum8](../../sql-reference/data-types/enum.md))) — Key specifies how the quota should be shared. If two connections use the same quota and key, they share the same amounts of resources. Values:
- `[]` — All users share the same quota.
- `['user_name']` — Connections with the same user name share the same quota.
- `['ip_address']` — Connections from the same IP share the same quota.
- `['client_key']` — Connections with the same key share the same quota. A key must be explicitly provided by a client. When using [clickhouse-client](../../interfaces/cli.md), pass a key value in the `--quota-key` parameter, or use the `quota_key` parameter in the client configuration file. When using HTTP interface, use the `X-ClickHouse-Quota` header.
- `['user_name', 'client_key']` — Connections with the same `client_key` share the same quota. If a key isnt provided by a client, the qouta is tracked for `user_name`.
- `['client_key', 'ip_address']` — Connections with the same `client_key` share the same quota. If a key isnt provided by a client, the qouta is tracked for `ip_address`.
- `[]` — All users share the same quota.
- `['user_name']` — Connections with the same user name share the same quota.
- `['ip_address']` — Connections from the same IP share the same quota.
- `['client_key']` — Connections with the same key share the same quota. A key must be explicitly provided by a client. When using [clickhouse-client](../../interfaces/cli.md), pass a key value in the `--quota-key` parameter, or use the `quota_key` parameter in the client configuration file. When using HTTP interface, use the `X-ClickHouse-Quota` header.
- `['user_name', 'client_key']` — Connections with the same `client_key` share the same quota. If a key isnt provided by a client, the qouta is tracked for `user_name`.
- `['client_key', 'ip_address']` — Connections with the same `client_key` share the same quota. If a key isnt provided by a client, the qouta is tracked for `ip_address`.
- `durations` ([Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Time interval lengths in seconds.
- `apply_to_all` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Logical value. It shows which users the quota is applied to. Values:
- `0` — The quota applies to users specify in the `apply_to_list`.
- `1` — The quota applies to all users except those listed in `apply_to_except`.
- `0` — The quota applies to users specify in the `apply_to_list`.
- `1` — The quota applies to all users except those listed in `apply_to_except`.
- `apply_to_list` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — List of user names/[roles](../../operations/access-rights.md#role-management) that the quota should be applied to.
- `apply_to_except` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — List of user names/roles that the quota should not apply to.

View File

@ -7,7 +7,7 @@ toc_title: clickhouse-benchmark
Connects to a ClickHouse server and repeatedly sends specified queries.
Syntax:
**Syntax**
``` bash
$ clickhouse-benchmark --query ["single query"] [keys]
@ -28,35 +28,35 @@ $ clickhouse-benchmark [keys] <<< "single query"
If you want to send a set of queries, create a text file and place each query on the individual string in this file. For example:
``` sql
SELECT * FROM system.numbers LIMIT 10000000
SELECT 1
SELECT * FROM system.numbers LIMIT 10000000;
SELECT 1;
```
Then pass this file to a standard input of `clickhouse-benchmark`.
Then pass this file to a standard input of `clickhouse-benchmark`:
``` bash
clickhouse-benchmark [keys] < queries_file
clickhouse-benchmark [keys] < queries_file;
```
## Keys {#clickhouse-benchmark-keys}
- `--query=WORD` - Query to execute. If this parameter is not passed clickhouse-benchmark will read queries from standard input.
- `--query=QUERY` — Query to execute. If this parameter is not passed, `clickhouse-benchmark` will read queries from standard input.
- `-c N`, `--concurrency=N` — Number of queries that `clickhouse-benchmark` sends simultaneously. Default value: 1.
- `-d N`, `--delay=N` — Interval in seconds between intermediate reports (set 0 to disable reports). Default value: 1.
- `-h WORD`, `--host=WORD` — Server host. Default value: `localhost`. For the [comparison mode](#clickhouse-benchmark-comparison-mode) you can use multiple `-h` keys.
- `-d N`, `--delay=N` — Interval in seconds between intermediate reports (to disable reports set 0). Default value: 1.
- `-h HOST`, `--host=HOST` — Server host. Default value: `localhost`. For the [comparison mode](#clickhouse-benchmark-comparison-mode) you can use multiple `-h` keys.
- `-p N`, `--port=N` — Server port. Default value: 9000. For the [comparison mode](#clickhouse-benchmark-comparison-mode) you can use multiple `-p` keys.
- `-i N`, `--iterations=N` — Total number of queries. Default value: 0 (repeat forever).
- `-r`, `--randomize` — Random order of queries execution if there is more then one input query.
- `-s`, `--secure` — Using TLS connection.
- `-r`, `--randomize` — Random order of queries execution if there is more than one input query.
- `-s`, `--secure` — Using `TLS` connection.
- `-t N`, `--timelimit=N` — Time limit in seconds. `clickhouse-benchmark` stops sending queries when the specified time limit is reached. Default value: 0 (time limit disabled).
- `--confidence=N` — Level of confidence for T-test. Possible values: 0 (80%), 1 (90%), 2 (95%), 3 (98%), 4 (99%), 5 (99.5%). Default value: 5. In the [comparison mode](#clickhouse-benchmark-comparison-mode) `clickhouse-benchmark` performs the [Independent two-sample Students t-test](https://en.wikipedia.org/wiki/Student%27s_t-test#Independent_two-sample_t-test) test to determine whether the two distributions arent different with the selected level of confidence.
- `--confidence=N` — Level of confidence for T-test. Possible values: 0 (80%), 1 (90%), 2 (95%), 3 (98%), 4 (99%), 5 (99.5%). Default value: 5. In the [comparison mode](#clickhouse-benchmark-comparison-mode) `clickhouse-benchmark` performs the [Independent two-sample Students t-test](https://en.wikipedia.org/wiki/Student%27s_t-test#Independent_two-sample_t-test) to determine whether the two distributions arent different with the selected level of confidence.
- `--cumulative` — Printing cumulative data instead of data per interval.
- `--database=DATABASE_NAME` — ClickHouse database name. Default value: `default`.
- `--json=FILEPATH` — JSON output. When the key is set, `clickhouse-benchmark` outputs a report to the specified JSON-file.
- `--json=FILEPATH``JSON` output. When the key is set, `clickhouse-benchmark` outputs a report to the specified JSON-file.
- `--user=USERNAME` — ClickHouse user name. Default value: `default`.
- `--password=PSWD` — ClickHouse user password. Default value: empty string.
- `--stacktrace` — Stack traces output. When the key is set, `clickhouse-bencmark` outputs stack traces of exceptions.
- `--stage=WORD` — Query processing stage at server. ClickHouse stops query processing and returns answer to `clickhouse-benchmark` at the specified stage. Possible values: `complete`, `fetch_columns`, `with_mergeable_state`. Default value: `complete`.
- `--stage=WORD` — Query processing stage at server. ClickHouse stops query processing and returns an answer to `clickhouse-benchmark` at the specified stage. Possible values: `complete`, `fetch_columns`, `with_mergeable_state`. Default value: `complete`.
- `--help` — Shows the help message.
If you want to apply some [settings](../../operations/settings/index.md) for queries, pass them as a key `--<session setting name>= SETTING_VALUE`. For example, `--max_memory_usage=1048576`.
@ -96,11 +96,11 @@ In the report you can find:
- Endpoint of ClickHouse server.
- Number of processed queries.
- QPS: QPS: How many queries server performed per second during a period specified in the `--delay` argument.
- RPS: How many rows server read per second during a period specified in the `--delay` argument.
- MiB/s: How many mebibytes server read per second during a period specified in the `--delay` argument.
- result RPS: How many rows placed by server to the result of a query per second during a period specified in the `--delay` argument.
- result MiB/s. How many mebibytes placed by server to the result of a query per second during a period specified in the `--delay` argument.
- QPS: How many queries the server performed per second during a period specified in the `--delay` argument.
- RPS: How many rows the server reads per second during a period specified in the `--delay` argument.
- MiB/s: How many mebibytes the server reads per second during a period specified in the `--delay` argument.
- result RPS: How many rows placed by the server to the result of a query per second during a period specified in the `--delay` argument.
- result MiB/s. How many mebibytes placed by the server to the result of a query per second during a period specified in the `--delay` argument.
- Percentiles of queries execution time.
@ -159,3 +159,5 @@ localhost:9000, queries 10, QPS: 6.082, RPS: 121959604.568, MiB/s: 930.478, resu
99.900% 0.172 sec.
99.990% 0.172 sec.
```
[Original article](https://clickhouse.tech/docs/en/operations/utilities/clickhouse-benchmark.md) <!--hide-->

View File

@ -71,8 +71,8 @@ Parameters:
<remote_servers>
<source_cluster>
<!--
source cluster & destination clusters accepts exactly the same
parameters as parameters for usual Distributed table
source cluster & destination clusters accept exactly the same
parameters as parameters for the usual Distributed table
see https://clickhouse.tech/docs/en/engines/table-engines/special/distributed/
-->
<shard>

View File

@ -16,7 +16,7 @@ By default `clickhouse-local` does not have access to data on the same host, but
!!! warning "Warning"
It is not recommended to load production server configuration into `clickhouse-local` because data can be damaged in case of human error.
For temporary data, a unique temporary data directory is created by default. If you want to override this behavior, the data directory can be explicitly specified with the `-- --path` option.
For temporary data, a unique temporary data directory is created by default.
## Usage {#usage}
@ -32,15 +32,22 @@ Arguments:
- `-S`, `--structure` — table structure for input data.
- `-if`, `--input-format` — input format, `TSV` by default.
- `-f`, `--file` — path to data, `stdin` by default.
- `-q` `--query` — queries to execute with `;` as delimeter. You must specify either `query` or `queries-file` option.
- `-qf` `--queries-file` - file path with queries to execute. You must specify either `query` or `queries-file` option.
- `-q`, `--query` — queries to execute with `;` as delimeter. You must specify either `query` or `queries-file` option.
- `-qf`, `--queries-file` - file path with queries to execute. You must specify either `query` or `queries-file` option.
- `-N`, `--table` — table name where to put output data, `table` by default.
- `-of`, `--format`, `--output-format` — output format, `TSV` by default.
- `-d`, `--database` — default database, `_local` by default.
- `--stacktrace` — whether to dump debug output in case of exception.
- `--echo` — print query before execution.
- `--verbose` — more details on query execution.
- `-s` — disables `stderr` logging.
- `--config-file` — path to configuration file in same format as for ClickHouse server, by default the configuration empty.
- `--logger.console` — Log to console.
- `--logger.log` — Log file name.
- `--logger.level` — Log level.
- `--ignore-error` — do not stop processing if a query failed.
- `-c`, `--config-file` — path to configuration file in same format as for ClickHouse server, by default the configuration empty.
- `--no-system-tables` — do not attach system tables.
- `--help` — arguments references for `clickhouse-local`.
- `-V`, `--version` — print version information and exit.
Also there are arguments for each ClickHouse configuration variable which are more commonly used instead of `--config-file`.

View File

@ -4,6 +4,28 @@ toc_priority: 106
# argMax {#agg-function-argmax}
Syntax: `argMax(arg, val)`
Syntax: `argMax(arg, val)` or `argMax(tuple(arg, val))`
Calculates the `arg` value for a maximum `val` value. If there are several different values of `arg` for maximum values of `val`, the first of these values encountered is output.
Tuple version of this function will return the tuple with the maximum `val` value. It is convinient for use with `SimpleAggregateFunction`.
**Example:**
``` text
┌─user─────┬─salary─┐
│ director │ 5000 │
│ manager │ 3000 │
│ worker │ 1000 │
└──────────┴────────┘
```
``` sql
SELECT argMax(user, salary), argMax(tuple(user, salary)) FROM salary
```
``` text
┌─argMax(user, salary)─┬─argMax(tuple(user, salary))─┐
│ director │ ('director',5000) │
└──────────────────────┴─────────────────────────────┘
```

View File

@ -4,10 +4,12 @@ toc_priority: 105
# argMin {#agg-function-argmin}
Syntax: `argMin(arg, val)`
Syntax: `argMin(arg, val)` or `argMin(tuple(arg, val))`
Calculates the `arg` value for a minimal `val` value. If there are several different values of `arg` for minimal values of `val`, the first of these values encountered is output.
Tuple version of this function will return the tuple with the minimal `val` value. It is convinient for use with `SimpleAggregateFunction`.
**Example:**
``` text
@ -19,11 +21,11 @@ Calculates the `arg` value for a minimal `val` value. If there are several diffe
```
``` sql
SELECT argMin(user, salary) FROM salary
SELECT argMin(user, salary), argMin(tuple(user, salary)) FROM salary
```
``` text
┌─argMin(user, salary)─┐
│ worker │
└──────────────────────┘
┌─argMin(user, salary)─┬─argMin(tuple(user, salary))─
│ worker │ ('worker',1000) │
└──────────────────────┴─────────────────────────────
```

View File

@ -25,22 +25,22 @@ The following table lists cases when query feature works in ClickHouse, but beha
|------------|--------------------------------------------------------------------------------------------------------------------------|----------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| **E011** | **Numeric data types** | **Partial**{.text-warning} | |
| E011-01 | INTEGER and SMALLINT data types | Yes {.text-success} | |
| E011-02 | REAL, DOUBLE PRECISION and FLOAT data types data types | Partial {.text-warning} | `FLOAT(<binary_precision>)`, `REAL` and `DOUBLE PRECISION` are not supported |
| E011-03 | DECIMAL and NUMERIC data types | Partial {.text-warning} | Only `DECIMAL(p,s)` is supported, not `NUMERIC` |
| E011-02 | REAL, DOUBLE PRECISION and FLOAT data types data types | Yes {.text-success} | |
| E011-03 | DECIMAL and NUMERIC data types | Yes {.text-success} | |
| E011-04 | Arithmetic operators | Yes {.text-success} | |
| E011-05 | Numeric comparison | Yes {.text-success} | |
| E011-06 | Implicit casting among the numeric data types | No {.text-danger} | ANSI SQL allows arbitrary implicit cast between numeric types, while ClickHouse relies on functions having multiple overloads instead of implicit cast |
| **E021** | **Character string types** | **Partial**{.text-warning} | |
| E021-01 | CHARACTER data type | No {.text-danger} | |
| E021-02 | CHARACTER VARYING data type | No {.text-danger} | `String` behaves similarly, but without length limit in parentheses |
| E021-03 | Character literals | Partial {.text-warning} | No automatic concatenation of consecutive literals and character set support |
| E021-01 | CHARACTER data type | Yes {.text-success} | |
| E021-02 | CHARACTER VARYING data type | Yes {.text-success} | |
| E021-03 | Character literals | Yes {.text-success} | |
| E021-04 | CHARACTER_LENGTH function | Partial {.text-warning} | No `USING` clause |
| E021-05 | OCTET_LENGTH function | No {.text-danger} | `LENGTH` behaves similarly |
| E021-06 | SUBSTRING | Partial {.text-warning} | No support for `SIMILAR` and `ESCAPE` clauses, no `SUBSTRING_REGEX` variant |
| E021-07 | Character concatenation | Partial {.text-warning} | No `COLLATE` clause |
| E021-08 | UPPER and LOWER functions | Yes {.text-success} | |
| E021-09 | TRIM function | Yes {.text-success} | |
| E021-10 | Implicit casting among the fixed-length and variable-length character string types | No {.text-danger} | ANSI SQL allows arbitrary implicit cast between string types, while ClickHouse relies on functions having multiple overloads instead of implicit cast |
| E021-10 | Implicit casting among the fixed-length and variable-length character string types | Partial | ANSI SQL allows arbitrary implicit cast between string types, while ClickHouse relies on functions having multiple overloads instead of implicit cast |
| E021-11 | POSITION function | Partial {.text-warning} | No support for `IN` and `USING` clauses, no `POSITION_REGEX` variant |
| E021-12 | Character comparison | Yes {.text-success} | |
| **E031** | **Identifiers** | **Partial**{.text-warning} | |
@ -71,20 +71,20 @@ The following table lists cases when query feature works in ClickHouse, but beha
| E061-13 | Correlated subqueries | No {.text-danger} | |
| E061-14 | Search condition | Yes {.text-success} | |
| **E071** | **Basic query expressions** | **Partial**{.text-warning} | |
| E071-01 | UNION DISTINCT table operator | No {.text-danger} | |
| E071-01 | UNION DISTINCT table operator | Yes {.text-success} | |
| E071-02 | UNION ALL table operator | Yes {.text-success} | |
| E071-03 | EXCEPT DISTINCT table operator | No {.text-danger} | |
| E071-05 | Columns combined via table operators need not have exactly the same data type | Yes {.text-success} | |
| E071-06 | Table operators in subqueries | Yes {.text-success} | |
| **E081** | **Basic privileges** | **Partial**{.text-warning} | Work in progress |
| E081-01 | SELECT privilege at the table level | | |
| **E081** | **Basic privileges** | **Yes**{.text-success} | |
| E081-01 | SELECT privilege at the table level | Yes {.text-success} | |
| E081-02 | DELETE privilege | | |
| E081-03 | INSERT privilege at the table level | | |
| E081-04 | UPDATE privilege at the table level | | |
| E081-03 | INSERT privilege at the table level | Yes {.text-success} | |
| E081-04 | UPDATE privilege at the table level | Yes {.text-success} | |
| E081-05 | UPDATE privilege at the column level | | |
| E081-06 | REFERENCES privilege at the table level | | |
| E081-07 | REFERENCES privilege at the column level | | |
| E081-08 | WITH GRANT OPTION | | |
| E081-08 | WITH GRANT OPTION | Yes {.text-success} | |
| E081-09 | USAGE privilege | | |
| E081-10 | EXECUTE privilege | | |
| **E091** | **Set functions** | **Yes**{.text-success} | |
@ -93,28 +93,28 @@ The following table lists cases when query feature works in ClickHouse, but beha
| E091-03 | MAX | Yes {.text-success} | |
| E091-04 | MIN | Yes {.text-success} | |
| E091-05 | SUM | Yes {.text-success} | |
| E091-06 | ALL quantifier | No {.text-danger} | |
| E091-07 | DISTINCT quantifier | Partial {.text-warning} | Not all aggregate functions supported |
| E091-06 | ALL quantifier | Yes {.text-success} | |
| E091-07 | DISTINCT quantifier | Yes {.text-success} | Not all aggregate functions supported |
| **E101** | **Basic data manipulation** | **Partial**{.text-warning} | |
| E101-01 | INSERT statement | Yes {.text-success} | Note: primary key in ClickHouse does not imply the `UNIQUE` constraint |
| E101-03 | Searched UPDATE statement | No {.text-danger} | Theres an `ALTER UPDATE` statement for batch data modification |
| E101-04 | Searched DELETE statement | No {.text-danger} | Theres an `ALTER DELETE` statement for batch data removal |
| E101-03 | Searched UPDATE statement | Partial | Theres an `ALTER UPDATE` statement for batch data modification |
| E101-04 | Searched DELETE statement | Partial | Theres an `ALTER DELETE` statement for batch data removal |
| **E111** | **Single row SELECT statement** | **No**{.text-danger} | |
| **E121** | **Basic cursor support** | **No**{.text-danger} | |
| E121-01 | DECLARE CURSOR | No {.text-danger} | |
| E121-02 | ORDER BY columns need not be in select list | No {.text-danger} | |
| E121-03 | Value expressions in ORDER BY clause | No {.text-danger} | |
| E121-02 | ORDER BY columns need not be in select list | Yes {.text-success} | |
| E121-03 | Value expressions in ORDER BY clause | Yes {.text-success} | |
| E121-04 | OPEN statement | No {.text-danger} | |
| E121-06 | Positioned UPDATE statement | No {.text-danger} | |
| E121-07 | Positioned DELETE statement | No {.text-danger} | |
| E121-08 | CLOSE statement | No {.text-danger} | |
| E121-10 | FETCH statement: implicit NEXT | No {.text-danger} | |
| E121-17 | WITH HOLD cursors | No {.text-danger} | |
| **E131** | **Null value support (nulls in lieu of values)** | **Partial**{.text-warning} | Some restrictions apply |
| **E131** | **Null value support (nulls in lieu of values)** | **Yes**{.text-success} | Some restrictions apply |
| **E141** | **Basic integrity constraints** | **Partial**{.text-warning} | |
| E141-01 | NOT NULL constraints | Yes {.text-success} | Note: `NOT NULL` is implied for table columns by default |
| E141-02 | UNIQUE constraint of NOT NULL columns | No {.text-danger} | |
| E141-03 | PRIMARY KEY constraints | No {.text-danger} | |
| E141-03 | PRIMARY KEY constraints | Partial | |
| E141-04 | Basic FOREIGN KEY constraint with the NO ACTION default for both referential delete action and referential update action | No {.text-danger} | |
| E141-06 | CHECK constraint | Yes {.text-success} | |
| E141-07 | Column defaults | Yes {.text-success} | |
@ -126,7 +126,7 @@ The following table lists cases when query feature works in ClickHouse, but beha
| **E152** | **Basic SET TRANSACTION statement** | **No**{.text-danger} | |
| E152-01 | SET TRANSACTION statement: ISOLATION LEVEL SERIALIZABLE clause | No {.text-danger} | |
| E152-02 | SET TRANSACTION statement: READ ONLY and READ WRITE clauses | No {.text-danger} | |
| **E153** | **Updatable queries with subqueries** | **No**{.text-danger} | |
| **E153** | **Updatable queries with subqueries** | **Yes**{.text-success} | |
| **E161** | **SQL comments using leading double minus** | **Yes**{.text-success} | |
| **E171** | **SQLSTATE support** | **No**{.text-danger} | |
| **E182** | **Host language binding** | **No**{.text-danger} | |
@ -134,7 +134,7 @@ The following table lists cases when query feature works in ClickHouse, but beha
| F031-01 | CREATE TABLE statement to create persistent base tables | Partial {.text-warning} | No `SYSTEM VERSIONING`, `ON COMMIT`, `GLOBAL`, `LOCAL`, `PRESERVE`, `DELETE`, `REF IS`, `WITH OPTIONS`, `UNDER`, `LIKE`, `PERIOD FOR` clauses and no support for user resolved data types |
| F031-02 | CREATE VIEW statement | Partial {.text-warning} | No `RECURSIVE`, `CHECK`, `UNDER`, `WITH OPTIONS` clauses and no support for user resolved data types |
| F031-03 | GRANT statement | Yes {.text-success} | |
| F031-04 | ALTER TABLE statement: ADD COLUMN clause | Partial {.text-warning} | No support for `GENERATED` clause and system time period |
| F031-04 | ALTER TABLE statement: ADD COLUMN clause | Yes {.text-success} | No support for `GENERATED` clause and system time period |
| F031-13 | DROP TABLE statement: RESTRICT clause | No {.text-danger} | |
| F031-16 | DROP VIEW statement: RESTRICT clause | No {.text-danger} | |
| F031-19 | REVOKE statement: RESTRICT clause | No {.text-danger} | |
@ -147,11 +147,11 @@ The following table lists cases when query feature works in ClickHouse, but beha
| F041-07 | The inner table in a left or right outer join can also be used in an inner join | Yes {.text-success} | |
| F041-08 | All comparison operators are supported (rather than just =) | No {.text-danger} | |
| **F051** | **Basic date and time** | **Partial**{.text-warning} | |
| F051-01 | DATE data type (including support of DATE literal) | Partial {.text-warning} | No literal |
| F051-01 | DATE data type (including support of DATE literal) | Yes {.text-success} | |
| F051-02 | TIME data type (including support of TIME literal) with fractional seconds precision of at least 0 | No {.text-danger} | |
| F051-03 | TIMESTAMP data type (including support of TIMESTAMP literal) with fractional seconds precision of at least 0 and 6 | No {.text-danger} | `DateTime64` time provides similar functionality |
| F051-04 | Comparison predicate on DATE, TIME, and TIMESTAMP data types | Partial {.text-warning} | Only one data type available |
| F051-05 | Explicit CAST between datetime types and character string types | Yes {.text-success} | |
| F051-03 | TIMESTAMP data type (including support of TIMESTAMP literal) with fractional seconds precision of at least 0 and 6 | Yes {.text-success} | |
| F051-04 | Comparison predicate on DATE, TIME, and TIMESTAMP data types | Yes {.text-success} | |
| F051-05 | Explicit CAST between datetime types and character string types | Yes {.text-success} | |
| F051-06 | CURRENT_DATE | No {.text-danger} | `today()` is similar |
| F051-07 | LOCALTIME | No {.text-danger} | `now()` is similar |
| F051-08 | LOCALTIMESTAMP | No {.text-danger} | |
@ -171,7 +171,7 @@ The following table lists cases when query feature works in ClickHouse, but beha
| F261-03 | NULLIF | Yes {.text-success} | |
| F261-04 | COALESCE | Yes {.text-success} | |
| **F311** | **Schema definition statement** | **Partial**{.text-warning} | |
| F311-01 | CREATE SCHEMA | No {.text-danger} | |
| F311-01 | CREATE SCHEMA | Partial {.text-danger} | See CREATE DATABASE |
| F311-02 | CREATE TABLE for persistent base tables | Yes {.text-success} | |
| F311-03 | CREATE VIEW | Yes {.text-success} | |
| F311-04 | CREATE VIEW: WITH CHECK OPTION | No {.text-danger} | |

View File

@ -9,11 +9,18 @@ toc_title: Float32, Float64
Types are equivalent to types of C:
- `Float32` - `float`
- `Float64` - `double`
- `Float32` `float`.
- `Float64` `double`.
We recommend that you store data in integer form whenever possible. For example, convert fixed precision numbers to integer values, such as monetary amounts or page load times in milliseconds.
Aliases:
- `Float32``FLOAT`.
- `Float64``DOUBLE`.
When creating tables, numeric parameters for floating point numbers can be set (e.g. `FLOAT(12)`, `FLOAT(15, 22)`, `DOUBLE(12)`, `DOUBLE(4, 18)`), but ClickHouse ignores them.
## Using Floating-point Numbers {#using-floating-point-numbers}
- Computations with floating-point numbers might produce a rounding error.
@ -52,7 +59,7 @@ SELECT 0.5 / 0
└────────────────┘
```
- `-Inf` Negative infinity.
- `-Inf` Negative infinity.
<!-- -->
@ -66,7 +73,7 @@ SELECT -0.5 / 0
└─────────────────┘
```
- `NaN` Not a number.
- `NaN` Not a number.
<!-- -->
@ -80,6 +87,6 @@ SELECT 0 / 0
└──────────────┘
```
See the rules for `NaN` sorting in the section [ORDER BY clause](../sql_reference/statements/select/order-by.md).
See the rules for `NaN` sorting in the section [ORDER BY clause](../../sql-reference/statements/select/order-by.md).
[Original article](https://clickhouse.tech/docs/en/data_types/float/) <!--hide-->

View File

@ -7,23 +7,32 @@ toc_title: UInt8, UInt16, UInt32, UInt64, UInt256, Int8, Int16, Int32, Int64, In
Fixed-length integers, with or without a sign.
When creating tables, numeric parameters for integer numbers can be set (e.g. `TINYINT(8)`, `SMALLINT(16)`, `INT(32)`, `BIGINT(64)`), but ClickHouse ignores them.
## Int Ranges {#int-ranges}
- Int8 - \[-128 : 127\]
- Int16 - \[-32768 : 32767\]
- Int32 - \[-2147483648 : 2147483647\]
- Int64 - \[-9223372036854775808 : 9223372036854775807\]
- Int128 - \[-170141183460469231731687303715884105728 : 170141183460469231731687303715884105727\]
- Int256 - \[-57896044618658097711785492504343953926634992332820282019728792003956564819968 : 57896044618658097711785492504343953926634992332820282019728792003956564819967\]
- `Int8` — \[-128 : 127\]
- `Int16` — \[-32768 : 32767\]
- `Int32` — \[-2147483648 : 2147483647\]
- `Int64` — \[-9223372036854775808 : 9223372036854775807\]
- `Int128` — \[-170141183460469231731687303715884105728 : 170141183460469231731687303715884105727\]
- `Int256` — \[-57896044618658097711785492504343953926634992332820282019728792003956564819968 : 57896044618658097711785492504343953926634992332820282019728792003956564819967\]
Aliases:
- `Int8``TINYINT`, `BOOL`, `BOOLEAN`, `INT1`.
- `Int16``SMALLINT`, `INT2`.
- `Int32``INT`, `INT4`, `INTEGER`.
- `Int64``BIGINT`.
## Uint Ranges {#uint-ranges}
- UInt8 - \[0 : 255\]
- UInt16 - \[0 : 65535\]
- UInt32 - \[0 : 4294967295\]
- UInt64 - \[0 : 18446744073709551615\]
- UInt256 - \[0 : 115792089237316195423570985008687907853269984665640564039457584007913129639935\]
- `UInt8` \[0 : 255\]
- `UInt16` \[0 : 65535\]
- `UInt32` \[0 : 4294967295\]
- `UInt64` \[0 : 18446744073709551615\]
- `UInt256` \[0 : 115792089237316195423570985008687907853269984665640564039457584007913129639935\]
UInt128 is not supported yet.
`UInt128` is not supported yet.
[Original article](https://clickhouse.tech/docs/en/data_types/int_uint/) <!--hide-->

View File

@ -0,0 +1,29 @@
---
toc_priority: 61
toc_title: Multiword Type Names
---
# Multiword Types {#multiword-types}
When creating tables, you can use data types with a name consisting of several words. This is implemented for better SQL compatibility.
## Multiword Types Support {#multiword-types-support}
| Multiword types | Simple types |
|----------------------------------|--------------------------------------------------------------|
| DOUBLE PRECISION | [Float64](../../sql-reference/data-types/float.md) |
| CHAR LARGE OBJECT | [String](../../sql-reference/data-types/string.md) |
| CHAR VARYING | [String](../../sql-reference/data-types/string.md) |
| CHARACTER LARGE OBJECT | [String](../../sql-reference/data-types/string.md) |
| CHARACTER VARYING | [String](../../sql-reference/data-types/string.md) |
| NCHAR LARGE OBJECT | [String](../../sql-reference/data-types/string.md) |
| NCHAR VARYING | [String](../../sql-reference/data-types/string.md) |
| NATIONAL CHARACTER LARGE OBJECT | [String](../../sql-reference/data-types/string.md) |
| NATIONAL CHARACTER VARYING | [String](../../sql-reference/data-types/string.md) |
| NATIONAL CHAR VARYING | [String](../../sql-reference/data-types/string.md) |
| NATIONAL CHARACTER | [String](../../sql-reference/data-types/string.md) |
| NATIONAL CHAR | [String](../../sql-reference/data-types/string.md) |
| BINARY LARGE OBJECT | [String](../../sql-reference/data-types/string.md) |
| BINARY VARYING | [String](../../sql-reference/data-types/string.md) |
[Original article](https://clickhouse.tech/docs/en/sql-reference/data-types/multiword-types/) <!--hide-->

View File

@ -18,6 +18,8 @@ The following aggregate functions are supported:
- [`sumMap`](../../sql-reference/aggregate-functions/reference/summap.md#agg_functions-summap)
- [`minMap`](../../sql-reference/aggregate-functions/reference/minmap.md#agg_functions-minmap)
- [`maxMap`](../../sql-reference/aggregate-functions/reference/maxmap.md#agg_functions-maxmap)
- [`argMin`](../../sql-reference/aggregate-functions/reference/argmin.md)
- [`argMax`](../../sql-reference/aggregate-functions/reference/argmax.md)
Values of the `SimpleAggregateFunction(func, Type)` look and stored the same way as `Type`, so you do not need to apply functions with `-Merge`/`-State` suffixes. `SimpleAggregateFunction` has better performance than `AggregateFunction` with same aggregation function.

View File

@ -8,6 +8,8 @@ toc_title: String
Strings of an arbitrary length. The length is not limited. The value can contain an arbitrary set of bytes, including null bytes.
The String type replaces the types VARCHAR, BLOB, CLOB, and others from other DBMSs.
When creating tables, numeric parameters for string fields can be set (e.g. `VARCHAR(255)`), but ClickHouse ignores them.
## Encodings {#encodings}
ClickHouse doesnt have the concept of encodings. Strings can contain an arbitrary set of bytes, which are stored and output as-is.

View File

@ -93,7 +93,7 @@ Setting fields:
- `path` The absolute path to the file.
- `format` The file format. All the formats described in “[Formats](../../../interfaces/formats.md#formats)” are supported.
When dictionary with FILE source is created via DDL command (`CREATE DICTIONARY ...`), source of the dictionary have to be located in `user_files` directory, to prevent DB users accessing arbitrary file on clickhouse node.
When dictionary with source `FILE` is created via DDL command (`CREATE DICTIONARY ...`), the source file needs to be located in `user_files` directory, to prevent DB users accessing arbitrary file on ClickHouse node.
## Executable File {#dicts-external_dicts_dict_sources-executable}
@ -115,7 +115,7 @@ Setting fields:
- `command` The absolute path to the executable file, or the file name (if the program directory is written to `PATH`).
- `format` The file format. All the formats described in “[Formats](../../../interfaces/formats.md#formats)” are supported.
That dictionary source can be configured only via XML configuration. Creating dictionaries with executable source via DDL is disabled, otherwise, the DB user would be able to execute arbitrary binary on clickhouse node.
That dictionary source can be configured only via XML configuration. Creating dictionaries with executable source via DDL is disabled, otherwise, the DB user would be able to execute arbitrary binary on ClickHouse node.
## Http(s) {#dicts-external_dicts_dict_sources-http}
@ -160,14 +160,14 @@ Setting fields:
- `url` The source URL.
- `format` The file format. All the formats described in “[Formats](../../../interfaces/formats.md#formats)” are supported.
- `credentials` Basic HTTP authentication. Optional parameter.
- `user` Username required for the authentication.
- `password` Password required for the authentication.
- `user` Username required for the authentication.
- `password` Password required for the authentication.
- `headers` All custom HTTP headers entries used for the HTTP request. Optional parameter.
- `header` Single HTTP header entry.
- `name` Identifiant name used for the header send on the request.
- `value` Value set for a specific identifiant name.
- `header` Single HTTP header entry.
- `name` Identifiant name used for the header send on the request.
- `value` Value set for a specific identifiant name.
When creating a dictionary using the DDL command (`CREATE DICTIONARY ...`) remote hosts for HTTP dictionaries checked with the `remote_url_allow_hosts` section from config to prevent database users to access arbitrary HTTP server.
When creating a dictionary using the DDL command (`CREATE DICTIONARY ...`) remote hosts for HTTP dictionaries are checked against the contents of `remote_url_allow_hosts` section from config to prevent database users to access arbitrary HTTP server.
## ODBC {#dicts-external_dicts_dict_sources-odbc}

View File

@ -622,7 +622,7 @@ Using replacement fields, you can define a pattern for the resulting string. “
| %C | year divided by 100 and truncated to integer (00-99) | 20 |
| %d | day of the month, zero-padded (01-31) | 02 |
| %D | Short MM/DD/YY date, equivalent to %m/%d/%y | 01/02/18 |
| %e | day of the month, space-padded ( 1-31) | 2 |
| %e | day of the month, space-padded ( 1-31) | 2 |
| %F | short YYYY-MM-DD date, equivalent to %Y-%m-%d | 2018-01-02 |
| %G | four-digit year format for ISO week number, calculated from the week-based year [defined by the ISO 8601](https://en.wikipedia.org/wiki/ISO_8601#Week_dates) standard, normally useful only with %V | 2018 |
| %g | two-digit year format, aligned to ISO 8601, abbreviated from four-digit notation | 18 |
@ -633,6 +633,7 @@ Using replacement fields, you can define a pattern for the resulting string. “
| %M | minute (00-59) | 33 |
| %n | new-line character () | |
| %p | AM or PM designation | PM |
| %Q | Quarter (1-4) | 1 |
| %R | 24-hour HH:MM time, equivalent to %H:%M | 22:33 |
| %S | second (00-59) | 44 |
| %t | horizontal-tab character () | |

View File

@ -243,4 +243,33 @@ SELECT
└───────────────────────────────────┴──────────────────────────────────┘
```
## isIPv4String
Determines if the input string is an IPv4 address or not. Returns `1` if true `0` otherwise.
``` sql
SELECT isIPv4String('127.0.0.1')
```
``` text
┌─isIPv4String('127.0.0.1')─┐
│ 1 │
└───────────────────────────┘
```
## isIPv6String
Determines if the input string is an IPv6 address or not. Returns `1` if true `0` otherwise.
``` sql
SELECT isIPv6String('2001:438:ffff::407d:1bc1')
```
``` text
┌─isIPv6String('2001:438:ffff::407d:1bc1')─┐
│ 1 │
└──────────────────────────────────────────┘
```
[Original article](https://clickhouse.tech/docs/en/query_language/functions/ip_address_functions/) <!--hide-->

View File

@ -413,4 +413,68 @@ Result:
- [log(x)](../../sql-reference/functions/math-functions.md#logx-lnx)
## sign(x) {#signx}
The `sign` function can extract the sign of a real number.
**Syntax**
``` sql
sign(x)
```
**Parameters**
- `x` — Values from `-∞` to `+∞`. Support all numeric types in ClickHouse.
**Returned value**
- -1 for `x < 0`
- 0 for `x = 0`
- 1 for `x > 0`
**Example**
Query:
``` sql
SELECT sign(0);
```
Result:
``` text
┌─sign(0)─┐
│ 0 │
└─────────┘
```
Query:
``` sql
SELECT sign(1);
```
Result:
``` text
┌─sign(1)─┐
│ 1 │
└─────────┘
```
Query:
``` sql
SELECT sign(-1);
```
Result:
``` text
┌─sign(-1)─┐
│ -1 │
└──────────┘
```
[Original article](https://clickhouse.tech/docs/en/query_language/functions/math_functions/) <!--hide-->

View File

@ -1468,7 +1468,7 @@ Code: 395. DB::Exception: Received from localhost:9000. DB::Exception: Too many.
## identity {#identity}
Returns the same value that was used as its argument. Used for debugging and testing, allows to cancel using index, and get the query performance of a full scan. When query is analyzed for possible use of index, the analyzer doesnt look inside `identity` functions.
Returns the same value that was used as its argument. Used for debugging and testing, allows to cancel using index, and get the query performance of a full scan. When query is analyzed for possible use of index, the analyzer doesnt look inside `identity` functions. Also constant folding is not applied too.
**Syntax**

View File

@ -574,7 +574,7 @@ encodeXMLComponent(x)
- `x` — The sequence of characters. [String](../../sql-reference/data-types/string.md).
**Returned value(s)**
**Returned value**
- The sequence of characters with escape characters.

View File

@ -5,16 +5,35 @@ toc_title: QUOTA
# ALTER QUOTA {#alter-quota-statement}
Changes quotas.
Changes [quotas](../../../operations/access-rights.md#quotas-management).
Syntax:
``` sql
ALTER QUOTA [IF EXISTS] name [ON CLUSTER cluster_name]
[RENAME TO new_name]
[KEYED BY {'none' | 'user name' | 'ip address' | 'client key' | 'client key or user name' | 'client key or ip address'}]
[FOR [RANDOMIZED] INTERVAL number {SECOND | MINUTE | HOUR | DAY | WEEK | MONTH | QUARTER | YEAR}
{MAX { {QUERIES | ERRORS | RESULT ROWS | RESULT BYTES | READ ROWS | READ BYTES | EXECUTION TIME} = number } [,...] |
[KEYED BY {user_name | ip_address | client_key | client_key,user_name | client_key,ip_address} | NOT KEYED]
[FOR [RANDOMIZED] INTERVAL number {second | minute | hour | day | week | month | quarter | year}
{MAX { {queries | errors | result_rows | result_bytes | read_rows | read_bytes | execution_time} = number } [,...] |
NO LIMITS | TRACKING ONLY} [,...]]
[TO {role [,...] | ALL | ALL EXCEPT role [,...]}]
```
Keys `user_name`, `ip_address`, `client_key`, `client_key, user_name` and `client_key, ip_address` correspond to the fields in the [system.quotas](../../../operations/system-tables/quotas.md) table.
Parameters `queries`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` correspond to the fields in the [system.quotas_usage](../../../operations/system-tables/quotas_usage.md) table.
`ON CLUSTER` clause allows creating quotas on a cluster, see [Distributed DDL](../../../sql-reference/distributed-ddl.md).
**Examples**
Limit the maximum number of queries for the current user with 123 queries in 15 months constraint:
``` sql
ALTER QUOTA IF EXISTS qA FOR INTERVAL 15 month MAX queries = 123 TO CURRENT_USER;
```
For the default user limit the maximum execution time with half a second in 30 minutes, and limit the maximum number of queries with 321 and the maximum number of errors with 10 in 5 quaters:
``` sql
ALTER QUOTA IF EXISTS qB FOR INTERVAL 30 minute MAX execution_time = 0.5, FOR INTERVAL 5 quarter MAX queries = 321, errors = 10 TO default;
```

View File

@ -10,7 +10,7 @@ Changes roles.
Syntax:
``` sql
ALTER ROLE [IF EXISTS] name [ON CLUSTER cluster_name]
[RENAME TO new_name]
ALTER ROLE [IF EXISTS] name1 [ON CLUSTER cluster_name1] [RENAME TO new_name1]
[, name2 [ON CLUSTER cluster_name2] [RENAME TO new_name2] ...]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | PROFILE 'profile_name'] [,...]
```

View File

@ -10,8 +10,8 @@ Changes row policy.
Syntax:
``` sql
ALTER [ROW] POLICY [IF EXISTS] name [ON CLUSTER cluster_name] ON [database.]table
[RENAME TO new_name]
ALTER [ROW] POLICY [IF EXISTS] name1 [ON CLUSTER cluster_name1] ON [database1.]table1 [RENAME TO new_name1]
[, name2 [ON CLUSTER cluster_name2] ON [database2.]table2 [RENAME TO new_name2] ...]
[AS {PERMISSIVE | RESTRICTIVE}]
[FOR SELECT]
[USING {condition | NONE}][,...]

View File

@ -10,7 +10,7 @@ Changes settings profiles.
Syntax:
``` sql
ALTER SETTINGS PROFILE [IF EXISTS] TO name [ON CLUSTER cluster_name]
[RENAME TO new_name]
ALTER SETTINGS PROFILE [IF EXISTS] TO name1 [ON CLUSTER cluster_name1] [RENAME TO new_name1]
[, name2 [ON CLUSTER cluster_name2] [RENAME TO new_name2] ...]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | INHERIT 'profile_name'] [,...]
```

View File

@ -10,8 +10,8 @@ Changes ClickHouse user accounts.
Syntax:
``` sql
ALTER USER [IF EXISTS] name [ON CLUSTER cluster_name]
[RENAME TO new_name]
ALTER USER [IF EXISTS] name1 [ON CLUSTER cluster_name1] [RENAME TO new_name1]
[, name2 [ON CLUSTER cluster_name2] [RENAME TO new_name2] ...]
[IDENTIFIED [WITH {PLAINTEXT_PASSWORD|SHA256_PASSWORD|DOUBLE_SHA1_PASSWORD}] BY {'password'|'hash'}]
[[ADD|DROP] HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE]
[DEFAULT ROLE role [,...] | ALL | ALL EXCEPT role [,...] ]

View File

@ -11,19 +11,29 @@ Syntax:
``` sql
CREATE QUOTA [IF NOT EXISTS | OR REPLACE] name [ON CLUSTER cluster_name]
[KEYED BY {'none' | 'user name' | 'ip address' | 'forwarded ip address' | 'client key' | 'client key or user name' | 'client key or ip address'}]
[FOR [RANDOMIZED] INTERVAL number {SECOND | MINUTE | HOUR | DAY | WEEK | MONTH | QUARTER | YEAR}
{MAX { {QUERIES | ERRORS | RESULT ROWS | RESULT BYTES | READ ROWS | READ BYTES | EXECUTION TIME} = number } [,...] |
[KEYED BY {user_name | ip_address | client_key | client_key,user_name | client_key,ip_address} | NOT KEYED]
[FOR [RANDOMIZED] INTERVAL number {second | minute | hour | day | week | month | quarter | year}
{MAX { {queries | errors | result_rows | result_bytes | read_rows | read_bytes | execution_time} = number } [,...] |
NO LIMITS | TRACKING ONLY} [,...]]
[TO {role [,...] | ALL | ALL EXCEPT role [,...]}]
```
Keys `user_name`, `ip_address`, `client_key`, `client_key, user_name` and `client_key, ip_address` correspond to the fields in the [system.quotas](../../../operations/system-tables/quotas.md) table.
Parameters `queries`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` correspond to the fields in the [system.quotas_usage](../../../operations/system-tables/quotas_usage.md) table.
`ON CLUSTER` clause allows creating quotas on a cluster, see [Distributed DDL](../../../sql-reference/distributed-ddl.md).
## Example {#create-quota-example}
**Examples**
Limit the maximum number of queries for the current user with 123 queries in 15 months constraint:
``` sql
CREATE QUOTA qA FOR INTERVAL 15 MONTH MAX QUERIES 123 TO CURRENT_USER
CREATE QUOTA qA FOR INTERVAL 15 month MAX queries = 123 TO CURRENT_USER;
```
For the default user limit the maximum execution time with half a second in 30 minutes, and limit the maximum number of queries with 321 and the maximum number of errors with 10 in 5 quaters:
``` sql
CREATE QUOTA qB FOR INTERVAL 30 minute MAX execution_time = 0.5, FOR INTERVAL 5 quarter MAX queries = 321, errors = 10 TO default;
```

View File

@ -5,12 +5,12 @@ toc_title: ROLE
# CREATE ROLE {#create-role-statement}
Creates a new [role](../../../operations/access-rights.md#role-management). Role is a set of [privileges](../../../sql-reference/statements/grant.md#grant-privileges). A [user](../../../sql-reference/statements/create/user.md) assigned a role gets all the privileges of this role.
Creates new [roles](../../../operations/access-rights.md#role-management). Role is a set of [privileges](../../../sql-reference/statements/grant.md#grant-privileges). A [user](../../../sql-reference/statements/create/user.md) assigned a role gets all the privileges of this role.
Syntax:
``` sql
CREATE ROLE [IF NOT EXISTS | OR REPLACE] name
CREATE ROLE [IF NOT EXISTS | OR REPLACE] name1 [, name2 ...]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | PROFILE 'profile_name'] [,...]
```

View File

@ -5,16 +5,17 @@ toc_title: ROW POLICY
# CREATE ROW POLICY {#create-row-policy-statement}
Creates a [filter for rows](../../../operations/access-rights.md#row-policy-management), which a user can read from a table.
Creates [filters for rows](../../../operations/access-rights.md#row-policy-management), which a user can read from a table.
Syntax:
``` sql
CREATE [ROW] POLICY [IF NOT EXISTS | OR REPLACE] policy_name [ON CLUSTER cluster_name] ON [db.]table
CREATE [ROW] POLICY [IF NOT EXISTS | OR REPLACE] policy_name1 [ON CLUSTER cluster_name1] ON [db1.]table1
[, policy_name2 [ON CLUSTER cluster_name2] ON [db2.]table2 ...]
[AS {PERMISSIVE | RESTRICTIVE}]
[FOR SELECT]
[USING condition]
[TO {role [,...] | ALL | ALL EXCEPT role [,...]}]
[TO {role1 [, role2 ...] | ALL | ALL EXCEPT role1 [, role2 ...]}]
```
`ON CLUSTER` clause allows creating row policies on a cluster, see [Distributed DDL](../../../sql-reference/distributed-ddl.md).

View File

@ -5,12 +5,13 @@ toc_title: SETTINGS PROFILE
# CREATE SETTINGS PROFILE {#create-settings-profile-statement}
Creates a [settings profile](../../../operations/access-rights.md#settings-profiles-management) that can be assigned to a user or a role.
Creates [settings profiles](../../../operations/access-rights.md#settings-profiles-management) that can be assigned to a user or a role.
Syntax:
``` sql
CREATE SETTINGS PROFILE [IF NOT EXISTS | OR REPLACE] TO name [ON CLUSTER cluster_name]
CREATE SETTINGS PROFILE [IF NOT EXISTS | OR REPLACE] TO name1 [ON CLUSTER cluster_name1]
[, name2 [ON CLUSTER cluster_name2] ...]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | INHERIT 'profile_name'] [,...]
```

View File

@ -23,7 +23,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
```
Creates a table named `name` in the `db` database or the current database if `db` is not set, with the structure specified in brackets and the `engine` engine.
The structure of the table is a list of column descriptions, secondary indexes and constraints . If primary key is supported by the engine, it will be indicated as parameter for the table engine.
The structure of the table is a list of column descriptions, secondary indexes and constraints . If [primary key](#primary-key) is supported by the engine, it will be indicated as parameter for the table engine.
A column description is `name type` in the simplest case. Example: `RegionID UInt32`.
@ -39,13 +39,13 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name AS [db2.]name2 [ENGINE = engine]
Creates a table with the same structure as another table. You can specify a different engine for the table. If the engine is not specified, the same engine will be used as for the `db2.name2` table.
## From a Table Function {#from-a-table-function}
### From a Table Function {#from-a-table-function}
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name AS table_function()
```
Creates a table with the structure and data returned by a [table function](../../../sql-reference/table-functions/index.md#table-functions).
Creates a table with the same result as that of the [table function](../../../sql-reference/table-functions/index.md#table-functions) specified. The created table will also work in the same way as the corresponding table function that was specified.
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name ENGINE = engine AS SELECT ...
@ -111,7 +111,7 @@ It is not possible to set default values for elements in nested data structures.
You can define a [primary key](../../../engines/table-engines/mergetree-family/mergetree.md#primary-keys-and-indexes-in-queries) when creating a table. Primary key can be specified in two ways:
- inside the column list
- Inside the column list
``` sql
CREATE TABLE db.table_name
@ -122,7 +122,7 @@ CREATE TABLE db.table_name
ENGINE = engine;
```
- outside the column list
- Outside the column list
``` sql
CREATE TABLE db.table_name
@ -133,7 +133,8 @@ ENGINE = engine
PRIMARY KEY(expr1[, expr2,...]);
```
You can't combine both ways in one query.
!!! warning "Warning"
You can't combine both ways in one query.
## Constraints {#constraints}
@ -259,3 +260,78 @@ CREATE TEMPORARY TABLE [IF NOT EXISTS] table_name
In most cases, temporary tables are not created manually, but when using external data for a query, or for distributed `(GLOBAL) IN`. For more information, see the appropriate sections
Its possible to use tables with [ENGINE = Memory](../../../engines/table-engines/special/memory.md) instead of temporary tables.
## REPLACE TABLE {#replace-table-query}
'REPLACE' query allows you to update the table atomically.
!!!note "Note"
This query is supported only for [Atomic](../../../engines/database-engines/atomic.md) database engine.
If you need to delete some data from a table, you can create a new table and fill it with a `SELECT` statement that doesn't retrieve unwanted data, then drop the old table and rename the new one:
```sql
CREATE TABLE myNewTable AS myOldTable;
INSERT INTO myNewTable SELECT * FROM myOldTable WHERE CounterID <12345;
DROP TABLE myOldTable;
RENAME TABLE myNewTable TO myOldTable;
```
Instead of above, you can use the following:
```sql
REPLACE TABLE myOldTable SELECT * FROM myOldTable WHERE CounterID <12345;
```
### Syntax
{CREATE [OR REPLACE]|REPLACE} TABLE [db.]table_name
All syntax forms for `CREATE` query also work for this query. `REPLACE` for a non-existent table will cause an error.
### Examples:
Consider the table:
```sql
CREATE DATABASE base ENGINE = Atomic;
CREATE OR REPLACE TABLE base.t1 (n UInt64, s String) ENGINE = MergeTree ORDER BY n;
INSERT INTO base.t1 VALUES (1, 'test');
SELECT * FROM base.t1;
```
```text
┌─n─┬─s────┐
│ 1 │ test │
└───┴──────┘
```
Using `REPLACE` query to clear all data:
```sql
CREATE OR REPLACE TABLE base.t1 (n UInt64, s Nullable(String)) ENGINE = MergeTree ORDER BY n;
INSERT INTO base.t1 VALUES (2, null);
SELECT * FROM base.t1;
```
```text
┌─n─┬─s──┐
│ 2 │ \N │
└───┴────┘
```
Using `REPLACE` query to change table structure:
```sql
REPLACE TABLE base.t1 (n UInt64) ENGINE = MergeTree ORDER BY n;
INSERT INTO base.t1 VALUES (3);
SELECT * FROM base.t1;
```
```text
┌─n─┐
│ 3 │
└───┘
```
[Original article](https://clickhouse.tech/docs/en/sql-reference/statements/create/table) <!--hide-->

View File

@ -5,12 +5,13 @@ toc_title: USER
# CREATE USER {#create-user-statement}
Creates a [user account](../../../operations/access-rights.md#user-account-management).
Creates [user accounts](../../../operations/access-rights.md#user-account-management).
Syntax:
``` sql
CREATE USER [IF NOT EXISTS | OR REPLACE] name [ON CLUSTER cluster_name]
CREATE USER [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1]
[, name2 [ON CLUSTER cluster_name2] ...]
[IDENTIFIED [WITH {NO_PASSWORD|PLAINTEXT_PASSWORD|SHA256_PASSWORD|SHA256_HASH|DOUBLE_SHA1_PASSWORD|DOUBLE_SHA1_HASH}] BY {'password'|'hash'}]
[HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE]
[DEFAULT ROLE role [,...]]
@ -69,7 +70,7 @@ CREATE USER john DEFAULT ROLE role1, role2
Create the user account `john` and make all his future roles default:
``` sql
ALTER USER user DEFAULT ROLE ALL
CREATE USER user DEFAULT ROLE ALL
```
When some role is assigned to `john` in the future, it will become default automatically.
@ -77,5 +78,5 @@ When some role is assigned to `john` in the future, it will become default autom
Create the user account `john` and make all his future roles default excepting `role1` and `role2`:
``` sql
ALTER USER john DEFAULT ROLE ALL EXCEPT role1, role2
CREATE USER john DEFAULT ROLE ALL EXCEPT role1, role2
```

View File

@ -13,7 +13,7 @@ Basic query format:
INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ...
```
You can specify a list of columns to insert using the `(c1, c2, c3)`. You can also use an expression with column [matcher](../../sql-reference/statements/select/index.md#asterisk) such as `*` and/or [modifiers](../../sql-reference/statements/select/index.md#select-modifiers) such as [APPLY](../../sql-reference/statements/select/index.md#apply-modifier), [EXCEPT](../../sql-reference/statements/select/index.md#apply-modifier), [REPLACE](../../sql-reference/statements/select/index.md#replace-modifier).
You can specify a list of columns to insert using the `(c1, c2, c3)`. You can also use an expression with column [matcher](../../sql-reference/statements/select/index.md#asterisk) such as `*` and/or [modifiers](../../sql-reference/statements/select/index.md#select-modifiers) such as [APPLY](../../sql-reference/statements/select/index.md#apply-modifier), [EXCEPT](../../sql-reference/statements/select/index.md#apply-modifier), [REPLACE](../../sql-reference/statements/select/index.md#replace-modifier).
For example, consider the table:
@ -30,7 +30,6 @@ CREATE TABLE insert_select_testtable
)
ENGINE = MergeTree()
ORDER BY a
SETTINGS index_granularity = 8192
```
``` sql
@ -55,7 +54,7 @@ SELECT * FROM insert_select_testtable;
│ 1 │ a │ 1 │
└───┴───┴───┘
```
In this example, we see that the second inserted row has `a` and `c` columns filled by the passed values, and `b` filled with value by default.
If a list of columns doesn't include all existing columns, the rest of the columns are filled with:

View File

@ -25,6 +25,8 @@ It is applicable when selecting data from tables that use the [MergeTree](../../
- [Replicated](../../../engines/table-engines/mergetree-family/replication.md) versions of `MergeTree` engines.
- [View](../../../engines/table-engines/special/view.md), [Buffer](../../../engines/table-engines/special/buffer.md), [Distributed](../../../engines/table-engines/special/distributed.md), and [MaterializedView](../../../engines/table-engines/special/materializedview.md) engines that operate over other engines, provided they were created over `MergeTree`-engine tables.
Now `SELECT` queries with `FINAL` are executed in parallel and slightly faster. But there are drawbacks (see below). The [max_final_threads](../../../operations/settings/settings.md#max-final-threads) setting limits the number of threads used.
### Drawbacks {#drawbacks}
Queries that use `FINAL` are executed slightly slower than similar queries that dont, because:

View File

@ -255,6 +255,10 @@ For every different key value encountered, `GROUP BY` calculates a set of aggreg
Aggregation is one of the most important features of a column-oriented DBMS, and thus its implementation is one of the most heavily optimized parts of ClickHouse. By default, aggregation is done in memory using a hash-table. It has 40+ specializations that are chosen automatically depending on “grouping key” data types.
### GROUP BY Optimization Depending on Table Sorting Key {#aggregation-in-order}
The aggregation can be performed more effectively, if a table is sorted by some key, and `GROUP BY` expression contains at least prefix of sorting key or injective functions. In this case when a new key is read from table, the in-between result of aggregation can be finalized and sent to client. This behaviour is switched on by the [optimize_aggregation_in_order](../../../operations/settings/settings.md#optimize_aggregation_in_order) setting. Such optimization reduces memory usage during aggregation, but in some cases may slow down the query execution.
### GROUP BY in External Memory {#select-group-by-in-external-memory}
You can enable dumping temporary data to the disk to restrict memory usage during `GROUP BY`.

View File

@ -25,6 +25,7 @@ SELECT [DISTINCT] expr_list
[ORDER BY expr_list] [WITH FILL] [FROM expr] [TO expr] [STEP expr]
[LIMIT [offset_value, ]n BY columns]
[LIMIT [n, ]m] [WITH TIES]
[SETTINGS ...]
[UNION ...]
[INTO OUTFILE filename]
[FORMAT format]
@ -265,5 +266,17 @@ SELECT * REPLACE(i + 1 AS i) EXCEPT (j) APPLY(sum) from columns_transformers;
└─────────────────┴────────┘
```
## SETTINGS in SELECT Query {#settings-in-select}
You can specify the necessary settings right in the `SELECT` query. The setting value is applied only to this query and is reset to default or previous value after the query is executed.
Other ways to make settings see [here](../../../operations/settings/index.md).
**Example**
``` sql
SELECT * FROM some_table SETTINGS optimize_read_in_order=1, cast_keep_nullable=1;
```
[Original article](https://clickhouse.tech/docs/en/sql-reference/statements/select/)
<!--hide-->

View File

@ -231,7 +231,7 @@ Shows privileges for a user.
### Syntax {#show-grants-syntax}
``` sql
SHOW GRANTS [FOR user]
SHOW GRANTS [FOR user1 [, user2 ...]]
```
If user is not specified, the query returns privileges for the current user.
@ -245,7 +245,7 @@ Shows parameters that were used at a [user creation](../../sql-reference/stateme
### Syntax {#show-create-user-syntax}
``` sql
SHOW CREATE USER [name | CURRENT_USER]
SHOW CREATE USER [name1 [, name2 ...] | CURRENT_USER]
```
## SHOW CREATE ROLE {#show-create-role-statement}
@ -255,7 +255,7 @@ Shows parameters that were used at a [role creation](../../sql-reference/stateme
### Syntax {#show-create-role-syntax}
``` sql
SHOW CREATE ROLE name
SHOW CREATE ROLE name1 [, name2 ...]
```
## SHOW CREATE ROW POLICY {#show-create-row-policy-statement}
@ -265,7 +265,7 @@ Shows parameters that were used at a [row policy creation](../../sql-reference/s
### Syntax {#show-create-row-policy-syntax}
``` sql
SHOW CREATE [ROW] POLICY name ON [database.]table
SHOW CREATE [ROW] POLICY name ON [database1.]table1 [, [database2.]table2 ...]
```
## SHOW CREATE QUOTA {#show-create-quota-statement}
@ -275,7 +275,7 @@ Shows parameters that were used at a [quota creation](../../sql-reference/statem
### Syntax {#show-create-quota-syntax}
``` sql
SHOW CREATE QUOTA [name | CURRENT]
SHOW CREATE QUOTA [name1 [, name2 ...] | CURRENT]
```
## SHOW CREATE SETTINGS PROFILE {#show-create-settings-profile-statement}
@ -285,7 +285,7 @@ Shows parameters that were used at a [settings profile creation](../../sql-refer
### Syntax {#show-create-settings-profile-syntax}
``` sql
SHOW CREATE [SETTINGS] PROFILE name
SHOW CREATE [SETTINGS] PROFILE name1 [, name2 ...]
```
## SHOW USERS {#show-users-statement}
@ -307,7 +307,6 @@ Returns a list of [roles](../../operations/access-rights.md#role-management). To
``` sql
SHOW [CURRENT|ENABLED] ROLES
```
## SHOW PROFILES {#show-profiles-statement}
Returns a list of [setting profiles](../../operations/access-rights.md#settings-profiles-management). To view user accounts parameters, see the system table [settings_profiles](../../operations/system-tables/settings_profiles.md#system_tables-settings_profiles).
@ -347,5 +346,86 @@ Returns a [quota](../../operations/quotas.md) consumption for all users or for c
``` sql
SHOW [CURRENT] QUOTA
```
## SHOW ACCESS {#show-access-statement}
Shows all [users](../../operations/access-rights.md#user-account-management), [roles](../../operations/access-rights.md#role-management), [profiles](../../operations/access-rights.md#settings-profiles-management), etc. and all their [grants](../../sql-reference/statements/grant.md#grant-privileges).
### Syntax {#show-access-syntax}
``` sql
SHOW ACCESS
```
## SHOW CLUSTER(s) {#show-cluster-statement}
Returns a list of clusters. All available clusters are listed in the [system.clusters](../../operations/system-tables/clusters.md) table.
!!! info "Note"
`SHOW CLUSTER name` query displays the contents of system.clusters table for this cluster.
### Syntax {#show-cluster-syntax}
``` sql
SHOW CLUSTER '<name>'
SWOW CLUSTERS [LIKE|NOT LIKE '<pattern>'] [LIMIT <N>]
```
### Examples
Query:
``` sql
SHOW CLUSTERS;
```
Result:
```text
┌─cluster──────────────────────────────────────┐
│ test_cluster_two_shards │
│ test_cluster_two_shards_internal_replication │
│ test_cluster_two_shards_localhost │
│ test_shard_localhost │
│ test_shard_localhost_secure │
│ test_unavailable_shard │
└──────────────────────────────────────────────┘
```
Query:
``` sql
SHOW CLUSTERS LIKE 'test%' LIMIT 1;
```
Result:
```text
┌─cluster─────────────────┐
│ test_cluster_two_shards │
└─────────────────────────┘
```
Query:
``` sql
SHOW CLUSTER 'test_shard_localhost' FORMAT Vertical;
```
Result:
```text
Row 1:
──────
cluster: test_shard_localhost
shard_num: 1
shard_weight: 1
replica_num: 1
host_name: localhost
host_address: 127.0.0.1
port: 9000
is_local: 1
user: default
default_database:
errors_count: 0
estimated_recovery_time: 0
```
[Original article](https://clickhouse.tech/docs/en/query_language/show/) <!--hide-->

View File

@ -5,7 +5,11 @@ toc_title: file
# file {#file}
Creates a table from a file. This table function is similar to [url](../../sql-reference/table-functions/url.md) and [hdfs](../../sql-reference/table-functions/hdfs.md) ones.
Creates a table from a file. This table function is similar to [url](../../sql-reference/table-functions/url.md) and [hdfs](../../sql-reference/table-functions/hdfs.md) ones.
`file` function can be used in `SELECT` and `INSERT` queries on data in [File](../../engines/table-engines/special/file.md) tables.
**Syntax**
``` sql
file(path, format, structure)
@ -13,15 +17,15 @@ file(path, format, structure)
**Input parameters**
- `path` — The relative path to the file from [user_files_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Path to file support following globs in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, \``'abc', 'def'` — strings.
- `path` — The relative path to the file from [user_files_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Path to file support following globs in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc', 'def'` — strings.
- `format` — The [format](../../interfaces/formats.md#formats) of the file.
- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`.
- `structure` — Structure of the table. Format: `'column1_name column1_type, column2_name column2_type, ...'`.
**Returned value**
A table with the specified structure for reading or writing data in the specified file.
**Example**
**Examples**
Setting `user_files_path` and the contents of the file `test.csv`:
@ -35,12 +39,29 @@ $ cat /var/lib/clickhouse/user_files/test.csv
78,43,45
```
Table from`test.csv` and selection of the first two rows from it:
Getting data from a table in `test.csv` and selecting first two rows from it:
``` sql
SELECT *
FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32')
LIMIT 2
SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') LIMIT 2;
```
``` text
┌─column1─┬─column2─┬─column3─┐
│ 1 │ 2 │ 3 │
│ 3 │ 2 │ 1 │
└─────────┴─────────┴─────────┘
```
Getting the first 10 lines of a table that contains 3 columns of UInt32 type from a CSV file:
``` sql
SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') LIMIT 10;
```
Inserting data from a file into a table:
``` sql
INSERT INTO FUNCTION file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') VALUES (1, 2, 3), (3, 2, 1);
SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32');
```
``` text
@ -50,12 +71,8 @@ LIMIT 2
└─────────┴─────────┴─────────┘
```
``` sql
-- getting the first 10 lines of a table that contains 3 columns of UInt32 type from a CSV file
SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') LIMIT 10
```
**Globs in path**
## Globs in Path {#globs-in-path}
Multiple path components can have globs. For being processed file should exists and matches to the whole path pattern (not only suffix or prefix).
@ -68,31 +85,25 @@ Constructions with `{}` are similar to the [remote table function](../../sql-ref
**Example**
1. Suppose we have several files with the following relative paths:
Suppose we have several files with the following relative paths:
- some_dir/some_file_1
- some_dir/some_file_2
- some_dir/some_file_3
- another_dir/some_file_1
- another_dir/some_file_2
- another_dir/some_file_3
- 'some_dir/some_file_1'
- 'some_dir/some_file_2'
- 'some_dir/some_file_3'
- 'another_dir/some_file_1'
- 'another_dir/some_file_2'
- 'another_dir/some_file_3'
1. Query the amount of rows in these files:
<!-- -->
Query the amount of rows in these files:
``` sql
SELECT count(*)
FROM file('{some,another}_dir/some_file_{1..3}', 'TSV', 'name String, value UInt32')
SELECT count(*) FROM file('{some,another}_dir/some_file_{1..3}', 'TSV', 'name String, value UInt32');
```
1. Query the amount of rows in all files of these two directories:
<!-- -->
Query the amount of rows in all files of these two directories:
``` sql
SELECT count(*)
FROM file('{some,another}_dir/*', 'TSV', 'name String, value UInt32')
SELECT count(*) FROM file('{some,another}_dir/*', 'TSV', 'name String, value UInt32');
```
!!! warning "Warning"
@ -103,8 +114,7 @@ FROM file('{some,another}_dir/*', 'TSV', 'name String, value UInt32')
Query the data from files named `file000`, `file001`, … , `file999`:
``` sql
SELECT count(*)
FROM file('big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name String, value UInt32')
SELECT count(*) FROM file('big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name String, value UInt32');
```
## Virtual Columns {#virtual-columns}
@ -116,4 +126,4 @@ FROM file('big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name String, value UInt32')
- [Virtual columns](https://clickhouse.tech/docs/en/operations/table_engines/#table_engines-virtual_columns)
[Original article](https://clickhouse.tech/docs/en/query_language/table_functions/file/) <!--hide-->
[Original article](https://clickhouse.tech/docs/en/sql-reference/table-functions/file/) <!--hide-->

View File

@ -5,9 +5,11 @@ toc_title: remote
# remote, remoteSecure {#remote-remotesecure}
Allows you to access remote servers without creating a `Distributed` table.
Allows to access remote servers without creating a [Distributed](../../engines/table-engines/special/distributed.md) table. `remoteSecure` - same as `remote` but with secured connection.
Signatures:
Both functions can be used in `SELECT` and `INSERT` queries.
**Syntax**
``` sql
remote('addresses_expr', db, table[, 'user'[, 'password'], sharding_key])
@ -16,13 +18,40 @@ remoteSecure('addresses_expr', db, table[, 'user'[, 'password'], sharding_key])
remoteSecure('addresses_expr', db.table[, 'user'[, 'password'], sharding_key])
```
`addresses_expr` An expression that generates addresses of remote servers. This may be just one server address. The server address is `host:port`, or just `host`. The host can be specified as the server name, or as the IPv4 or IPv6 address. An IPv6 address is specified in square brackets. The port is the TCP port on the remote server. If the port is omitted, it uses `tcp_port` from the servers config file (by default, 9000).
`sharding_key` - We can specify sharding key to support distributing data across nodes. For example: `insert into remote('127.0.0.1:9000,127.0.0.2', db, table, 'default', rand())`.
**Input parameters**
- `addresses_expr` An expression that generates addresses of remote servers. This may be just one server address. The server address is `host:port`, or just `host`.
The host can be specified as the server name, or as the IPv4 or IPv6 address. An IPv6 address is specified in square brackets.
The port is the TCP port on the remote server. If the port is omitted, it uses [tcp_port](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port) from the servers config file in `remote` (by default, 9000) and [tcp_port_secure](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port_secure) in `remoteSecure` (by default, 9440).
!!! important "Important"
The port is required for an IPv6 address.
Examples:
Type: [String](../../sql-reference/data-types/string.md).
- `db` - Database name. Type: [String](../../sql-reference/data-types/string.md).
- `table` - Table name. Type: [String](../../sql-reference/data-types/string.md).
- `user` - User name. If the user is not specified, `default` is used. Type: [String](../../sql-reference/data-types/string.md).
- `password` - User password. If the password is not specified, an empty password is used. Type: [String](../../sql-reference/data-types/string.md).
- `sharding_key` - Sharding key to support distributing data across nodes. For example: `insert into remote('127.0.0.1:9000,127.0.0.2', db, table, 'default', rand())`. Type: [UInt32](../../sql-reference/data-types/int-uint.md).
**Returned value**
Dataset from remote servers.
**Usage**
Using the `remote` table function is less optimal than creating a `Distributed` table, because in this case the server connection is re-established for every request. In addition, if host names are set, the names are resolved, and errors are not counted when working with various replicas. When processing a large number of queries, always create the `Distributed` table ahead of time, and dont use the `remote` table function.
The `remote` table function can be useful in the following cases:
- Accessing a specific server for data comparison, debugging, and testing.
- Queries between various ClickHouse clusters for research purposes.
- Infrequent distributed requests that are made manually.
- Distributed requests where the set of servers is re-defined each time.
**Adresses**
``` text
example01-01-1
@ -33,9 +62,7 @@ localhost
[2a02:6b8:0:1111::11]:9000
```
Multiple addresses can be comma-separated. In this case, ClickHouse will use distributed processing, so it will send the query to all specified addresses (like to shards with different data).
Example:
Multiple addresses can be comma-separated. In this case, ClickHouse will use distributed processing, so it will send the query to all specified addresses (like to shards with different data). Example:
``` text
example01-01-1,example01-02-1
@ -55,30 +82,28 @@ example01-{01..02}-1
If you have multiple pairs of curly brackets, it generates the direct product of the corresponding sets.
Addresses and parts of addresses in curly brackets can be separated by the pipe symbol (\|). In this case, the corresponding sets of addresses are interpreted as replicas, and the query will be sent to the first healthy replica. However, the replicas are iterated in the order currently set in the [load_balancing](../../operations/settings/settings.md) setting.
Example:
Addresses and parts of addresses in curly brackets can be separated by the pipe symbol (\|). In this case, the corresponding sets of addresses are interpreted as replicas, and the query will be sent to the first healthy replica. However, the replicas are iterated in the order currently set in the [load_balancing](../../operations/settings/settings.md) setting. This example specifies two shards that each have two replicas:
``` text
example01-{01..02}-{1|2}
```
This example specifies two shards that each have two replicas.
The number of addresses generated is limited by a constant. Right now this is 1000 addresses.
Using the `remote` table function is less optimal than creating a `Distributed` table, because in this case, the server connection is re-established for every request. In addition, if host names are set, the names are resolved, and errors are not counted when working with various replicas. When processing a large number of queries, always create the `Distributed` table ahead of time, and dont use the `remote` table function.
**Examples**
The `remote` table function can be useful in the following cases:
Selecting data from a remote server:
- Accessing a specific server for data comparison, debugging, and testing.
- Queries between various ClickHouse clusters for research purposes.
- Infrequent distributed requests that are made manually.
- Distributed requests where the set of servers is re-defined each time.
``` sql
SELECT * FROM remote('127.0.0.1', db.remote_engine_table) LIMIT 3;
```
If the user is not specified, `default` is used.
If the password is not specified, an empty password is used.
Inserting data from a remote server into a table:
`remoteSecure` - same as `remote` but with secured connection. Default port — [tcp_port_secure](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port_secure) from config or 9440.
``` sql
CREATE TABLE remote_table (name String, value UInt32) ENGINE=Memory;
INSERT INTO FUNCTION remote('127.0.0.1', currentDatabase(), 'remote_table') VALUES ('test', 42);
SELECT * FROM remote_table;
```
[Original article](https://clickhouse.tech/docs/en/query_language/table_functions/remote/) <!--hide-->
[Original article](https://clickhouse.tech/docs/en/sql-reference/table-functions/remote/) <!--hide-->

Some files were not shown because too many files have changed in this diff Show More