Merge remote-tracking branch 'blessed/master' into speedup_numbers

This commit is contained in:
Raúl Marín 2024-01-03 13:33:22 +00:00
commit ff90f64bc1
335 changed files with 7111 additions and 2941 deletions

View File

@ -1,4 +1,4 @@
Copyright 2016-2023 ClickHouse, Inc.
Copyright 2016-2024 ClickHouse, Inc.
Apache License
Version 2.0, January 2004
@ -188,7 +188,7 @@ Copyright 2016-2023 ClickHouse, Inc.
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright 2016-2023 ClickHouse, Inc.
Copyright 2016-2024 ClickHouse, Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.

View File

@ -33,12 +33,7 @@ curl https://clickhouse.com/ | sh
## Upcoming Events
* [**ClickHouse Meetup in Berlin**](https://www.meetup.com/clickhouse-berlin-user-group/events/296488501/) - Nov 30
* [**ClickHouse Meetup in NYC**](https://www.meetup.com/clickhouse-new-york-user-group/events/296488779/) - Dec 11
* [**ClickHouse Meetup in Sydney**](https://www.meetup.com/clickhouse-sydney-user-group/events/297638812/) - Dec 12
* [**ClickHouse Meetup in Boston**](https://www.meetup.com/clickhouse-boston-user-group/events/296488840/) - Dec 12
Also, keep an eye out for upcoming meetups around the world. Somewhere else you want us to be? Please feel free to reach out to tyler <at> clickhouse <dot> com.
Keep an eye out for upcoming meetups around the world. Somewhere else you want us to be? Please feel free to reach out to tyler <at> clickhouse <dot> com.
## Recent Recordings
* **Recent Meetup Videos**: [Meetup Playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3iNDUzpY1S3L_iV4nARda_U) Whenever possible, recordings of the ClickHouse Community Meetups are edited and presented as individual talks. Currently featuring "Modern SQL in 2023", "Fast, Concurrent, and Consistent Asynchronous INSERTS in ClickHouse", and "Full-Text Indices: Design and Experiments"

View File

@ -18,6 +18,7 @@
#ifndef POCO_UTIL_NO_XMLCONFIGURATION
#include "Poco/String.h"
#include "Poco/SAX/InputSource.h"
#include "Poco/DOM/DOMParser.h"
#include "Poco/DOM/Element.h"
@ -28,6 +29,8 @@
#include "Poco/NumberParser.h"
#include "Poco/NumberFormatter.h"
#include <unordered_map>
#include <algorithm>
#include <iterator>
namespace Poco {
@ -275,8 +278,9 @@ void XMLConfiguration::enumerate(const std::string& key, Keys& range) const
{
if (pChild->nodeType() == Poco::XML::Node::ELEMENT_NODE)
{
const std::string& nodeName = pChild->nodeName();
std::string nodeName = pChild->nodeName();
size_t& count = keys[nodeName];
replaceInPlace(nodeName, ".", "\\.");
if (count)
range.push_back(nodeName + "[" + NumberFormatter::format(count) + "]");
else
@ -379,7 +383,21 @@ Poco::XML::Node* XMLConfiguration::findNode(std::string::const_iterator& it, con
{
while (it != end && *it == _delim) ++it;
std::string key;
while (it != end && *it != _delim && *it != '[') key += *it++;
while (it != end)
{
if (*it == '\\' && std::distance(it, end) > 1)
{
// Skip backslash, copy only the char after it
std::advance(it, 1);
key += *it++;
continue;
}
if (*it == _delim)
break;
if (*it == '[')
break;
key += *it++;
}
return findNode(it, end, findElement(key, pNode, create), create);
}
}

View File

@ -3,10 +3,10 @@
# NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION,
# only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes.
SET(VERSION_REVISION 54482)
SET(VERSION_MAJOR 23)
SET(VERSION_MINOR 13)
SET(VERSION_MAJOR 24)
SET(VERSION_MINOR 1)
SET(VERSION_PATCH 1)
SET(VERSION_GITHASH a2faa65b080a587026c86844f3a20c74d23a86f8)
SET(VERSION_DESCRIBE v23.13.1.1-testing)
SET(VERSION_STRING 23.13.1.1)
SET(VERSION_DESCRIBE v24.1.1.1-testing)
SET(VERSION_STRING 24.1.1.1)
# end of autochange

View File

@ -1,4 +1,4 @@
if(OS_LINUX AND TARGET OpenSSL::SSL)
if((OS_LINUX OR OS_DARWIN) AND TARGET OpenSSL::SSL)
option(ENABLE_MYSQL "Enable MySQL" ${ENABLE_LIBRARIES})
else ()
option(ENABLE_MYSQL "Enable MySQL" FALSE)
@ -73,7 +73,7 @@ set(HAVE_SYS_TYPES_H 1)
set(HAVE_SYS_UN_H 1)
set(HAVE_UNISTD_H 1)
set(HAVE_UTIME_H 1)
set(HAVE_UCONTEXT_H 1)
set(HAVE_UCONTEXT_H 0)
set(HAVE_ALLOCA 1)
set(HAVE_DLERROR 0)
set(HAVE_DLOPEN 0)
@ -116,9 +116,13 @@ CONFIGURE_FILE(${CC_SOURCE_DIR}/include/ma_config.h.in
CONFIGURE_FILE(${CC_SOURCE_DIR}/include/mariadb_version.h.in
${CC_BINARY_DIR}/include-public/mariadb_version.h)
if(WITH_SSL)
if (WITH_SSL)
set(SYSTEM_LIBS ${SYSTEM_LIBS} ${SSL_LIBRARIES})
endif()
endif ()
if (OS_DARWIN)
set(SYSTEM_LIBS ${SYSTEM_LIBS} iconv)
endif ()
function(REGISTER_PLUGIN)
@ -227,15 +231,8 @@ ${CC_SOURCE_DIR}/libmariadb/secure/openssl_crypt.c
${CC_BINARY_DIR}/libmariadb/ma_client_plugin.c
)
if(ICONV_INCLUDE_DIR)
include_directories(BEFORE ${ICONV_INCLUDE_DIR})
endif()
add_definitions(-DLIBICONV_PLUG)
if(WITH_DYNCOL)
set(LIBMARIADB_SOURCES ${LIBMARIADB_SOURCES} ${CC_SOURCE_DIR}/libmariadb/mariadb_dyncol.c)
endif()
set(LIBMARIADB_SOURCES ${LIBMARIADB_SOURCES} ${CC_SOURCE_DIR}/libmariadb/mariadb_async.c ${CC_SOURCE_DIR}/libmariadb/ma_context.c)

View File

@ -3,10 +3,10 @@ compilers and build settings. Correctly configured Docker daemon is single depen
Usage:
Build deb package with `clang-14` in `debug` mode:
Build deb package with `clang-17` in `debug` mode:
```
$ mkdir deb/test_output
$ ./packager --output-dir deb/test_output/ --package-type deb --compiler=clang-14 --debug-build
$ ./packager --output-dir deb/test_output/ --package-type deb --compiler=clang-17 --debug-build
$ ls -l deb/test_output
-rw-r--r-- 1 root root 3730 clickhouse-client_22.2.2+debug_all.deb
-rw-r--r-- 1 root root 84221888 clickhouse-common-static_22.2.2+debug_amd64.deb
@ -17,11 +17,11 @@ $ ls -l deb/test_output
```
Build ClickHouse binary with `clang-14` and `address` sanitizer in `relwithdebuginfo`
Build ClickHouse binary with `clang-17` and `address` sanitizer in `relwithdebuginfo`
mode:
```
$ mkdir $HOME/some_clickhouse
$ ./packager --output-dir=$HOME/some_clickhouse --package-type binary --compiler=clang-14 --sanitizer=address
$ ./packager --output-dir=$HOME/some_clickhouse --package-type binary --compiler=clang-17 --sanitizer=address
$ ls -l $HOME/some_clickhouse
-rwxr-xr-x 1 root root 787061952 clickhouse
lrwxrwxrwx 1 root root 10 clickhouse-benchmark -> clickhouse

View File

@ -1,206 +1,206 @@
---
slug: /en/development/build-cross-s390x
sidebar_position: 69
title: How to Build, Run and Debug ClickHouse on Linux for s390x (zLinux)
sidebar_label: Build on Linux for s390x (zLinux)
---
As of writing (2023/3/10) building for s390x considered to be experimental. Not all features can be enabled, has broken features and is currently under active development.
## Building
As s390x does not support boringssl, it uses OpenSSL and has two related build options.
- By default, the s390x build will dynamically link to OpenSSL libraries. It will build OpenSSL shared objects, so it's not necessary to install OpenSSL beforehand. (This option is recommended in all cases.)
- Another option is to build OpenSSL in-tree. In this case two build flags need to be supplied to cmake
```bash
-DENABLE_OPENSSL_DYNAMIC=0 -DENABLE_OPENSSL=1
```
These instructions assume that the host machine is x86_64 and has all the tooling required to build natively based on the [build instructions](../development/build.md). It also assumes that the host is Ubuntu 22.04 but the following instructions should also work on Ubuntu 20.04.
In addition to installing the tooling used to build natively, the following additional packages need to be installed:
```bash
apt-get install binutils-s390x-linux-gnu libc6-dev-s390x-cross gcc-s390x-linux-gnu binfmt-support qemu-user-static
```
If you wish to cross compile rust code install the rust cross compile target for s390x:
```bash
rustup target add s390x-unknown-linux-gnu
```
To build for s390x:
```bash
cmake -DCMAKE_TOOLCHAIN_FILE=cmake/linux/toolchain-s390x.cmake ..
ninja
```
## Running
Once built, the binary can be run with, eg.:
```bash
qemu-s390x-static -L /usr/s390x-linux-gnu ./clickhouse
```
## Debugging
Install LLDB:
```bash
apt-get install lldb-15
```
To Debug a s390x executable, run clickhouse using QEMU in debug mode:
```bash
qemu-s390x-static -g 31338 -L /usr/s390x-linux-gnu ./clickhouse
```
In another shell run LLDB and attach, replace `<Clickhouse Parent Directory>` and `<build directory>` with the values corresponding to your environment.
```bash
lldb-15
(lldb) target create ./clickhouse
Current executable set to '/<Clickhouse Parent Directory>/ClickHouse/<build directory>/programs/clickhouse' (s390x).
(lldb) settings set target.source-map <build directory> /<Clickhouse Parent Directory>/ClickHouse
(lldb) gdb-remote 31338
Process 1 stopped
* thread #1, stop reason = signal SIGTRAP
frame #0: 0x0000004020e74cd0
-> 0x4020e74cd0: lgr %r2, %r15
0x4020e74cd4: aghi %r15, -160
0x4020e74cd8: xc 0(8,%r15), 0(%r15)
0x4020e74cde: brasl %r14, 275429939040
(lldb) b main
Breakpoint 1: 9 locations.
(lldb) c
Process 1 resuming
Process 1 stopped
* thread #1, stop reason = breakpoint 1.1
frame #0: 0x0000004005cd9fc0 clickhouse`main(argc_=1, argv_=0x0000004020e594a8) at main.cpp:450:17
447 #if !defined(FUZZING_MODE)
448 int main(int argc_, char ** argv_)
449 {
-> 450 inside_main = true;
451 SCOPE_EXIT({ inside_main = false; });
452
453 /// PHDR cache is required for query profiler to work reliably
```
## Visual Studio Code integration
- [CodeLLDB](https://github.com/vadimcn/vscode-lldb) extension is required for visual debugging.
- [Command Variable](https://github.com/rioj7/command-variable) extension can help dynamic launches if using [CMake Variants](https://github.com/microsoft/vscode-cmake-tools/blob/main/docs/variants.md).
- Make sure to set the backend to your LLVM installation eg. `"lldb.library": "/usr/lib/x86_64-linux-gnu/liblldb-15.so"`
- Make sure to run the clickhouse executable in debug mode prior to launch. (It is also possible to create a `preLaunchTask` that automates this)
### Example configurations
#### cmake-variants.yaml
```yaml
buildType:
default: relwithdebinfo
choices:
debug:
short: Debug
long: Emit debug information
buildType: Debug
release:
short: Release
long: Optimize generated code
buildType: Release
relwithdebinfo:
short: RelWithDebInfo
long: Release with Debug Info
buildType: RelWithDebInfo
tsan:
short: MinSizeRel
long: Minimum Size Release
buildType: MinSizeRel
toolchain:
default: default
description: Select toolchain
choices:
default:
short: x86_64
long: x86_64
s390x:
short: s390x
long: s390x
settings:
CMAKE_TOOLCHAIN_FILE: cmake/linux/toolchain-s390x.cmake
```
#### launch.json
```json
{
"version": "0.2.0",
"configurations": [
{
"type": "lldb",
"request": "custom",
"name": "(lldb) Launch s390x with qemu",
"targetCreateCommands": ["target create ${command:cmake.launchTargetPath}"],
"processCreateCommands": ["gdb-remote 2159"],
"preLaunchTask": "Run ClickHouse"
}
]
}
```
#### settings.json
This would also put different builds under different subfolders of the `build` folder.
```json
{
"cmake.buildDirectory": "${workspaceFolder}/build/${buildKitVendor}-${buildKitVersion}-${variant:toolchain}-${variant:buildType}",
"lldb.library": "/usr/lib/x86_64-linux-gnu/liblldb-15.so"
}
```
#### run-debug.sh
```sh
#! /bin/sh
echo 'Starting debugger session'
cd $1
qemu-s390x-static -g 2159 -L /usr/s390x-linux-gnu $2 $3 $4
```
#### tasks.json
Defines a task to run the compiled executable in `server` mode under a `tmp` folder next to the binaries, with configuration from under `programs/server/config.xml`.
```json
{
"version": "2.0.0",
"tasks": [
{
"label": "Run ClickHouse",
"type": "shell",
"isBackground": true,
"command": "${workspaceFolder}/.vscode/run-debug.sh",
"args": [
"${command:cmake.launchTargetDirectory}/tmp",
"${command:cmake.launchTargetPath}",
"server",
"--config-file=${workspaceFolder}/programs/server/config.xml"
],
"problemMatcher": [
{
"pattern": [
{
"regexp": ".",
"file": 1,
"location": 2,
"message": 3
}
],
"background": {
"activeOnStart": true,
"beginsPattern": "^Starting debugger session",
"endsPattern": ".*"
}
}
]
}
]
}
```
---
slug: /en/development/build-cross-s390x
sidebar_position: 69
title: How to Build, Run and Debug ClickHouse on Linux for s390x (zLinux)
sidebar_label: Build on Linux for s390x (zLinux)
---
As of writing (2023/3/10), building for s390x is considered experimental. Not all features can be enabled, some features are broken, and the port is currently under active development.
## Building
As s390x does not support boringssl, it uses OpenSSL and has two related build options.
- By default, the s390x build will dynamically link to OpenSSL libraries. It will build OpenSSL shared objects, so it's not necessary to install OpenSSL beforehand. (This option is recommended in all cases.)
- Another option is to build OpenSSL in-tree. In this case two build flags need to be supplied to cmake
```bash
-DENABLE_OPENSSL_DYNAMIC=0 -DENABLE_OPENSSL=1
```
These instructions assume that the host machine is x86_64 and has all the tooling required to build natively based on the [build instructions](../development/build.md). It also assumes that the host is Ubuntu 22.04 but the following instructions should also work on Ubuntu 20.04.
In addition to installing the tooling used to build natively, the following additional packages need to be installed:
```bash
apt-get install binutils-s390x-linux-gnu libc6-dev-s390x-cross gcc-s390x-linux-gnu binfmt-support qemu-user-static
```
If you wish to cross compile rust code install the rust cross compile target for s390x:
```bash
rustup target add s390x-unknown-linux-gnu
```
To build for s390x:
```bash
cmake -DCMAKE_TOOLCHAIN_FILE=cmake/linux/toolchain-s390x.cmake ..
ninja
```
## Running
Once built, the binary can be run with, e.g.:
```bash
qemu-s390x-static -L /usr/s390x-linux-gnu ./clickhouse
```
## Debugging
Install LLDB:
```bash
apt-get install lldb-15
```
To debug an s390x executable, run clickhouse using QEMU in debug mode:
```bash
qemu-s390x-static -g 31338 -L /usr/s390x-linux-gnu ./clickhouse
```
In another shell run LLDB and attach, replace `<Clickhouse Parent Directory>` and `<build directory>` with the values corresponding to your environment.
```bash
lldb-15
(lldb) target create ./clickhouse
Current executable set to '/<Clickhouse Parent Directory>/ClickHouse/<build directory>/programs/clickhouse' (s390x).
(lldb) settings set target.source-map <build directory> /<Clickhouse Parent Directory>/ClickHouse
(lldb) gdb-remote 31338
Process 1 stopped
* thread #1, stop reason = signal SIGTRAP
frame #0: 0x0000004020e74cd0
-> 0x4020e74cd0: lgr %r2, %r15
0x4020e74cd4: aghi %r15, -160
0x4020e74cd8: xc 0(8,%r15), 0(%r15)
0x4020e74cde: brasl %r14, 275429939040
(lldb) b main
Breakpoint 1: 9 locations.
(lldb) c
Process 1 resuming
Process 1 stopped
* thread #1, stop reason = breakpoint 1.1
frame #0: 0x0000004005cd9fc0 clickhouse`main(argc_=1, argv_=0x0000004020e594a8) at main.cpp:450:17
447 #if !defined(FUZZING_MODE)
448 int main(int argc_, char ** argv_)
449 {
-> 450 inside_main = true;
451 SCOPE_EXIT({ inside_main = false; });
452
453 /// PHDR cache is required for query profiler to work reliably
```
## Visual Studio Code integration
- [CodeLLDB](https://github.com/vadimcn/vscode-lldb) extension is required for visual debugging.
- [Command Variable](https://github.com/rioj7/command-variable) extension can help dynamic launches if using [CMake Variants](https://github.com/microsoft/vscode-cmake-tools/blob/main/docs/variants.md).
- Make sure to set the backend to your LLVM installation, e.g. `"lldb.library": "/usr/lib/x86_64-linux-gnu/liblldb-15.so"`
- Make sure to run the clickhouse executable in debug mode prior to launch. (It is also possible to create a `preLaunchTask` that automates this)
### Example configurations
#### cmake-variants.yaml
```yaml
buildType:
default: relwithdebinfo
choices:
debug:
short: Debug
long: Emit debug information
buildType: Debug
release:
short: Release
long: Optimize generated code
buildType: Release
relwithdebinfo:
short: RelWithDebInfo
long: Release with Debug Info
buildType: RelWithDebInfo
tsan:
short: MinSizeRel
long: Minimum Size Release
buildType: MinSizeRel
toolchain:
default: default
description: Select toolchain
choices:
default:
short: x86_64
long: x86_64
s390x:
short: s390x
long: s390x
settings:
CMAKE_TOOLCHAIN_FILE: cmake/linux/toolchain-s390x.cmake
```
#### launch.json
```json
{
"version": "0.2.0",
"configurations": [
{
"type": "lldb",
"request": "custom",
"name": "(lldb) Launch s390x with qemu",
"targetCreateCommands": ["target create ${command:cmake.launchTargetPath}"],
"processCreateCommands": ["gdb-remote 2159"],
"preLaunchTask": "Run ClickHouse"
}
]
}
```
#### settings.json
This would also put different builds under different subfolders of the `build` folder.
```json
{
"cmake.buildDirectory": "${workspaceFolder}/build/${buildKitVendor}-${buildKitVersion}-${variant:toolchain}-${variant:buildType}",
"lldb.library": "/usr/lib/x86_64-linux-gnu/liblldb-15.so"
}
```
#### run-debug.sh
```sh
#! /bin/sh
echo 'Starting debugger session'
cd $1
qemu-s390x-static -g 2159 -L /usr/s390x-linux-gnu $2 $3 $4
```
#### tasks.json
Defines a task to run the compiled executable in `server` mode under a `tmp` folder next to the binaries, with configuration from under `programs/server/config.xml`.
```json
{
"version": "2.0.0",
"tasks": [
{
"label": "Run ClickHouse",
"type": "shell",
"isBackground": true,
"command": "${workspaceFolder}/.vscode/run-debug.sh",
"args": [
"${command:cmake.launchTargetDirectory}/tmp",
"${command:cmake.launchTargetPath}",
"server",
"--config-file=${workspaceFolder}/programs/server/config.xml"
],
"problemMatcher": [
{
"pattern": [
{
"regexp": ".",
"file": 1,
"location": 2,
"message": 3
}
],
"background": {
"activeOnStart": true,
"beginsPattern": "^Starting debugger session",
"endsPattern": ".*"
}
}
]
}
]
}
```

View File

@ -25,7 +25,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
[ORDER BY expr]
[PRIMARY KEY expr]
[SAMPLE BY expr]
[SETTINGS name=value, ...]
[SETTINGS name=value, clean_deleted_rows=value, ...]
```
For a description of request parameters, see [statement description](../../../sql-reference/statements/create/table.md).
@ -88,6 +88,53 @@ SELECT * FROM mySecondReplacingMT FINAL;
└─────┴─────────┴─────────────────────┘
```
### is_deleted
`is_deleted` — Name of a column used during a merge to determine whether the data in this row represents the state or is to be deleted; `1` is a “deleted” row, `0` is a “state” row.
Column data type — `UInt8`.
:::note
`is_deleted` can only be enabled when `ver` is used.
The row is deleted when `OPTIMIZE ... FINAL CLEANUP` or `OPTIMIZE ... FINAL` is used, or if the engine setting `clean_deleted_rows` has been set to `Always`.
No matter the operation on the data, the version must be increased. If two inserted rows have the same version number, the last inserted row is the one kept.
:::
Example:
```sql
-- with ver and is_deleted
CREATE OR REPLACE TABLE myThirdReplacingMT
(
`key` Int64,
`someCol` String,
`eventTime` DateTime,
`is_deleted` UInt8
)
ENGINE = ReplacingMergeTree(eventTime, is_deleted)
ORDER BY key;
INSERT INTO myThirdReplacingMT Values (1, 'first', '2020-01-01 01:01:01', 0);
INSERT INTO myThirdReplacingMT Values (1, 'first', '2020-01-01 01:01:01', 1);
select * from myThirdReplacingMT final;
0 rows in set. Elapsed: 0.003 sec.
-- delete rows with is_deleted
OPTIMIZE TABLE myThirdReplacingMT FINAL CLEANUP;
INSERT INTO myThirdReplacingMT Values (1, 'first', '2020-01-01 00:00:00', 0);
select * from myThirdReplacingMT final;
┌─key─┬─someCol─┬───────────eventTime─┬─is_deleted─┐
│ 1 │ first │ 2020-01-01 00:00:00 │ 0 │
└─────┴─────────┴─────────────────────┴────────────┘
```
## Query clauses
When creating a `ReplacingMergeTree` table the same [clauses](../../../engines/table-engines/mergetree-family/mergetree.md) are required, as when creating a `MergeTree` table.

View File

@ -25,8 +25,7 @@ The steps below will easily work on a local install of ClickHouse too. The only
1. Let's see what the data looks like. The `s3` table function returns a table, so we can `DESCRIBE` the result:
```sql
DESCRIBE s3Cluster(
'default',
DESCRIBE s3(
'https://clickhouse-public-datasets.s3.amazonaws.com/youtube/original/files/*.zst',
'JSONLines'
);
@ -35,29 +34,29 @@ DESCRIBE s3Cluster(
ClickHouse infers the following schema from the JSON file:
```response
┌─name────────────────┬─type─────────────────────────────────┐
│ id │ Nullable(String) │
│ fetch_date │ Nullable(Int64)
│ upload_date │ Nullable(String) │
│ title │ Nullable(String) │
│ uploader_id │ Nullable(String) │
│ uploader │ Nullable(String) │
│ uploader_sub_count │ Nullable(Int64) │
│ is_age_limit │ Nullable(Bool) │
│ view_count │ Nullable(Int64) │
│ like_count │ Nullable(Int64) │
│ dislike_count │ Nullable(Int64) │
│ is_crawlable │ Nullable(Bool) │
│ is_live_content │ Nullable(Bool) │
│ has_subtitles │ Nullable(Bool) │
│ is_ads_enabled │ Nullable(Bool) │
│ is_comments_enabled │ Nullable(Bool) │
│ description │ Nullable(String) │
│ rich_metadata │ Array(Map(String, Nullable(String)))
│ super_titles │ Array(Map(String, Nullable(String)))
│ uploader_badges │ Nullable(String) │
│ video_badges │ Nullable(String) │
└─────────────────────┴──────────────────────────────────────┘
┌─name────────────────┬─type───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─
│ id │ Nullable(String) │ │ │ │ │
│ fetch_date │ Nullable(String) │ │ │ │ │
│ upload_date │ Nullable(String) │ │ │ │ │
│ title │ Nullable(String) │ │ │ │ │
│ uploader_id │ Nullable(String) │ │ │ │ │
│ uploader │ Nullable(String) │ │ │ │ │
│ uploader_sub_count │ Nullable(Int64) │ │ │ │ │
│ is_age_limit │ Nullable(Bool) │ │ │ │ │
│ view_count │ Nullable(Int64) │ │ │ │ │
│ like_count │ Nullable(Int64) │ │ │ │ │
│ dislike_count │ Nullable(Int64) │ │ │ │ │
│ is_crawlable │ Nullable(Bool) │ │ │ │ │
│ is_live_content │ Nullable(Bool) │ │ │ │ │
│ has_subtitles │ Nullable(Bool) │ │ │ │ │
│ is_ads_enabled │ Nullable(Bool) │ │ │ │ │
│ is_comments_enabled │ Nullable(Bool) │ │ │ │ │
│ description │ Nullable(String) │ │ │ │ │
│ rich_metadata │ Array(Tuple(call Nullable(String), content Nullable(String), subtitle Nullable(String), title Nullable(String), url Nullable(String))) │ │ │ │ │
│ super_titles │ Array(Tuple(text Nullable(String), url Nullable(String))) │ │ │ │ │
│ uploader_badges │ Nullable(String) │ │ │ │ │
│ video_badges │ Nullable(String) │ │ │ │ │
└─────────────────────┴────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────
```
2. Based on the inferred schema, we cleaned up the data types and added a primary key. Define the following table:
@ -82,13 +81,13 @@ CREATE TABLE youtube
`is_ads_enabled` Bool,
`is_comments_enabled` Bool,
`description` String,
`rich_metadata` Array(Map(String, String)),
`super_titles` Array(Map(String, String)),
`rich_metadata` Array(Tuple(call String, content String, subtitle String, title String, url String)),
`super_titles` Array(Tuple(text String, url String)),
`uploader_badges` String,
`video_badges` String
)
ENGINE = MergeTree
ORDER BY (uploader, upload_date);
ORDER BY (uploader, upload_date)
```
3. The following command streams the records from the S3 files into the `youtube` table.

View File

@ -852,6 +852,16 @@ If the file name for column is too long (more than `max_file_name_length` bytes)
The maximal length of the file name to keep it as is without hashing. Takes effect only if setting `replace_long_file_name_to_hash` is enabled. The value of this setting does not include the length of file extension. So, it is recommended to set it below the maximum filename length (usually 255 bytes) with some gap to avoid filesystem errors. Default value: 127.
## clean_deleted_rows
Enable/disable automatic deletion of rows flagged as `is_deleted` when performing `OPTIMIZE ... FINAL` on a table using the ReplacingMergeTree engine. When disabled, the `CLEANUP` keyword has to be added to the `OPTIMIZE ... FINAL` statement to get the same behaviour.
Possible values:
- `Always` or `Never`.
Default value: `Never`
## allow_experimental_block_number_column
Persists virtual column `_block_number` on merges.

View File

@ -4,7 +4,7 @@ sidebar_position: 63
sidebar_label: User Settings
---
# User Settings
# Users and Roles Settings
The `users` section of the `users.xml` configuration file contains user settings.
@ -187,3 +187,34 @@ The following configuration forces that user `user1` can only see the rows of `t
```
The `filter` can be any expression resulting in a [UInt8](../../sql-reference/data-types/int-uint.md)-type value. It usually contains comparisons and logical operators. Rows from `database_name.table1` for which the filter evaluates to 0 are not returned for this user. The filtering is incompatible with `PREWHERE` operations and disables `WHERE→PREWHERE` optimization.
## Roles
You can create predefined roles using the `roles` section of the `users.xml` configuration file.
Structure of the `roles` section:
```xml
<roles>
<test_role>
<grants>
<query>GRANT SHOW ON *.*</query>
<query>REVOKE SHOW ON system.*</query>
<query>GRANT CREATE ON *.* WITH GRANT OPTION</query>
</grants>
</test_role>
</roles>
```
These roles can also be granted to users from the `users` section:
```xml
<users>
<user_name>
...
<grants>
<query>GRANT test_role</query>
</grants>
</user_name>
</users>
```

View File

@ -196,7 +196,7 @@ These settings should be defined in the disk configuration section.
- `max_elements` - a limit for a number of cache files. Default: `10000000`.
- `load_metadata_threads` - number of threads being used to load cache metadata on starting time. Default: `1`.
- `load_metadata_threads` - number of threads being used to load cache metadata on starting time. Default: `16`.
File Cache **query/profile settings**:

View File

@ -0,0 +1,26 @@
---
slug: /en/operations/system-tables/database_engines
---
# database_engines
Contains the list of database engines supported by the server.
This table contains the following columns (the column type is shown in brackets):
- `name` (String) — The name of the database engine.
Example:
``` sql
SELECT *
FROM system.database_engines
WHERE name in ('Atomic', 'Lazy', 'Ordinary')
```
``` text
┌─name─────┐
│ Ordinary │
│ Atomic │
│ Lazy │
└──────────┘
```

View File

@ -9,11 +9,15 @@ Columns:
- `name` ([String](../../sql-reference/data-types/string.md)) — name of the error (`errorCodeToName`).
- `code` ([Int32](../../sql-reference/data-types/int-uint.md)) — code number of the error.
- `value` ([UInt64](../../sql-reference/data-types/int-uint.md)) — the number of times this error has been happened.
- `last_error_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — time when the last error happened.
- `value` ([UInt64](../../sql-reference/data-types/int-uint.md)) — the number of times this error happened.
- `last_error_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — the time when the last error happened.
- `last_error_message` ([String](../../sql-reference/data-types/string.md)) — message for the last error.
- `last_error_trace` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — A [stack trace](https://en.wikipedia.org/wiki/Stack_trace) which represents a list of physical addresses where the called methods are stored.
- `remote` ([UInt8](../../sql-reference/data-types/int-uint.md)) — remote exception (i.e. received during one of the distributed query).
- `last_error_trace` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — A [stack trace](https://en.wikipedia.org/wiki/Stack_trace) that represents a list of physical addresses where the called methods are stored.
- `remote` ([UInt8](../../sql-reference/data-types/int-uint.md)) — remote exception (i.e. received during one of the distributed queries).
:::note
Counters for some errors may increase during successful query execution. It's not recommended to use this table for server monitoring purposes unless you are sure that the corresponding error cannot be a false positive.
:::
**Example**

View File

@ -0,0 +1,43 @@
---
slug: /en/operations/system-tables/view_refreshes
---
# view_refreshes
Information about [Refreshable Materialized Views](../../sql-reference/statements/create/view.md#refreshable-materialized-view). Contains all refreshable materialized views, regardless of whether there's a refresh in progress or not.
Columns:
- `database` ([String](../../sql-reference/data-types/string.md)) — The name of the database the table is in.
- `view` ([String](../../sql-reference/data-types/string.md)) — Table name.
- `status` ([String](../../sql-reference/data-types/string.md)) — Current state of the refresh.
- `last_refresh_result` ([String](../../sql-reference/data-types/string.md)) — Outcome of the latest refresh attempt.
- `last_refresh_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Time of the last refresh attempt. `NULL` if no refresh attempts happened since server startup or table creation.
- `last_success_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Time of the last successful refresh. `NULL` if no successful refreshes happened since server startup or table creation.
- `duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md)) — How long the last refresh attempt took.
- `next_refresh_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Time at which the next refresh is scheduled to start.
- `remaining_dependencies` ([Array(String)](../../sql-reference/data-types/array.md)) — If the view has [refresh dependencies](../../sql-reference/statements/create/view.md#refresh-dependencies), this array contains the subset of those dependencies that are not satisfied for the current refresh yet. If `status = 'WaitingForDependencies'`, a refresh is ready to start as soon as these dependencies are fulfilled.
- `exception` ([String](../../sql-reference/data-types/string.md)) — if `last_refresh_result = 'Exception'`, i.e. the last refresh attempt failed, this column contains the corresponding error message and stack trace.
- `refresh_count` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of successful refreshes since last server restart or table creation.
- `progress` ([Float64](../../sql-reference/data-types/float.md)) — Progress of the current refresh, between 0 and 1.
- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of rows read by the current refresh so far.
- `total_rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Estimated total number of rows that need to be read by the current refresh.
(There are additional columns related to current refresh progress, but they are currently unreliable.)
**Example**
```sql
SELECT
database,
view,
status,
last_refresh_result,
last_refresh_time,
next_refresh_time
FROM system.view_refreshes
┌─database─┬─view───────────────────────┬─status────┬─last_refresh_result─┬───last_refresh_time─┬───next_refresh_time─┐
│ default │ hello_documentation_reader │ Scheduled │ Finished │ 2023-12-01 01:24:00 │ 2023-12-01 01:25:00 │
└──────────┴────────────────────────────┴───────────┴─────────────────────┴─────────────────────┴─────────────────────┘
```

View File

@ -501,41 +501,3 @@ Result:
│ 0 │
└────────────────────────────────────────────────────────────────────┘
```
## reverseDNSQuery
Performs a reverse DNS query to get the PTR records associated with the IP address.
**Syntax**
``` sql
reverseDNSQuery(address)
```
This function performs reverse DNS resolutions on both IPv4 and IPv6.
**Arguments**
- `address` — An IPv4 or IPv6 address. [String](../../sql-reference/data-types/string.md).
**Returned value**
- Associated domains (PTR records).
Type: Type: [Array(String)](../../sql-reference/data-types/array.md).
**Example**
Query:
``` sql
SELECT reverseDNSQuery('192.168.0.2');
```
Result:
``` text
┌─reverseDNSQuery('192.168.0.2')────────────┐
│ ['test2.example.com','test3.example.com'] │
└───────────────────────────────────────────┘
```

View File

@ -6,28 +6,28 @@ sidebar_label: VIEW
# ALTER TABLE … MODIFY QUERY Statement
You can modify `SELECT` query that was specified when a [materialized view](../create/view.md#materialized) was created with the `ALTER TABLE … MODIFY QUERY` statement without interrupting ingestion process.
You can modify `SELECT` query that was specified when a [materialized view](../create/view.md#materialized) was created with the `ALTER TABLE … MODIFY QUERY` statement without interrupting ingestion process.
The `allow_experimental_alter_materialized_view_structure` setting must be enabled.
The `allow_experimental_alter_materialized_view_structure` setting must be enabled.
This command is intended for materialized views created with the `TO [db.]name` clause. It does not change the structure of the underlying storage table, nor the columns' definition of the materialized view; because of this, its usefulness is very limited for materialized views created without the `TO [db.]name` clause.
**Example with TO table**
```sql
CREATE TABLE events (ts DateTime, event_type String)
CREATE TABLE events (ts DateTime, event_type String)
ENGINE = MergeTree ORDER BY (event_type, ts);
CREATE TABLE events_by_day (ts DateTime, event_type String, events_cnt UInt64)
CREATE TABLE events_by_day (ts DateTime, event_type String, events_cnt UInt64)
ENGINE = SummingMergeTree ORDER BY (event_type, ts);
CREATE MATERIALIZED VIEW mv TO events_by_day AS
CREATE MATERIALIZED VIEW mv TO events_by_day AS
SELECT toStartOfDay(ts) ts, event_type, count() events_cnt
FROM events
GROUP BY ts, event_type;
GROUP BY ts, event_type;
INSERT INTO events
SELECT Date '2020-01-01' + interval number * 900 second,
INSERT INTO events
SELECT Date '2020-01-01' + interval number * 900 second,
['imp', 'click'][number%2+1]
FROM numbers(100);
@ -43,23 +43,23 @@ ORDER BY ts, event_type;
│ 2020-01-02 00:00:00 │ imp │ 2 │
└─────────────────────┴────────────┴─────────────────┘
-- Let's add the new measurement `cost`
-- Let's add the new measurement `cost`
-- and the new dimension `browser`.
ALTER TABLE events
ALTER TABLE events
ADD COLUMN browser String,
ADD COLUMN cost Float64;
-- Columns do not have to match in a materialized view and TO
-- (destination table), so the next alter does not break insertion.
ALTER TABLE events_by_day
ALTER TABLE events_by_day
ADD COLUMN cost Float64,
ADD COLUMN browser String after event_type,
MODIFY ORDER BY (event_type, ts, browser);
INSERT INTO events
SELECT Date '2020-01-02' + interval number * 900 second,
INSERT INTO events
SELECT Date '2020-01-02' + interval number * 900 second,
['imp', 'click'][number%2+1],
['firefox', 'safary', 'chrome'][number%3+1],
10/(number+1)%33
@ -82,16 +82,16 @@ ORDER BY ts, event_type;
└─────────────────────┴────────────┴─────────┴────────────┴──────┘
SET allow_experimental_alter_materialized_view_structure=1;
ALTER TABLE mv MODIFY QUERY
ALTER TABLE mv MODIFY QUERY
SELECT toStartOfDay(ts) ts, event_type, browser,
count() events_cnt,
sum(cost) cost
FROM events
GROUP BY ts, event_type, browser;
INSERT INTO events
SELECT Date '2020-01-03' + interval number * 900 second,
INSERT INTO events
SELECT Date '2020-01-03' + interval number * 900 second,
['imp', 'click'][number%2+1],
['firefox', 'safary', 'chrome'][number%3+1],
10/(number+1)%33
@ -138,7 +138,7 @@ PRIMARY KEY (event_type, ts)
ORDER BY (event_type, ts, browser)
SETTINGS index_granularity = 8192
-- !!! The columns' definition is unchanged but it does not matter, we are not querying
-- !!! The columns' definition is unchanged but it does not matter, we are not querying
-- MATERIALIZED VIEW, we are querying TO (storage) table.
-- SELECT section is updated.
@ -169,7 +169,7 @@ The application is very limited because you can only change the `SELECT` section
```sql
CREATE TABLE src_table (`a` UInt32) ENGINE = MergeTree ORDER BY a;
CREATE MATERIALIZED VIEW mv (`a` UInt32) ENGINE = MergeTree ORDER BY a AS SELECT a FROM src_table;
CREATE MATERIALIZED VIEW mv (`a` UInt32) ENGINE = MergeTree ORDER BY a AS SELECT a FROM src_table;
INSERT INTO src_table (a) VALUES (1), (2);
SELECT * FROM mv;
```
@ -199,3 +199,7 @@ SELECT * FROM mv;
## ALTER LIVE VIEW Statement
`ALTER LIVE VIEW ... REFRESH` statement refreshes a [Live view](../create/view.md#live-view). See [Force Live View Refresh](../create/view.md#live-view-alter-refresh).
## ALTER TABLE … MODIFY REFRESH Statement
`ALTER TABLE ... MODIFY REFRESH` statement changes refresh parameters of a [Refreshable Materialized View](../create/view.md#refreshable-materialized-view). See [Changing Refresh Parameters](../create/view.md#changing-refresh-parameters).

View File

@ -37,6 +37,7 @@ SELECT a, b, c FROM (SELECT ...)
```
## Parameterized View
Parameterized views are similar to normal views, but can be created with parameters which are not resolved immediately. These views can be used with table functions, which specify the name of the view as function name and the parameter values as its arguments.
``` sql
@ -66,7 +67,7 @@ When creating a materialized view with `TO [db].[table]`, you can't also use `PO
A materialized view is implemented as follows: when inserting data to the table specified in `SELECT`, part of the inserted data is converted by this `SELECT` query, and the result is inserted in the view.
:::note
:::note
Materialized views in ClickHouse use **column names** instead of column order during insertion into destination table. If some column names are not present in the `SELECT` query result, ClickHouse uses a default value, even if the column is not [Nullable](../../data-types/nullable.md). A safe practice would be to add aliases for every column when using Materialized views.
Materialized views in ClickHouse are implemented more like insert triggers. If there's some aggregation in the view query, it's applied only to the batch of freshly inserted data. Any changes to existing data of the source table (like update, delete, drop partition, etc.) do not change the materialized view.
@ -96,9 +97,116 @@ This feature is deprecated and will be removed in the future.
For your convenience, the old documentation is located [here](https://pastila.nl/?00f32652/fdf07272a7b54bda7e13b919264e449f.md)
## Refreshable Materialized View {#refreshable-materialized-view}
```sql
CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name
REFRESH EVERY|AFTER interval [OFFSET interval]
[RANDOMIZE FOR interval]
[DEPENDS ON [db.]name [, [db.]name [, ...]]]
[TO [db.]name] [(columns)] [ENGINE = engine] [EMPTY]
AS SELECT ...
```
where `interval` is a sequence of simple intervals:
```sql
number SECOND|MINUTE|HOUR|DAY|WEEK|MONTH|YEAR
```
Periodically runs the corresponding query and stores its result in a table, atomically replacing the table's previous contents.
Differences from regular non-refreshable materialized views:
* No insert trigger. I.e. when new data is inserted into the table specified in SELECT, it's *not* automatically pushed to the refreshable materialized view. The periodic refresh runs the entire query and replaces the entire table.
* No restrictions on the SELECT query. Table functions (e.g. `url()`), views, UNION, JOIN, are all allowed.
:::note
Refreshable materialized views are a work in progress. Setting `allow_experimental_refreshable_materialized_view = 1` is required for creating one. Current limitations:
* not compatible with Replicated database or table engines,
* require [Atomic database engine](../../../engines/database-engines/atomic.md),
* no retries for failed refresh - we just skip to the next scheduled refresh time,
* no limit on number of concurrent refreshes.
:::
### Refresh Schedule
Example refresh schedules:
```sql
REFRESH EVERY 1 DAY -- every day, at midnight (UTC)
REFRESH EVERY 1 MONTH -- on 1st day of every month, at midnight
REFRESH EVERY 1 MONTH OFFSET 5 DAY 2 HOUR -- on 6th day of every month, at 2:00 am
REFRESH EVERY 2 WEEK OFFSET 5 DAY 15 HOUR 10 MINUTE -- every other Saturday, at 3:10 pm
REFRESH EVERY 30 MINUTE -- at 00:00, 00:30, 01:00, 01:30, etc
REFRESH AFTER 30 MINUTE -- 30 minutes after the previous refresh completes, no alignment with time of day
-- REFRESH AFTER 1 HOUR OFFSET 1 MINUTE -- syntax error, OFFSET is not allowed with AFTER
```
`RANDOMIZE FOR` randomly adjusts the time of each refresh, e.g.:
```sql
REFRESH EVERY 1 DAY OFFSET 2 HOUR RANDOMIZE FOR 1 HOUR -- every day at random time between 01:30 and 02:30
```
At most one refresh may be running at a time, for a given view. E.g. if a view with `REFRESH EVERY 1 MINUTE` takes 2 minutes to refresh, it'll just be refreshing every 2 minutes. If it then becomes faster and starts refreshing in 10 seconds, it'll go back to refreshing every minute. (In particular, it won't refresh every 10 seconds to catch up with a backlog of missed refreshes - there's no such backlog.)
Additionally, a refresh is started immediately after the materialized view is created, unless `EMPTY` is specified in the `CREATE` query. If `EMPTY` is specified, the first refresh happens according to schedule.
### Dependencies {#refresh-dependencies}
`DEPENDS ON` synchronizes refreshes of different tables. By way of example, suppose there's a chain of two refreshable materialized views:
```sql
CREATE MATERIALIZED VIEW source REFRESH EVERY 1 DAY AS SELECT * FROM url(...)
CREATE MATERIALIZED VIEW destination REFRESH EVERY 1 DAY AS SELECT ... FROM source
```
Without `DEPENDS ON`, both views will start a refresh at midnight, and `destination` typically will see yesterday's data in `source`. If we add dependency:
```
CREATE MATERIALIZED VIEW destination REFRESH EVERY 1 DAY DEPENDS ON source AS SELECT ... FROM source
```
then `destination`'s refresh will start only after `source`'s refresh finished for that day, so `destination` will be based on fresh data.
Alternatively, the same result can be achieved with:
```
CREATE MATERIALIZED VIEW destination REFRESH AFTER 1 HOUR DEPENDS ON source AS SELECT ... FROM source
```
where `1 HOUR` can be any duration less than `source`'s refresh period. The dependent table won't be refreshed more frequently than any of its dependencies. This is a valid way to set up a chain of refreshable views without specifying the real refresh period more than once.
A few more examples:
* `REFRESH EVERY 1 DAY OFFSET 10 MINUTE` (`destination`) depends on `REFRESH EVERY 1 DAY` (`source`)<br/>
If `source` refresh takes more than 10 minutes, `destination` will wait for it.
* `REFRESH EVERY 1 DAY OFFSET 1 HOUR` depends on `REFRESH EVERY 1 DAY OFFSET 23 HOUR`<br/>
Similar to the above, even though the corresponding refreshes happen on different calendar days.
`destination`'s refresh on day X+1 will wait for `source`'s refresh on day X (if it takes more than 2 hours).
* `REFRESH EVERY 2 HOUR` depends on `REFRESH EVERY 1 HOUR`<br/>
The 2 HOUR refresh happens after the 1 HOUR refresh for every other hour, e.g. after the midnight
refresh, then after the 2am refresh, etc.
* `REFRESH EVERY 1 MINUTE` depends on `REFRESH EVERY 2 HOUR`<br/>
`REFRESH AFTER 1 MINUTE` depends on `REFRESH EVERY 2 HOUR`<br/>
`REFRESH AFTER 1 MINUTE` depends on `REFRESH AFTER 2 HOUR`<br/>
`destination` is refreshed once after every `source` refresh, i.e. every 2 hours. The `1 MINUTE` is effectively ignored.
* `REFRESH AFTER 1 HOUR` depends on `REFRESH AFTER 1 HOUR`<br/>
Currently this is not recommended.
:::note
`DEPENDS ON` only works between refreshable materialized views. Listing a regular table in the `DEPENDS ON` list will prevent the view from ever refreshing (dependencies can be removed with `ALTER`, see below).
:::
### Changing Refresh Parameters {#changing-refresh-parameters}
To change refresh parameters:
```
ALTER TABLE [db.]name MODIFY REFRESH EVERY|AFTER ... [RANDOMIZE FOR ...] [DEPENDS ON ...]
```
:::note
This replaces refresh schedule *and* dependencies. If the table had a `DEPENDS ON`, doing a `MODIFY REFRESH` without `DEPENDS ON` will remove the dependencies.
:::
### Other operations
The status of all refreshable materialized views is available in table [`system.view_refreshes`](../../../operations/system-tables/view_refreshes.md). In particular, it contains refresh progress (if running), last and next refresh time, exception message if a refresh failed.
To manually stop, start, trigger, or cancel refreshes use [`SYSTEM STOP|START|REFRESH|CANCEL VIEW`](../system.md#refreshable-materialized-views).
## Window View [Experimental]
:::info
:::info
This is an experimental feature that may change in backwards-incompatible ways in the future releases. Enable usage of window views and `WATCH` query using [allow_experimental_window_view](../../../operations/settings/settings.md#allow-experimental-window-view) setting. Input the command `set allow_experimental_window_view = 1`.
:::

View File

@ -449,7 +449,7 @@ SYSTEM SYNC FILE CACHE [ON CLUSTER cluster_name]
```
### SYSTEM STOP LISTEN
## SYSTEM STOP LISTEN
Closes the socket and gracefully terminates the existing connections to the server on the specified port with the specified protocol.
@ -464,7 +464,7 @@ SYSTEM STOP LISTEN [ON CLUSTER cluster_name] [QUERIES ALL | QUERIES DEFAULT | QU
- If `QUERIES DEFAULT [EXCEPT .. [,..]]` modifier is specified, all default protocols are stopped, unless specified with `EXCEPT` clause.
- If `QUERIES CUSTOM [EXCEPT .. [,..]]` modifier is specified, all custom protocols are stopped, unless specified with `EXCEPT` clause.
### SYSTEM START LISTEN
## SYSTEM START LISTEN
Allows new connections to be established on the specified protocols.
@ -473,3 +473,47 @@ However, if the server on the specified port and protocol was not stopped using
```sql
SYSTEM START LISTEN [ON CLUSTER cluster_name] [QUERIES ALL | QUERIES DEFAULT | QUERIES CUSTOM | TCP | TCP WITH PROXY | TCP SECURE | HTTP | HTTPS | MYSQL | GRPC | POSTGRESQL | PROMETHEUS | CUSTOM 'protocol']
```
## Managing Refreshable Materialized Views {#refreshable-materialized-views}
Commands to control background tasks performed by [Refreshable Materialized Views](../../sql-reference/statements/create/view.md#refreshable-materialized-view).
Keep an eye on [`system.view_refreshes`](../../operations/system-tables/view_refreshes.md) while using them.
### SYSTEM REFRESH VIEW
Trigger an immediate out-of-schedule refresh of a given view.
```sql
SYSTEM REFRESH VIEW [db.]name
```
### SYSTEM STOP VIEW, SYSTEM STOP VIEWS
Disable periodic refreshing of the given view or all refreshable views. If a refresh is in progress, cancel it too.
```sql
SYSTEM STOP VIEW [db.]name
```
```sql
SYSTEM STOP VIEWS
```
### SYSTEM START VIEW, SYSTEM START VIEWS
Enable periodic refreshing for the given view or all refreshable views. No immediate refresh is triggered.
```sql
SYSTEM START VIEW [db.]name
```
```sql
SYSTEM START VIEWS
```
### SYSTEM CANCEL VIEW
If there's a refresh in progress for the given view, interrupt and cancel it. Otherwise do nothing.
```sql
SYSTEM CANCEL VIEW [db.]name
```

View File

@ -86,6 +86,59 @@ SELECT * FROM mySecondReplacingMT FINAL;
│ 1 │ first │ 2020-01-01 01:01:01 │
└─────┴─────────┴─────────────────────┘
```
### is_deleted
`is_deleted` — Имя столбца, который используется во время слияния для обозначения того, нужно ли отображать строку или она подлежит удалению; `1` - для удаления строки, `0` - для отображения строки.
Тип данных столбца — `UInt8`.
:::note
`is_deleted` может быть использован, если `ver` используется.
Строка удаляется в следующих случаях:
- при использовании инструкции `OPTIMIZE ... FINAL CLEANUP`
- при использовании инструкции `OPTIMIZE ... FINAL`
- параметр движка `clean_deleted_rows` установлен в значение `Always` (по умолчанию - `Never`)
- есть новые версии строки
Не рекомендуется выполнять `FINAL CLEANUP` или использовать параметр движка `clean_deleted_rows` со значением `Always`, это может привести к неожиданным результатам, например удаленные строки могут вновь появиться.
Вне зависимости от производимых изменений над данными, версия должна увеличиваться. Если у двух строк одна и та же версия, то остается только последняя вставленная строка.
:::
Пример:
```sql
-- with ver and is_deleted
CREATE OR REPLACE TABLE myThirdReplacingMT
(
`key` Int64,
`someCol` String,
`eventTime` DateTime,
`is_deleted` UInt8
)
ENGINE = ReplacingMergeTree(eventTime, is_deleted)
ORDER BY key;
INSERT INTO myThirdReplacingMT Values (1, 'first', '2020-01-01 01:01:01', 0);
INSERT INTO myThirdReplacingMT Values (1, 'first', '2020-01-01 01:01:01', 1);
select * from myThirdReplacingMT final;
0 rows in set. Elapsed: 0.003 sec.
-- delete rows with is_deleted
OPTIMIZE TABLE myThirdReplacingMT FINAL CLEANUP;
INSERT INTO myThirdReplacingMT Values (1, 'first', '2020-01-01 00:00:00', 0);
select * from myThirdReplacingMT final;
┌─key─┬─someCol─┬───────────eventTime─┬─is_deleted─┐
│ 1 │ first │ 2020-01-01 00:00:00 │ 0 │
└─────┴─────────┴─────────────────────┴────────────┘
```
## Секции запроса

View File

@ -1,9 +1,9 @@
---
slug: /ru/getting-started/example-datasets/github-events
sidebar_label: GitHub Events
title: "GitHub Events Dataset"
title: "Набор данных о событиях на GitHub"
---
import Content from '@site/docs/en/getting-started/example-datasets/github-events.md';
Набор данных о событиях на GitHub с 2011 года по 6 декабря 2020 года содержит 3,1 млрд записей. Объём исходных данных — 75 ГБ, для загрузки в ClickHouse потребуется около 200 ГБ свободного пространства хранения (при использовании метода сжатия lz4).
<Content />
Полное описание набора, инструкции по загрузке и запросы к нему опубликованы на https://ghe.clickhouse.tech/

View File

@ -1,115 +1,115 @@
---
---
slug: /ru/operations/utilities/clickhouse-format
sidebar_position: 65
sidebar_label: clickhouse-format
---
# clickhouse-format {#clickhouse-format}
Позволяет форматировать входящие запросы.
Ключи:
- `--help` или `-h` — выводит описание ключей.
- `--query` — форматирует запрос любой длины и сложности.
- `--hilite` — добавляет подсветку синтаксиса с экранированием символов.
- `--oneline` — форматирование в одну строку.
- `--quiet` или `-q` — проверяет синтаксис без вывода результата.
- `--multiquery` или `-n` — поддерживает несколько запросов в одной строке.
- `--obfuscate` — обфусцирует вместо форматирования.
- `--seed <строка>` — задает строку, которая определяет результат обфускации.
- `--backslash` — добавляет обратный слеш в конце каждой строки отформатированного запроса. Удобно использовать если многострочный запрос скопирован из интернета или другого источника и его нужно выполнить из командной строки.
## Примеры {#examples}
1. Форматирование запроса:
```bash
$ clickhouse-format --query "select number from numbers(10) where number%2 order by number desc;"
```
Результат:
```text
SELECT number
FROM numbers(10)
WHERE number % 2
ORDER BY number DESC
```
2. Подсветка синтаксиса и форматирование в одну строку:
```bash
$ clickhouse-format --oneline --hilite <<< "SELECT sum(number) FROM numbers(5);"
```
Результат:
```sql
SELECT sum(number) FROM numbers(5)
```
3. Несколько запросов в одной строке:
```bash
$ clickhouse-format -n <<< "SELECT * FROM (SELECT 1 AS x UNION ALL SELECT 1 UNION DISTINCT SELECT 3);"
```
Результат:
```text
SELECT *
FROM
(
SELECT 1 AS x
UNION ALL
SELECT 1
UNION DISTINCT
SELECT 3
)
;
```
4. Обфускация:
```bash
$ clickhouse-format --seed Hello --obfuscate <<< "SELECT cost_first_screen BETWEEN a AND b, CASE WHEN x >= 123 THEN y ELSE NULL END;"
```
Результат:
```text
SELECT treasury_mammoth_hazelnut BETWEEN nutmeg AND span, CASE WHEN chive >= 116 THEN switching ELSE ANYTHING END;
```
Тот же запрос с другой инициализацией обфускатора:
```bash
$ clickhouse-format --seed World --obfuscate <<< "SELECT cost_first_screen BETWEEN a AND b, CASE WHEN x >= 123 THEN y ELSE NULL END;"
```
Результат:
```text
SELECT horse_tape_summer BETWEEN folklore AND moccasins, CASE WHEN intestine >= 116 THEN nonconformist ELSE FORESTRY END;
```
5. Добавление обратного слеша:
```bash
$ clickhouse-format --backslash <<< "SELECT * FROM (SELECT 1 AS x UNION ALL SELECT 1 UNION DISTINCT SELECT 3);"
```
Результат:
```text
SELECT * \
FROM \
( \
SELECT 1 AS x \
UNION ALL \
SELECT 1 \
UNION DISTINCT \
SELECT 3 \
)
```
sidebar_position: 65
sidebar_label: clickhouse-format
---
# clickhouse-format {#clickhouse-format}
Позволяет форматировать входящие запросы.
Ключи:
- `--help` или `-h` — выводит описание ключей.
- `--query` — форматирует запрос любой длины и сложности.
- `--hilite` — добавляет подсветку синтаксиса с экранированием символов.
- `--oneline` — форматирование в одну строку.
- `--quiet` или `-q` — проверяет синтаксис без вывода результата.
- `--multiquery` или `-n` — поддерживает несколько запросов в одной строке.
- `--obfuscate` — обфусцирует вместо форматирования.
- `--seed <строка>` — задает строку, которая определяет результат обфускации.
- `--backslash` — добавляет обратный слеш в конце каждой строки отформатированного запроса. Удобно использовать если многострочный запрос скопирован из интернета или другого источника и его нужно выполнить из командной строки.
## Примеры {#examples}
1. Форматирование запроса:
```bash
$ clickhouse-format --query "select number from numbers(10) where number%2 order by number desc;"
```
Результат:
```text
SELECT number
FROM numbers(10)
WHERE number % 2
ORDER BY number DESC
```
2. Подсветка синтаксиса и форматирование в одну строку:
```bash
$ clickhouse-format --oneline --hilite <<< "SELECT sum(number) FROM numbers(5);"
```
Результат:
```sql
SELECT sum(number) FROM numbers(5)
```
3. Несколько запросов в одной строке:
```bash
$ clickhouse-format -n <<< "SELECT * FROM (SELECT 1 AS x UNION ALL SELECT 1 UNION DISTINCT SELECT 3);"
```
Результат:
```text
SELECT *
FROM
(
SELECT 1 AS x
UNION ALL
SELECT 1
UNION DISTINCT
SELECT 3
)
;
```
4. Обфускация:
```bash
$ clickhouse-format --seed Hello --obfuscate <<< "SELECT cost_first_screen BETWEEN a AND b, CASE WHEN x >= 123 THEN y ELSE NULL END;"
```
Результат:
```text
SELECT treasury_mammoth_hazelnut BETWEEN nutmeg AND span, CASE WHEN chive >= 116 THEN switching ELSE ANYTHING END;
```
Тот же запрос с другой инициализацией обфускатора:
```bash
$ clickhouse-format --seed World --obfuscate <<< "SELECT cost_first_screen BETWEEN a AND b, CASE WHEN x >= 123 THEN y ELSE NULL END;"
```
Результат:
```text
SELECT horse_tape_summer BETWEEN folklore AND moccasins, CASE WHEN intestine >= 116 THEN nonconformist ELSE FORESTRY END;
```
5. Добавление обратного слеша:
```bash
$ clickhouse-format --backslash <<< "SELECT * FROM (SELECT 1 AS x UNION ALL SELECT 1 UNION DISTINCT SELECT 3);"
```
Результат:
```text
SELECT * \
FROM \
( \
SELECT 1 AS x \
UNION ALL \
SELECT 1 \
UNION DISTINCT \
SELECT 3 \
)
```

View File

@ -1,62 +1,62 @@
---
slug: /ru/sql-reference/aggregate-functions/reference/sparkbar
sidebar_position: 311
sidebar_label: sparkbar
---
# sparkbar {#sparkbar}
Функция строит гистограмму частот по заданным значениям `x` и частоте повторения этих значений `y` на интервале `[min_x, max_x]`. Повторения для всех `x`, попавших в один бакет, усредняются, поэтому данные должны быть предварительно агрегированы. Отрицательные повторения игнорируются.
Если интервал для построения не указан, то в качестве нижней границы интервала будет взято минимальное значение `x`, а в качестве верхней границы — максимальное значение `x`.
Значения `x` вне указанного интервала игнорируются.
**Синтаксис**
``` sql
sparkbar(width[, min_x, max_x])(x, y)
```
**Параметры**
- `width` — Количество столбцов гистограммы. Тип: [Integer](../../../sql-reference/data-types/int-uint.md).
- `min_x` — Начало интервала. Необязательный параметр.
- `max_x` — Конец интервала. Необязательный параметр.
**Аргументы**
- `x` — Поле со значениями.
- `y` — Поле с частотой повторения значений.
**Возвращаемые значения**
- Гистограмма частот.
**Пример**
Запрос:
``` sql
CREATE TABLE spark_bar_data (`value` Int64, `event_date` Date) ENGINE = MergeTree ORDER BY event_date;
INSERT INTO spark_bar_data VALUES (1,'2020-01-01'), (3,'2020-01-02'), (4,'2020-01-02'), (-3,'2020-01-02'), (5,'2020-01-03'), (2,'2020-01-04'), (3,'2020-01-05'), (7,'2020-01-06'), (6,'2020-01-07'), (8,'2020-01-08'), (2,'2020-01-11');
SELECT sparkbar(9)(event_date,cnt) FROM (SELECT sum(value) as cnt, event_date FROM spark_bar_data GROUP BY event_date);
SELECT sparkbar(9, toDate('2020-01-01'), toDate('2020-01-10'))(event_date,cnt) FROM (SELECT sum(value) as cnt, event_date FROM spark_bar_data GROUP BY event_date);
```
Результат:
``` text
┌─sparkbar(9)(event_date, cnt)─┐
│ ▂▅▂▃▆█ ▂ │
└──────────────────────────────┘
┌─sparkbar(9, toDate('2020-01-01'), toDate('2020-01-10'))(event_date, cnt)─┐
│ ▂▅▂▃▇▆█ │
└──────────────────────────────────────────────────────────────────────────┘
```
---
slug: /ru/sql-reference/aggregate-functions/reference/sparkbar
sidebar_position: 311
sidebar_label: sparkbar
---
# sparkbar {#sparkbar}
Функция строит гистограмму частот по заданным значениям `x` и частоте повторения этих значений `y` на интервале `[min_x, max_x]`. Повторения для всех `x`, попавших в один бакет, усредняются, поэтому данные должны быть предварительно агрегированы. Отрицательные повторения игнорируются.
Если интервал для построения не указан, то в качестве нижней границы интервала будет взято минимальное значение `x`, а в качестве верхней границы — максимальное значение `x`.
Значения `x` вне указанного интервала игнорируются.
**Синтаксис**
``` sql
sparkbar(width[, min_x, max_x])(x, y)
```
**Параметры**
- `width` — Количество столбцов гистограммы. Тип: [Integer](../../../sql-reference/data-types/int-uint.md).
- `min_x` — Начало интервала. Необязательный параметр.
- `max_x` — Конец интервала. Необязательный параметр.
**Аргументы**
- `x` — Поле со значениями.
- `y` — Поле с частотой повторения значений.
**Возвращаемые значения**
- Гистограмма частот.
**Пример**
Запрос:
``` sql
CREATE TABLE spark_bar_data (`value` Int64, `event_date` Date) ENGINE = MergeTree ORDER BY event_date;
INSERT INTO spark_bar_data VALUES (1,'2020-01-01'), (3,'2020-01-02'), (4,'2020-01-02'), (-3,'2020-01-02'), (5,'2020-01-03'), (2,'2020-01-04'), (3,'2020-01-05'), (7,'2020-01-06'), (6,'2020-01-07'), (8,'2020-01-08'), (2,'2020-01-11');
SELECT sparkbar(9)(event_date,cnt) FROM (SELECT sum(value) as cnt, event_date FROM spark_bar_data GROUP BY event_date);
SELECT sparkbar(9, toDate('2020-01-01'), toDate('2020-01-10'))(event_date,cnt) FROM (SELECT sum(value) as cnt, event_date FROM spark_bar_data GROUP BY event_date);
```
Результат:
``` text
┌─sparkbar(9)(event_date, cnt)─┐
│ ▂▅▂▃▆█ ▂ │
└──────────────────────────────┘
┌─sparkbar(9, toDate('2020-01-01'), toDate('2020-01-10'))(event_date, cnt)─┐
│ ▂▅▂▃▇▆█ │
└──────────────────────────────────────────────────────────────────────────┘
```

File diff suppressed because it is too large Load Diff

View File

@ -1,18 +1,18 @@
---
---
slug: /zh/faq/general/dbms-naming
title: "\u201CClickHouse\u201D 有什么含义?"
toc_hidden: true
sidebar_position: 10
---
# “ClickHouse” 有什么含义? {#what-does-clickhouse-mean}
它是“**点击**流”和“数据**仓库**”的组合。它来自于Yandex最初的用例。在Metrica网站上ClickHouse本应该保存人们在互联网上的所有点击记录现在它仍然在做这项工作。你可以在[ClickHouse history](../../introduction/history.md)页面上阅读更多关于这个用例的信息。
这个由两部分组成的意思有两个结果:
- 唯一正确的写“Click**H**ouse”的方式是用大写H。
- 如果需要缩写,请使用“**CH**”。由于一些历史原因缩写CK在中国也很流行主要是因为中文中最早的一个关于ClickHouse的演讲使用了这种形式。
!!! info “有趣的事实”
多年后ClickHouse闻名于世, 这种命名方法:结合各有深意的两个词被赞扬为最好的数据库命名方式, 卡内基梅隆大学数据库副教授[Andy Pavlo做的研究](https://www.cs.cmu.edu/~pavlo/blog/2020/03/on-naming-a-database-management-system.html) 。ClickHouse与Postgres共同获得“史上最佳数据库名”奖。
title: "\u201CClickHouse\u201D 有什么含义?"
toc_hidden: true
sidebar_position: 10
---
# “ClickHouse” 有什么含义? {#what-does-clickhouse-mean}
它是“**点击**流”和“数据**仓库**”的组合。它来自于Yandex最初的用例。在Metrica网站上ClickHouse本应该保存人们在互联网上的所有点击记录现在它仍然在做这项工作。你可以在[ClickHouse history](../../introduction/history.md)页面上阅读更多关于这个用例的信息。
这个由两部分组成的意思有两个结果:
- 唯一正确的写“Click**H**ouse”的方式是用大写H。
- 如果需要缩写,请使用“**CH**”。由于一些历史原因缩写CK在中国也很流行主要是因为中文中最早的一个关于ClickHouse的演讲使用了这种形式。
!!! info “有趣的事实”
多年后ClickHouse闻名于世, 这种命名方法:结合各有深意的两个词被赞扬为最好的数据库命名方式, 卡内基梅隆大学数据库副教授[Andy Pavlo做的研究](https://www.cs.cmu.edu/~pavlo/blog/2020/03/on-naming-a-database-management-system.html) 。ClickHouse与Postgres共同获得“史上最佳数据库名”奖。

View File

@ -1,18 +1,18 @@
---
---
slug: /zh/faq/general/how-do-i-contribute-code-to-clickhouse
title: 我如何为ClickHouse贡献代码?
toc_hidden: true
sidebar_position: 120
---
# 我如何为ClickHouse贡献代码? {#how-do-i-contribute-code-to-clickhouse}
ClickHouse是一个开源项目[在GitHub上开发](https://github.com/ClickHouse/ClickHouse)。
按照惯例,贡献指南发布在源代码库根目录的 [CONTRIBUTING.md](https://github.com/ClickHouse/ClickHouse/blob/master/CONTRIBUTING.md)文件中。
如果你想对ClickHouse提出实质性的改变建议可以考虑[在GitHub上发布一个问题](https://github.com/ClickHouse/ClickHouse/issues/new/choose),解释一下你想做什么,先与维护人员和社区讨论一下。[此类RFC问题的例子](https://github.com/ClickHouse/ClickHouse/issues?q=is%3Aissue+is%3Aopen+rfc)。
如果您的贡献与安全相关,也请查看[我们的安全政策](https://github.com/ClickHouse/ClickHouse/security/policy/)。
title: 我如何为ClickHouse贡献代码?
toc_hidden: true
sidebar_position: 120
---
# 我如何为ClickHouse贡献代码? {#how-do-i-contribute-code-to-clickhouse}
ClickHouse是一个开源项目[在GitHub上开发](https://github.com/ClickHouse/ClickHouse)。
按照惯例,贡献指南发布在源代码库根目录的 [CONTRIBUTING.md](https://github.com/ClickHouse/ClickHouse/blob/master/CONTRIBUTING.md)文件中。
如果你想对ClickHouse提出实质性的改变建议可以考虑[在GitHub上发布一个问题](https://github.com/ClickHouse/ClickHouse/issues/new/choose),解释一下你想做什么,先与维护人员和社区讨论一下。[此类RFC问题的例子](https://github.com/ClickHouse/ClickHouse/issues?q=is%3Aissue+is%3Aopen+rfc)。
如果您的贡献与安全相关,也请查看[我们的安全政策](https://github.com/ClickHouse/ClickHouse/security/policy/)。

View File

@ -1,22 +1,22 @@
---
slug: /zh/faq/integration/
title: 关于集成ClickHouse和其他系统的问题
toc_hidden_folder: true
sidebar_position: 4
sidebar_label: Integration
---
# 关于集成ClickHouse和其他系统的问题 {#question-about-integrating-clickhouse-and-other-systems}
问题:
- [如何从 ClickHouse 导出数据到一个文件?](../../faq/integration/file-export.md)
- [如何导入JSON到ClickHouse?](../../faq/integration/json-import.md)
- [如果我用ODBC链接Oracle数据库出现编码问题该怎么办?](../../faq/integration/oracle-odbc.md)
!!! info "没看到你要找的东西吗?"
查看[其他faq类别](../../faq/index.md)或浏览左边栏中的主要文档文章。
---
slug: /zh/faq/integration/
title: 关于集成ClickHouse和其他系统的问题
toc_hidden_folder: true
sidebar_position: 4
sidebar_label: Integration
---
# 关于集成ClickHouse和其他系统的问题 {#question-about-integrating-clickhouse-and-other-systems}
问题:
- [如何从 ClickHouse 导出数据到一个文件?](../../faq/integration/file-export.md)
- [如何导入JSON到ClickHouse?](../../faq/integration/json-import.md)
- [如果我用ODBC链接Oracle数据库出现编码问题该怎么办?](../../faq/integration/oracle-odbc.md)
!!! info "没看到你要找的东西吗?"
查看[其他faq类别](../../faq/index.md)或浏览左边栏中的主要文档文章。
{## [原文](https://clickhouse.com/docs/en/faq/integration/) ##}

View File

@ -1,21 +1,21 @@
---
slug: /zh/faq/operations/
title: 关于操作ClickHouse服务器和集群的问题
toc_hidden_folder: true
sidebar_position: 3
sidebar_label: Operations
---
# 关于操作ClickHouse服务器和集群的问题 {#question-about-operating-clickhouse-servers-and-clusters}
问题:
- [如果想在生产环境部署,需要用哪个版本的 ClickHouse 呢?](../../faq/operations/production.md)
- [是否可能从 ClickHouse 数据表中删除所有旧的数据记录?](../../faq/operations/delete-old-data.md)
- [ClickHouse支持多区域复制吗?](../../faq/operations/multi-region-replication.md)
!!! info "没看到你要找的东西吗?"
查看[其他faq类别](../../faq/index.md)或浏览左边栏中的主要文档文章。
{## [原文](https://clickhouse.com/docs/en/faq/production/) ##}
---
slug: /zh/faq/operations/
title: 关于操作ClickHouse服务器和集群的问题
toc_hidden_folder: true
sidebar_position: 3
sidebar_label: Operations
---
# 关于操作ClickHouse服务器和集群的问题 {#question-about-operating-clickhouse-servers-and-clusters}
问题:
- [如果想在生产环境部署,需要用哪个版本的 ClickHouse 呢?](../../faq/operations/production.md)
- [是否可能从 ClickHouse 数据表中删除所有旧的数据记录?](../../faq/operations/delete-old-data.md)
- [ClickHouse支持多区域复制吗?](../../faq/operations/multi-region-replication.md)
!!! info "没看到你要找的东西吗?"
查看[其他faq类别](../../faq/index.md)或浏览左边栏中的主要文档文章。
{## [原文](https://clickhouse.com/docs/en/faq/production/) ##}

View File

@ -1,15 +1,15 @@
---
---
slug: /zh/faq/operations/multi-region-replication
title: ClickHouse支持多区域复制吗?
toc_hidden: true
sidebar_position: 30
---
# ClickHouse支持多区域复制吗? {#does-clickhouse-support-multi-region-replication}
简短的回答是“是的”。然而,我们建议将所有区域/数据中心之间的延迟保持在两位数字范围内,否则,在通过分布式共识协议时,写性能将受到影响。例如,美国海岸之间的复制可能会很好,但美国和欧洲之间就不行。
在配置方面,这与单区域复制没有区别,只是使用位于不同位置的主机作为副本。
更多信息,请参见[关于数据复制的完整文章](../../engines/table-engines/mergetree-family/replication.md)。
title: ClickHouse支持多区域复制吗?
toc_hidden: true
sidebar_position: 30
---
# ClickHouse支持多区域复制吗? {#does-clickhouse-support-multi-region-replication}
简短的回答是“是的”。然而,我们建议将所有区域/数据中心之间的延迟保持在两位数字范围内,否则,在通过分布式共识协议时,写性能将受到影响。例如,美国海岸之间的复制可能会很好,但美国和欧洲之间就不行。
在配置方面,这与单区域复制没有区别,只是使用位于不同位置的主机作为副本。
更多信息,请参见[关于数据复制的完整文章](../../engines/table-engines/mergetree-family/replication.md)。

View File

@ -0,0 +1 @@
clickhouse

View File

@ -0,0 +1,2 @@
# Source the shared completion helpers unless _CLICKHOUSE_COMPLETION_LOADED is already set.
# NOTE: `-v` expects a variable NAME; writing `$_CLICKHOUSE_COMPLETION_LOADED` would test
# whichever variable is named by its value instead of the guard variable itself.
[[ -v _CLICKHOUSE_COMPLETION_LOADED ]] || source "$(dirname "${BASH_SOURCE[0]}")/clickhouse-bootstrap"
# Register the generic ClickHouse completion for the `chc` command.
_complete_clickhouse_generic chc

View File

@ -0,0 +1,2 @@
# Source the shared completion helpers unless _CLICKHOUSE_COMPLETION_LOADED is already set.
# NOTE: `-v` expects a variable NAME; writing `$_CLICKHOUSE_COMPLETION_LOADED` would test
# whichever variable is named by its value instead of the guard variable itself.
[[ -v _CLICKHOUSE_COMPLETION_LOADED ]] || source "$(dirname "${BASH_SOURCE[0]}")/clickhouse-bootstrap"
# Register the generic ClickHouse completion for the `chl` command.
_complete_clickhouse_generic chl

View File

@ -31,3 +31,4 @@ function _complete_for_clickhouse_entrypoint_bin()
}
_complete_clickhouse_generic clickhouse _complete_for_clickhouse_entrypoint_bin
_complete_clickhouse_generic ch _complete_for_clickhouse_entrypoint_bin

View File

@ -2,6 +2,7 @@
#include <Common/ZooKeeper/ZooKeeper.h>
#include <Common/StatusFile.h>
#include <Common/TerminalSize.h>
#include <Databases/registerDatabases.h>
#include <IO/ConnectionTimeouts.h>
#include <Formats/registerFormats.h>
#include <Common/scope_guard_safe.h>
@ -159,6 +160,7 @@ void ClusterCopierApp::mainImpl()
registerFunctions();
registerAggregateFunctions();
registerTableFunctions();
registerDatabases();
registerStorages();
registerDictionaries();
registerDisks(/* global_skip_access_check= */ true);

View File

@ -17,6 +17,7 @@
#include <Interpreters/Context.h>
#include <Functions/FunctionFactory.h>
#include <Databases/registerDatabases.h>
#include <Functions/registerFunctions.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/registerAggregateFunctions.h>
@ -130,6 +131,7 @@ int mainEntryClickHouseFormat(int argc, char ** argv)
registerFunctions();
registerAggregateFunctions();
registerTableFunctions();
registerDatabases();
registerStorages();
registerFormats();

View File

@ -10,6 +10,7 @@
#include <Poco/Logger.h>
#include <Poco/NullChannel.h>
#include <Poco/SimpleFileChannel.h>
#include <Databases/registerDatabases.h>
#include <Databases/DatabaseFilesystem.h>
#include <Databases/DatabaseMemory.h>
#include <Databases/DatabasesOverlay.h>
@ -489,6 +490,7 @@ try
registerFunctions();
registerAggregateFunctions();
registerTableFunctions();
registerDatabases();
registerStorages();
registerDictionaries();
registerDisks(/* global_skip_access_check= */ true);
@ -726,12 +728,7 @@ void LocalServer::processConfig()
/// We load temporary database first, because projections need it.
DatabaseCatalog::instance().initializeAndLoadTemporaryDatabase();
/** Init dummy default DB
* NOTE: We force using isolated default database to avoid conflicts with default database from server environment
* Otherwise, metadata of temporary File(format, EXPLICIT_PATH) tables will pollute metadata/ directory;
* if such tables will not be dropped, clickhouse-server will not be able to load them due to security reasons.
*/
std::string default_database = config().getString("default_database", "_local");
std::string default_database = config().getString("default_database", "default");
DatabaseCatalog::instance().attachDatabase(default_database, createClickHouseLocalDatabaseOverlay(default_database, global_context));
global_context->setCurrentDatabase(default_database);
@ -744,7 +741,7 @@ void LocalServer::processConfig()
LOG_DEBUG(log, "Loading metadata from {}", path);
auto startup_system_tasks = loadMetadataSystem(global_context);
attachSystemTablesLocal(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::SYSTEM_DATABASE));
attachSystemTablesServer(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::SYSTEM_DATABASE), false);
attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA));
attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE));
waitLoad(TablesLoaderForegroundPoolId, startup_system_tasks);
@ -763,7 +760,7 @@ void LocalServer::processConfig()
}
else if (!config().has("no-system-tables"))
{
attachSystemTablesLocal(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::SYSTEM_DATABASE));
attachSystemTablesServer(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::SYSTEM_DATABASE), false);
attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA));
attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE));
}

View File

@ -158,7 +158,6 @@ std::pair<std::string_view, MainFunc> clickhouse_applications[] =
std::pair<std::string_view, std::string_view> clickhouse_short_names[] =
{
#if ENABLE_CLICKHOUSE_LOCAL
{"ch", "local"},
{"chl", "local"},
#endif
#if ENABLE_CLICKHOUSE_CLIENT
@ -502,6 +501,17 @@ int main(int argc_, char ** argv_)
}
}
/// Interpret the binary invoked without arguments, or with a first argument
/// starting with a dash ('-'), as clickhouse-local for better usability:
///
/// clickhouse -q 'select 1' # use local
/// clickhouse # spawn local (instead of dumping help)
/// clickhouse local # spawn local
///
if (main_func == printHelp && !argv.empty() && (argv.size() == 1 || argv[1][0] == '-'))
main_func = mainEntryClickHouseLocal;
return main_func(static_cast<int>(argv.size()), argv.data());
}
#endif

View File

@ -72,6 +72,7 @@
#include <TableFunctions/registerTableFunctions.h>
#include <Formats/registerFormats.h>
#include <Storages/registerStorages.h>
#include <Databases/registerDatabases.h>
#include <Dictionaries/registerDictionaries.h>
#include <Disks/registerDisks.h>
#include <IO/Resource/registerSchedulerNodes.h>
@ -648,6 +649,7 @@ try
registerFunctions();
registerAggregateFunctions();
registerTableFunctions();
registerDatabases();
registerStorages();
registerDictionaries();
registerDisks(/* global_skip_access_check= */ false);

View File

@ -1 +0,0 @@
../../../tests/config/config.d/graphite_alternative.xml

View File

@ -82,7 +82,8 @@ enum class AccessType
\
M(ALTER_VIEW_REFRESH, "ALTER LIVE VIEW REFRESH, REFRESH VIEW", VIEW, ALTER_VIEW) \
M(ALTER_VIEW_MODIFY_QUERY, "ALTER TABLE MODIFY QUERY", VIEW, ALTER_VIEW) \
M(ALTER_VIEW, "", GROUP, ALTER) /* allows to execute ALTER VIEW REFRESH, ALTER VIEW MODIFY QUERY;
M(ALTER_VIEW_MODIFY_REFRESH, "ALTER TABLE MODIFY QUERY", VIEW, ALTER_VIEW) \
M(ALTER_VIEW, "", GROUP, ALTER) /* allows to execute ALTER VIEW REFRESH, ALTER VIEW MODIFY QUERY, ALTER VIEW MODIFY REFRESH;
implicitly enabled by the grant ALTER_TABLE */\
\
M(ALTER, "", GROUP, ALL) /* allows to execute ALTER {TABLE|LIVE VIEW} */\
@ -177,6 +178,7 @@ enum class AccessType
M(SYSTEM_MOVES, "SYSTEM STOP MOVES, SYSTEM START MOVES, STOP MOVES, START MOVES", TABLE, SYSTEM) \
M(SYSTEM_PULLING_REPLICATION_LOG, "SYSTEM STOP PULLING REPLICATION LOG, SYSTEM START PULLING REPLICATION LOG", TABLE, SYSTEM) \
M(SYSTEM_CLEANUP, "SYSTEM STOP CLEANUP, SYSTEM START CLEANUP", TABLE, SYSTEM) \
M(SYSTEM_VIEWS, "SYSTEM REFRESH VIEW, SYSTEM START VIEWS, SYSTEM STOP VIEWS, SYSTEM START VIEW, SYSTEM STOP VIEW, SYSTEM CANCEL VIEW, REFRESH VIEW, START VIEWS, STOP VIEWS, START VIEW, STOP VIEW, CANCEL VIEW", VIEW, SYSTEM) \
M(SYSTEM_DISTRIBUTED_SENDS, "SYSTEM STOP DISTRIBUTED SENDS, SYSTEM START DISTRIBUTED SENDS, STOP DISTRIBUTED SENDS, START DISTRIBUTED SENDS", TABLE, SYSTEM_SENDS) \
M(SYSTEM_REPLICATED_SENDS, "SYSTEM STOP REPLICATED SENDS, SYSTEM START REPLICATED SENDS, STOP REPLICATED SENDS, START REPLICATED SENDS", TABLE, SYSTEM_SENDS) \
M(SYSTEM_SENDS, "SYSTEM STOP SENDS, SYSTEM START SENDS, STOP SENDS, START SENDS", GROUP, SYSTEM) \

View File

@ -155,6 +155,7 @@ namespace
"formats",
"privileges",
"data_type_families",
"database_engines",
"table_engines",
"table_functions",
"aggregate_function_combinators",

View File

@ -51,7 +51,7 @@ TEST(AccessRights, Union)
"CREATE DICTIONARY, DROP DATABASE, DROP TABLE, DROP VIEW, DROP DICTIONARY, UNDROP TABLE, "
"TRUNCATE, OPTIMIZE, BACKUP, CREATE ROW POLICY, ALTER ROW POLICY, DROP ROW POLICY, "
"SHOW ROW POLICIES, SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, "
"SYSTEM MOVES, SYSTEM PULLING REPLICATION LOG, SYSTEM CLEANUP, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, "
"SYSTEM MOVES, SYSTEM PULLING REPLICATION LOG, SYSTEM CLEANUP, SYSTEM VIEWS, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, "
"SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, SYSTEM RESTART REPLICA, "
"SYSTEM RESTORE REPLICA, SYSTEM WAIT LOADING PARTS, SYSTEM SYNC DATABASE REPLICA, SYSTEM FLUSH DISTRIBUTED, dictGet ON db1.*, GRANT NAMED COLLECTION ADMIN ON db1");
}

View File

@ -1,6 +1,8 @@
#include <Analyzer/Passes/ComparisonTupleEliminationPass.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeNothing.h>
#include <Functions/FunctionFactory.h>
@ -52,6 +54,13 @@ public:
if (!isTuple(rhs_argument_result_type))
return;
if (function_node->getResultType()->equals(DataTypeNullable(std::make_shared<DataTypeNothing>())))
/** The function `equals` can return Nullable(Nothing), e.g., in the case of (a, b) == (NULL, 1).
* On the other hand, `AND` returns Nullable(UInt8), so we would need to convert types.
* It's better to just skip this trivial case.
*/
return;
auto lhs_argument_node_type = lhs_argument->getNodeType();
auto rhs_argument_node_type = rhs_argument->getNodeType();

View File

@ -154,14 +154,14 @@ BackupCoordinationStageSync::State BackupCoordinationStageSync::readCurrentState
/// If the "alive" node doesn't exist then we don't have connection to the corresponding host.
/// This node is ephemeral so probably it will be recreated soon. We use zookeeper retries to wait.
/// In worst case when we won't manage to see the alive node for a long time we will just abort the backup.
String message;
const auto * const suffix = retries_ctl.isLastRetry() ? "" : ", will retry";
if (started)
message = fmt::format("Lost connection to host {}", host);
retries_ctl.setUserError(Exception(ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE,
"Lost connection to host {}{}", host, suffix));
else
message = fmt::format("No connection to host {} yet", host);
if (!retries_ctl.isLastRetry())
message += ", will retry";
retries_ctl.setUserError(ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE, message);
retries_ctl.setUserError(Exception(ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE,
"No connection to host {} yet{}", host, suffix));
state.disconnected_host = host;
return state;
}

View File

@ -69,10 +69,15 @@ namespace
/// Requests in backups can be extremely long, set to one hour
client_configuration.requestTimeoutMs = 60 * 60 * 1000;
S3::ClientSettings client_settings{
.use_virtual_addressing = s3_uri.is_virtual_hosted_style,
.disable_checksum = local_settings.s3_disable_checksum,
.gcs_issue_compose_request = context->getConfigRef().getBool("s3.gcs_issue_compose_request", false),
};
return S3::ClientFactory::instance().create(
client_configuration,
s3_uri.is_virtual_hosted_style,
local_settings.s3_disable_checksum,
client_settings,
credentials.GetAWSAccessKeyId(),
credentials.GetAWSSecretKey(),
settings.auth_settings.server_side_encryption_customer_key_base64,

View File

@ -226,6 +226,7 @@ add_object_library(clickhouse_storages_statistics Storages/Statistics)
add_object_library(clickhouse_storages_liveview Storages/LiveView)
add_object_library(clickhouse_storages_windowview Storages/WindowView)
add_object_library(clickhouse_storages_s3queue Storages/S3Queue)
add_object_library(clickhouse_storages_materializedview Storages/MaterializedView)
add_object_library(clickhouse_client Client)
add_object_library(clickhouse_bridge BridgeHelper)
add_object_library(clickhouse_server Server)

View File

@ -77,6 +77,7 @@ static String getLoadSuggestionQuery(Int32 suggestion_limit, bool basic_suggesti
};
add_column("name", "functions", false, {});
add_column("name", "database_engines", false, {});
add_column("name", "table_engines", false, {});
add_column("name", "formats", false, {});
add_column("name", "table_functions", false, {});

View File

@ -5,6 +5,7 @@
#include <Core/ColumnsWithTypeAndName.h>
#include <Columns/IColumn.h>
namespace DB
{
namespace ErrorCodes
@ -16,7 +17,7 @@ class IFunctionBase;
using FunctionBasePtr = std::shared_ptr<const IFunctionBase>;
/** A column containing a lambda expression.
* Behaves like a constant-column. Contains an expression, but not input or output data.
* Contains an expression and captured columns, but not input arguments.
*/
class ColumnFunction final : public COWHelper<IColumn, ColumnFunction>
{
@ -207,8 +208,6 @@ private:
bool is_function_compiled;
void appendArgument(const ColumnWithTypeAndName & column);
void addOffsetsForReplication(const IColumn::Offsets & offsets);
};
const ColumnFunction * checkAndGetShortCircuitArgument(const ColumnPtr & column);

View File

@ -0,0 +1,144 @@
#include <Common/CalendarTimeInterval.h>
#include <Common/Exception.h>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
/// Builds the interval by accumulating every (unit, amount) pair:
/// calendar units (year, quarter, month) are summed up as months,
/// clock units (week .. second) are summed up as seconds.
/// Throws BAD_ARGUMENTS for sub-second units.
CalendarTimeInterval::CalendarTimeInterval(const CalendarTimeInterval::Intervals & intervals)
{
    for (auto [unit, amount] : intervals)
    {
        switch (unit.kind)
        {
            /// Units finer than a second cannot be represented.
            case IntervalKind::Nanosecond:
            case IntervalKind::Microsecond:
            case IntervalKind::Millisecond:
                throw Exception(ErrorCodes::BAD_ARGUMENTS, "Sub-second intervals are not supported here");

            /// Clock units: converted to seconds.
            case IntervalKind::Second:
            case IntervalKind::Minute:
            case IntervalKind::Hour:
            case IntervalKind::Day:
            case IntervalKind::Week:
                seconds += amount * unit.toAvgSeconds();
                break;

            /// Calendar units: converted to months.
            case IntervalKind::Month:
                months += amount;
                break;
            case IntervalKind::Quarter:
                months += amount * 3;
                break;
            case IntervalKind::Year:
                months += amount * 12;
                break;
        }
    }
}
/// Decomposes the stored months and seconds back into a list of (unit, amount) pairs,
/// always taking as many of the largest unit as possible before moving to the next one.
/// Units with a zero amount are omitted. Month-based parts come before second-based parts.
CalendarTimeInterval::Intervals CalendarTimeInterval::toIntervals() const
{
    Intervals parts;

    /// `units` must be ordered from largest to smallest and end with a unit of size 1,
    /// so that nothing is left over.
    auto decompose = [&parts](UInt64 remaining, std::initializer_list<std::pair<IntervalKind, UInt64>> units)
    {
        for (auto [unit, unit_size] : units)
        {
            const UInt64 whole_units = remaining / unit_size;
            if (whole_units != 0)
            {
                remaining -= whole_units * unit_size;
                parts.emplace_back(unit, whole_units);
            }
        }
        chassert(remaining == 0);
    };

    decompose(months, {{IntervalKind::Year, 12}, {IntervalKind::Month, 1}});
    decompose(
        seconds,
        {{IntervalKind::Week, 3600*24*7},
         {IntervalKind::Day, 3600*24},
         {IntervalKind::Hour, 3600},
         {IntervalKind::Minute, 60},
         {IntervalKind::Second, 1}});

    return parts;
}
/// Lower bound of the duration in seconds: every full 12 months count as 365 days,
/// every remaining month as 28 days, plus the fixed number of seconds.
UInt64 CalendarTimeInterval::minSeconds() const
{
    constexpr UInt64 seconds_per_day = 3600 * 24;
    const UInt64 whole_years = months / 12;
    const UInt64 leftover_months = months % 12;
    return seconds_per_day * (whole_years * 365 + leftover_months * 28) + seconds;
}
/// Upper bound of the duration in seconds: every full 12 months count as 366 days,
/// every remaining month as 31 days, plus the fixed number of seconds.
UInt64 CalendarTimeInterval::maxSeconds() const
{
    constexpr UInt64 seconds_per_day = 3600 * 24;
    const UInt64 whole_years = months / 12;
    const UInt64 leftover_months = months % 12;
    return seconds_per_day * (whole_years * 366 + leftover_months * 31) + seconds;
}
/// Throws BAD_ARGUMENTS if the interval has both a months part and a seconds part.
void CalendarTimeInterval::assertSingleUnit() const
{
    const bool has_clock_part = seconds != 0;
    const bool has_calendar_part = months != 0;
    if (has_clock_part && has_calendar_part)
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Interval shouldn't contain both calendar units and clock units (e.g. months and days)");
}
/// Throws BAD_ARGUMENTS if the interval is empty (both parts are zero).
/// Both parts are unsigned, so "not zero" is the same as "positive".
void CalendarTimeInterval::assertPositive() const
{
    if (seconds == 0 && months == 0)
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Interval must be positive");
}
/// Index of the calendar month containing `t`, counted from January 1970 (which is month 0).
/// Months before 1970 get negative indices.
static Int64 toAbsoluteMonth(std::chrono::system_clock::time_point t)
{
    const auto day = std::chrono::floor<std::chrono::days>(t);
    const std::chrono::year_month_day date(day);

    const Int64 years_since_1970 = Int64(int(date.year())) - 1970;
    const Int64 month_in_year = Int64(unsigned(date.month()) - 1); /// 0 = January
    return years_since_1970 * 12 + month_in_year;
}
/// Inverse of toAbsoluteMonth(): timestamp of the first day (00:00:00) of the month
/// with the given index, where index 0 is January 1970.
static std::chrono::sys_seconds startOfAbsoluteMonth(Int64 absolute_month)
{
    /// Floor division by 12, so that negative indices map to years before 1970.
    Int64 year_offset;
    if (absolute_month >= 0)
        year_offset = absolute_month / 12;
    else
        year_offset = -((-absolute_month + 11) / 12);

    const Int64 month_index = absolute_month - year_offset * 12;
    chassert(month_index >= 0 && month_index < 12);

    const std::chrono::year_month_day first_day(
        std::chrono::year(int(year_offset + 1970)),
        std::chrono::month(unsigned(month_index + 1)),
        std::chrono::day(1));
    return std::chrono::sys_days(first_day);
}
/// Adds this interval to `tp` (truncated to whole seconds): first the months part, then the seconds part.
/// The months part keeps the offset from the start of the month, so an offset longer than
/// the target month spills over into the following month.
std::chrono::sys_seconds CalendarTimeInterval::advance(std::chrono::system_clock::time_point tp) const
{
    auto result = std::chrono::sys_seconds(std::chrono::floor<std::chrono::seconds>(tp));

    if (months != 0)
    {
        const auto current_month = toAbsoluteMonth(result);
        const auto offset_in_month = result - startOfAbsoluteMonth(current_month);
        result = startOfAbsoluteMonth(current_month + Int64(months)) + offset_in_month;
    }

    return result + std::chrono::seconds(Int64(seconds));
}
/// Rounds `tp` down to the nearest timestamp aligned with this interval.
/// The interval must consist of a single kind of unit and be non-zero, otherwise throws BAD_ARGUMENTS.
///  * Months: returns the start of the month whose absolute index (0 = January 1970)
///    is the largest multiple of `months` not exceeding the month of `tp`.
///  * Seconds: returns the largest timestamp x <= tp such that (x - 1969-12-29 00:00:00)
///    is a multiple of `seconds`.
std::chrono::sys_seconds CalendarTimeInterval::floor(std::chrono::system_clock::time_point tp) const
{
    assertSingleUnit();
    assertPositive();

    /// Largest multiple of `step` that is <= `value`.
    /// Done in signed arithmetic on purpose: dividing a signed value by the UInt64 members directly
    /// converts it to unsigned, which gives garbage for timestamps before the alignment origin.
    /// Like advance(), this assumes the interval fits into Int64.
    auto floor_to_multiple = [](Int64 value, Int64 step)
    {
        Int64 remainder = value % step;
        if (remainder < 0)
            remainder += step;
        return value - remainder;
    };

    if (months)
        return startOfAbsoluteMonth(floor_to_multiple(toAbsoluteMonth(tp), Int64(months)));

    /// We want to align with weeks, but 1970-01-01 is a Thursday, so align with 1969-12-29 (Monday) instead.
    constexpr Int64 epoch = -3600*24*3;
    const auto t = std::chrono::sys_seconds(std::chrono::floor<std::chrono::seconds>(tp));
    const Int64 since_epoch = Int64(t.time_since_epoch().count()) - epoch;
    return std::chrono::sys_seconds(std::chrono::seconds(floor_to_multiple(since_epoch, Int64(seconds)) + epoch));
}
/// Two intervals are equal when both their months and their seconds parts match.
bool CalendarTimeInterval::operator==(const CalendarTimeInterval & rhs) const
{
    return months == rhs.months && seconds == rhs.seconds;
}
/// Negation of operator==.
bool CalendarTimeInterval::operator!=(const CalendarTimeInterval & rhs) const
{
    const bool same = (*this == rhs);
    return !same;
}
}

View File

@ -0,0 +1,63 @@
#pragma once
#include <Common/IntervalKind.h>
#include <chrono>
namespace DB
{
/// Represents a duration of calendar time, e.g.:
/// * 1 week + 5 minutes + 21 seconds (aka 605121 seconds),
/// * 1 (calendar) month - not equivalent to any number of seconds!
/// * 3 years + 1 week (aka 36 months + 604800 seconds).
///
/// Be careful with calendar arithmetic: it's missing many familiar properties of numbers.
/// E.g. x + y - y is not always equal to x (October 31 + 1 month - 1 month = November 1).
struct CalendarTimeInterval
{
/// Clock part (weeks, days, hours, minutes, seconds), expressed in seconds.
UInt64 seconds = 0;
/// Calendar part (years, quarters, months), expressed in months.
UInt64 months = 0;
using Intervals = std::vector<std::pair<IntervalKind, UInt64>>;
CalendarTimeInterval() = default;
/// Year, Quarter, Month are converted to months.
/// Week, Day, Hour, Minute, Second are converted to seconds.
/// Millisecond, Microsecond, Nanosecond throw exception.
explicit CalendarTimeInterval(const Intervals & intervals);
/// E.g. for {36 months, 604801 seconds} returns {3 years, 1 week, 1 second}.
Intervals toIntervals() const;
/// Approximate shortest and longest duration in seconds. E.g. a month is [28, 31] days.
UInt64 minSeconds() const;
UInt64 maxSeconds() const;
/// Checks that the interval has only months or only seconds, throws otherwise.
void assertSingleUnit() const;
/// Checks that the interval is not empty (months or seconds is nonzero), throws otherwise.
void assertPositive() const;
/// Add this interval to the timestamp. First months, then seconds.
/// Gets weird near month boundaries: October 31 + 1 month = December 1.
std::chrono::sys_seconds advance(std::chrono::system_clock::time_point t) const;
/// Rounds the timestamp down to the nearest timestamp "aligned" with this interval.
/// The interval must satisfy assertSingleUnit() and assertPositive().
/// * For months, rounds to the start of a month whose absolute index is divisible by `months`.
/// The month index is 0-based starting from January 1970.
/// E.g. if the interval is 1 month, rounds down to the start of the month.
/// * For seconds, rounds to a timestamp x such that (x - December 29 1969 (Monday)) is divisible
/// by this interval.
/// E.g. if the interval is 1 week, rounds down to the start of the week (Monday).
///
/// Guarantees:
/// * advance(floor(x)) > x
/// * floor(advance(floor(x))) = advance(floor(x))
std::chrono::sys_seconds floor(std::chrono::system_clock::time_point t) const;
/// Intervals are equal when both `months` and `seconds` are equal.
bool operator==(const CalendarTimeInterval & rhs) const;
bool operator!=(const CalendarTimeInterval & rhs) const;
};
}

View File

@ -253,6 +253,8 @@
M(MergeTreeAllRangesAnnouncementsSent, "The current number of announcement being sent in flight from the remote server to the initiator server about the set of data parts (for MergeTree tables). Measured on the remote server side.") \
M(CreatedTimersInQueryProfiler, "Number of Created thread local timers in QueryProfiler") \
M(ActiveTimersInQueryProfiler, "Number of Active thread local timers in QueryProfiler") \
M(RefreshableViews, "Number materialized views with periodic refreshing (REFRESH)") \
M(RefreshingViews, "Number of materialized views currently executing a refresh") \
#ifdef APPLY_FOR_EXTERNAL_METRICS
#define APPLY_FOR_METRICS(M) APPLY_FOR_BUILTIN_METRICS(M) APPLY_FOR_EXTERNAL_METRICS(M)

View File

@ -71,6 +71,8 @@ struct IntervalKind
/// Returns false if the conversion did not succeed.
/// For example, `IntervalKind::tryParseString('second', result)` returns `result` equals `IntervalKind::Kind::Second`.
static bool tryParseString(const std::string & kind, IntervalKind::Kind & result);
auto operator<=>(const IntervalKind & other) const { return kind <=> other.kind; }
};
/// NOLINTNEXTLINE

View File

@ -28,25 +28,31 @@ namespace ErrorCodes
static thread_local char thread_name[THREAD_NAME_SIZE]{};
void setThreadName(const char * name)
void setThreadName(const char * name, bool truncate)
{
if (strlen(name) > THREAD_NAME_SIZE - 1)
size_t name_len = strlen(name);
if (!truncate && name_len > THREAD_NAME_SIZE - 1)
throw DB::Exception(DB::ErrorCodes::PTHREAD_ERROR, "Thread name cannot be longer than 15 bytes");
size_t name_capped_len = std::min<size_t>(1 + name_len, THREAD_NAME_SIZE - 1);
char name_capped[THREAD_NAME_SIZE];
memcpy(name_capped, name, name_capped_len);
name_capped[name_capped_len] = '\0';
#if defined(OS_FREEBSD)
pthread_set_name_np(pthread_self(), name);
pthread_set_name_np(pthread_self(), name_capped);
if ((false))
#elif defined(OS_DARWIN)
if (0 != pthread_setname_np(name))
if (0 != pthread_setname_np(name_capped))
#elif defined(OS_SUNOS)
if (0 != pthread_setname_np(pthread_self(), name))
if (0 != pthread_setname_np(pthread_self(), name_capped))
#else
if (0 != prctl(PR_SET_NAME, name, 0, 0, 0))
if (0 != prctl(PR_SET_NAME, name_capped, 0, 0, 0))
#endif
if (errno != ENOSYS && errno != EPERM) /// It's ok if the syscall is unsupported or not allowed in some environments.
throw DB::ErrnoException(DB::ErrorCodes::PTHREAD_ERROR, "Cannot set thread name with prctl(PR_SET_NAME, ...)");
memcpy(thread_name, name, std::min<size_t>(1 + strlen(name), THREAD_NAME_SIZE - 1));
memcpy(thread_name, name_capped, name_capped_len);
}
const char * getThreadName()

View File

@ -4,7 +4,9 @@
/** Sets the thread name (maximum length is 15 bytes),
* which will be visible in ps, gdb, /proc,
* for convenience of observation and debugging.
*
* @param truncate - if true, will truncate to 15 automatically, otherwise throw
*/
void setThreadName(const char * name);
void setThreadName(const char * name, bool truncate = false);
const char * getThreadName();

View File

@ -0,0 +1,30 @@
#include <Common/Config/ConfigHelper.h>
#include <Poco/AutoPtr.h>
#include <Poco/Util/XMLConfiguration.h>
#include <Poco/DOM/DOMParser.h>
#include <gtest/gtest.h>
using namespace DB;
/// Checks how a configuration element whose name contains a dot (<foo.bar>) is addressed:
/// the dot has to be escaped with a backslash in the key, and keys() reports it escaped as well.
TEST(Common, ConfigWithDotInKeys)
{
std::string xml(R"CONFIG(<clickhouse>
<foo.bar>1</foo.bar>
</clickhouse>)CONFIG");
Poco::XML::DOMParser dom_parser;
Poco::AutoPtr<Poco::XML::Document> document = dom_parser.parseString(xml);
Poco::AutoPtr<Poco::Util::XMLConfiguration> config = new Poco::Util::XMLConfiguration(document);
/// directly
/// With an unescaped dot the <foo.bar> element is not found, so the default (false) is returned.
EXPECT_EQ(ConfigHelper::getBool(*config, "foo.bar", false, false), false);
/// With the dot escaped the element is found and its value "1" is read as true.
EXPECT_EQ(ConfigHelper::getBool(*config, "foo\\.bar", false, false), true);
/// via keys()
/// The only top-level key is reported with the dot already escaped.
Poco::Util::AbstractConfiguration::Keys keys;
config->keys("", keys);
ASSERT_EQ(1, keys.size());
ASSERT_EQ("foo\\.bar", keys[0]);
}

View File

@ -23,7 +23,7 @@ int main(int, char **)
Stopwatch stopwatch;
{
DB::WriteBufferFromFile buf("test1", DBMS_DEFAULT_BUFFER_SIZE, O_WRONLY | O_CREAT | O_TRUNC);
DB::WriteBufferFromFile buf("test1", DB::DBMS_DEFAULT_BUFFER_SIZE, O_WRONLY | O_CREAT | O_TRUNC);
DB::CompressedWriteBuffer compressed_buf(buf);
stopwatch.restart();

View File

@ -660,6 +660,12 @@ nuraft::cb_func::ReturnCode KeeperServer::callbackFunc(nuraft::cb_func::Type typ
switch (type)
{
case nuraft::cb_func::PreAppendLogLeader:
{
/// we cannot preprocess anything new as leader because we don't have up-to-date in-memory state
/// until we preprocess all stored logs
return nuraft::cb_func::ReturnCode::ReturnNull;
}
case nuraft::cb_func::InitialBatchCommited:
{
preprocess_logs();

View File

@ -13,6 +13,7 @@
#include <IO/ReadBufferFromFile.h>
#include <IO/ReadHelpers.h>
#include <IO/S3/PocoHTTPClient.h>
#include <IO/S3/Client.h>
#include <IO/WriteHelpers.h>
#include <IO/copyData.h>
#include <Common/Macros.h>
@ -98,10 +99,15 @@ void KeeperSnapshotManagerS3::updateS3Configuration(const Poco::Util::AbstractCo
client_configuration.endpointOverride = new_uri.endpoint;
S3::ClientSettings client_settings{
.use_virtual_addressing = new_uri.is_virtual_hosted_style,
.disable_checksum = false,
.gcs_issue_compose_request = false,
};
auto client = S3::ClientFactory::instance().create(
client_configuration,
new_uri.is_virtual_hosted_style,
/* disable_checksum= */ false,
client_settings,
credentials.GetAWSAccessKeyId(),
credentials.GetAWSSecretKey(),
auth_settings.server_side_encryption_customer_key_base64,

View File

@ -1000,7 +1000,7 @@ TEST_P(CoordinationTest, ChangelogTestReadAfterBrokenTruncate)
EXPECT_TRUE(fs::exists("./logs/changelog_31_35.bin" + params.extension));
DB::WriteBufferFromFile plain_buf(
"./logs/changelog_11_15.bin" + params.extension, DBMS_DEFAULT_BUFFER_SIZE, O_APPEND | O_CREAT | O_WRONLY);
"./logs/changelog_11_15.bin" + params.extension, DB::DBMS_DEFAULT_BUFFER_SIZE, O_APPEND | O_CREAT | O_WRONLY);
plain_buf.truncate(0);
DB::KeeperLogStore changelog_reader(
@ -1073,7 +1073,7 @@ TEST_P(CoordinationTest, ChangelogTestReadAfterBrokenTruncate2)
EXPECT_TRUE(fs::exists("./logs/changelog_21_40.bin" + params.extension));
DB::WriteBufferFromFile plain_buf(
"./logs/changelog_1_20.bin" + params.extension, DBMS_DEFAULT_BUFFER_SIZE, O_APPEND | O_CREAT | O_WRONLY);
"./logs/changelog_1_20.bin" + params.extension, DB::DBMS_DEFAULT_BUFFER_SIZE, O_APPEND | O_CREAT | O_WRONLY);
plain_buf.truncate(30);
DB::KeeperLogStore changelog_reader(
@ -1130,7 +1130,7 @@ TEST_F(CoordinationTest, ChangelogTestReadAfterBrokenTruncate3)
EXPECT_TRUE(fs::exists("./logs/changelog_21_40.bin"));
DB::WriteBufferFromFile plain_buf(
"./logs/changelog_1_20.bin", DBMS_DEFAULT_BUFFER_SIZE, O_APPEND | O_CREAT | O_WRONLY);
"./logs/changelog_1_20.bin", DB::DBMS_DEFAULT_BUFFER_SIZE, O_APPEND | O_CREAT | O_WRONLY);
plain_buf.truncate(plain_buf.size() - 30);
DB::KeeperLogStore changelog_reader(
@ -1733,7 +1733,7 @@ TEST_P(CoordinationTest, TestStorageSnapshotBroken)
/// Let's corrupt file
DB::WriteBufferFromFile plain_buf(
"./snapshots/snapshot_50.bin" + params.extension, DBMS_DEFAULT_BUFFER_SIZE, O_APPEND | O_CREAT | O_WRONLY);
"./snapshots/snapshot_50.bin" + params.extension, DB::DBMS_DEFAULT_BUFFER_SIZE, O_APPEND | O_CREAT | O_WRONLY);
plain_buf.truncate(34);
plain_buf.sync();
@ -2770,7 +2770,7 @@ TEST_P(CoordinationTest, TestDurableState)
{
SCOPED_TRACE("Read from corrupted file");
state_manager.reset();
DB::WriteBufferFromFile write_buf("./state", DBMS_DEFAULT_BUFFER_SIZE, O_WRONLY);
DB::WriteBufferFromFile write_buf("./state", DB::DBMS_DEFAULT_BUFFER_SIZE, O_WRONLY);
write_buf.seek(20, SEEK_SET);
DB::writeIntBinary(31, write_buf);
write_buf.sync();
@ -2787,7 +2787,7 @@ TEST_P(CoordinationTest, TestDurableState)
SCOPED_TRACE("Read from file with invalid size");
state_manager.reset();
DB::WriteBufferFromFile write_buf("./state", DBMS_DEFAULT_BUFFER_SIZE, O_TRUNC | O_CREAT | O_WRONLY);
DB::WriteBufferFromFile write_buf("./state", DB::DBMS_DEFAULT_BUFFER_SIZE, O_TRUNC | O_CREAT | O_WRONLY);
DB::writeIntBinary(20, write_buf);
write_buf.sync();
write_buf.close();

View File

@ -31,7 +31,7 @@ bool BackgroundSchedulePoolTaskInfo::schedule()
return true;
}
bool BackgroundSchedulePoolTaskInfo::scheduleAfter(size_t milliseconds, bool overwrite)
bool BackgroundSchedulePoolTaskInfo::scheduleAfter(size_t milliseconds, bool overwrite, bool only_if_scheduled)
{
std::lock_guard lock(schedule_mutex);
@ -39,6 +39,8 @@ bool BackgroundSchedulePoolTaskInfo::scheduleAfter(size_t milliseconds, bool ove
return false;
if (delayed && !overwrite)
return false;
if (!delayed && only_if_scheduled)
return false;
pool.scheduleDelayedTask(shared_from_this(), milliseconds, lock);
return true;

View File

@ -106,8 +106,10 @@ public:
bool schedule();
/// Schedule for execution after specified delay.
/// If overwrite is set then the task will be re-scheduled (if it was already scheduled, i.e. delayed == true).
bool scheduleAfter(size_t milliseconds, bool overwrite = true);
/// If overwrite is set, and the task is already scheduled with a delay (delayed == true),
/// the task will be re-scheduled with the new delay.
/// If only_if_scheduled is set, don't do anything unless the task is already scheduled with a delay.
bool scheduleAfter(size_t milliseconds, bool overwrite = true, bool only_if_scheduled = false);
/// Further attempts to schedule become no-op. Will wait till the end of the current execution of the task.
void deactivate();

View File

@ -3,66 +3,70 @@
#include <base/defines.h>
#include <base/unit.h>
#define DBMS_DEFAULT_PORT 9000
#define DBMS_DEFAULT_SECURE_PORT 9440
#define DBMS_DEFAULT_CONNECT_TIMEOUT_SEC 10
#define DBMS_DEFAULT_SEND_TIMEOUT_SEC 300
#define DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC 300
namespace DB
{
static constexpr auto DBMS_DEFAULT_PORT = 9000;
static constexpr auto DBMS_DEFAULT_SECURE_PORT = 9440;
static constexpr auto DBMS_DEFAULT_CONNECT_TIMEOUT_SEC = 10;
static constexpr auto DBMS_DEFAULT_SEND_TIMEOUT_SEC = 300;
static constexpr auto DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC = 300;
/// Timeout for synchronous request-result protocol call (like Ping or TablesStatus).
#define DBMS_DEFAULT_SYNC_REQUEST_TIMEOUT_SEC 5
#define DBMS_DEFAULT_POLL_INTERVAL 10
static constexpr auto DBMS_DEFAULT_SYNC_REQUEST_TIMEOUT_SEC = 5;
static constexpr auto DBMS_DEFAULT_POLL_INTERVAL = 10;
/// The size of the I/O buffer by default.
#define DBMS_DEFAULT_BUFFER_SIZE 1048576ULL
static constexpr auto DBMS_DEFAULT_BUFFER_SIZE = 1048576ULL;
#define PADDING_FOR_SIMD 64
static constexpr auto PADDING_FOR_SIMD = 64;
/** Which blocks by default read the data (by number of rows).
* Smaller values give better cache locality, less consumption of RAM, but more overhead to process the query.
*/
#define DEFAULT_BLOCK_SIZE 65409 /// 65536 - PADDING_FOR_SIMD - (PADDING_FOR_SIMD - 1) bytes padding that we usually have in arrays
static constexpr auto DEFAULT_BLOCK_SIZE
= 65409; /// 65536 - PADDING_FOR_SIMD - (PADDING_FOR_SIMD - 1) bytes padding that we usually have in arrays
/** Which blocks should be formed for insertion into the table, if we control the formation of blocks.
* (Sometimes the blocks are inserted exactly such blocks that have been read / transmitted from the outside, and this parameter does not affect their size.)
* More than DEFAULT_BLOCK_SIZE, because in some tables a block of data on the disk is created for each block (quite a big thing),
* and if the parts were small, then it would be costly then to combine them.
*/
#define DEFAULT_INSERT_BLOCK_SIZE \
1048449 /// 1048576 - PADDING_FOR_SIMD - (PADDING_FOR_SIMD - 1) bytes padding that we usually have in arrays
static constexpr auto DEFAULT_INSERT_BLOCK_SIZE
= 1048449; /// 1048576 - PADDING_FOR_SIMD - (PADDING_FOR_SIMD - 1) bytes padding that we usually have in arrays
#define DEFAULT_PERIODIC_LIVE_VIEW_REFRESH_SEC 60
#define SHOW_CHARS_ON_SYNTAX_ERROR ptrdiff_t(160)
static constexpr auto DEFAULT_PERIODIC_LIVE_VIEW_REFRESH_SEC = 60;
static constexpr auto SHOW_CHARS_ON_SYNTAX_ERROR = ptrdiff_t(160);
/// each period reduces the error counter by 2 times
/// too short a period can cause errors to disappear immediately after creation.
#define DBMS_CONNECTION_POOL_WITH_FAILOVER_DEFAULT_DECREASE_ERROR_PERIOD 60
static constexpr auto DBMS_CONNECTION_POOL_WITH_FAILOVER_DEFAULT_DECREASE_ERROR_PERIOD = 60;
/// Replica error max cap; this is to prevent a replica from accumulating too many errors and taking too long to recover.
#define DBMS_CONNECTION_POOL_WITH_FAILOVER_MAX_ERROR_COUNT 1000
static constexpr auto DBMS_CONNECTION_POOL_WITH_FAILOVER_MAX_ERROR_COUNT = 1000;
/// The boundary on which the blocks for asynchronous file operations should be aligned.
#define DEFAULT_AIO_FILE_BLOCK_SIZE 4096
static constexpr auto DEFAULT_AIO_FILE_BLOCK_SIZE = 4096;
#define DEFAULT_HTTP_READ_BUFFER_TIMEOUT 30
#define DEFAULT_HTTP_READ_BUFFER_CONNECTION_TIMEOUT 1
static constexpr auto DEFAULT_HTTP_READ_BUFFER_TIMEOUT = 30;
static constexpr auto DEFAULT_HTTP_READ_BUFFER_CONNECTION_TIMEOUT = 1;
/// Maximum number of http-connections between two endpoints
/// the number is unmotivated
#define DEFAULT_COUNT_OF_HTTP_CONNECTIONS_PER_ENDPOINT 15
static constexpr auto DEFAULT_COUNT_OF_HTTP_CONNECTIONS_PER_ENDPOINT = 15;
#define DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT 30
static constexpr auto DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT = 30;
#define DBMS_DEFAULT_PATH "/var/lib/clickhouse/"
static constexpr auto DBMS_DEFAULT_PATH = "/var/lib/clickhouse/";
/// Actually, there may be multiple acquisitions of different locks for a given table within one query.
/// Check with IStorage class for the list of possible locks
#define DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC 120
static constexpr auto DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC = 120;
/// Default limit on recursion depth of recursive descent parser.
#define DBMS_DEFAULT_MAX_PARSER_DEPTH 1000
static constexpr auto DBMS_DEFAULT_MAX_PARSER_DEPTH = 1000;
/// Default limit on query size.
#define DBMS_DEFAULT_MAX_QUERY_SIZE 262144
static constexpr auto DBMS_DEFAULT_MAX_QUERY_SIZE = 262144;
/// Max depth of hierarchical dictionary
#define DBMS_HIERARCHICAL_DICTIONARY_MAX_DEPTH 1000
static constexpr auto DBMS_HIERARCHICAL_DICTIONARY_MAX_DEPTH = 1000;
/// Default maximum (total and entry) sizes and policies of various caches
static constexpr auto DEFAULT_UNCOMPRESSED_CACHE_POLICY = "SLRU";
@ -95,7 +99,9 @@ static constexpr auto DEFAULT_QUERY_CACHE_MAX_ENTRY_SIZE_IN_ROWS = 30'000'000uz;
///
/// Look at compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.h
#if !defined(SANITIZER)
#define QUERY_PROFILER_DEFAULT_SAMPLE_RATE_NS 1000000000
static constexpr auto QUERY_PROFILER_DEFAULT_SAMPLE_RATE_NS = 1000000000;
#else
#define QUERY_PROFILER_DEFAULT_SAMPLE_RATE_NS 0
static constexpr auto QUERY_PROFILER_DEFAULT_SAMPLE_RATE_NS = 0;
#endif
}

View File

@ -122,7 +122,7 @@ struct CustomType
bool isSecret() const { return impl->isSecret(); }
const char * getTypeName() const { return impl->getTypeName(); }
String toString(bool show_secrets = true) const { return impl->toString(show_secrets); }
const CustomTypeImpl & getImpl() { return *impl; }
const CustomTypeImpl & getImpl() const { return *impl; }
bool operator < (const CustomType & rhs) const { return *impl < *rhs.impl; }
bool operator <= (const CustomType & rhs) const { return *impl <= *rhs.impl; }
@ -292,7 +292,7 @@ concept not_field_or_bool_or_stringlike
/** 32 is enough. Round number is used for alignment and for better arithmetic inside std::vector.
* NOTE: Actually, sizeof(std::string) is 32 when using libc++, so Field is 40 bytes.
*/
#define DBMS_MIN_FIELD_SIZE 32
static constexpr auto DBMS_MIN_FIELD_SIZE = 32;
/** Discriminated union of several types.

View File

@ -3,10 +3,16 @@
#include <Common/JSONBuilder.h>
#include <Core/InterpolateDescription.h>
#include <Interpreters/convertFieldToType.h>
#include <Core/SettingsEnums.h>
#include <Common/IntervalKind.h>
#include <Parsers/ASTOrderByElement.h>
#include <Parsers/ASTInterpolateElement.h>
#include <Interpreters/Aliases.h>
#include <Interpreters/ActionsDAG.h>
namespace DB
{
InterpolateDescription::InterpolateDescription(ActionsDAGPtr actions_, const Aliases & aliases)
: actions(actions_)
{
@ -28,5 +34,4 @@ namespace DB
result_columns_order.push_back(name);
}
}
}

View File

@ -2,20 +2,18 @@
#include <unordered_map>
#include <memory>
#include <cstddef>
#include <string>
#include <Core/Field.h>
#include <Core/SettingsEnums.h>
#include <Common/IntervalKind.h>
#include <Parsers/ASTOrderByElement.h>
#include <Parsers/ASTInterpolateElement.h>
#include <Functions/FunctionsMiscellaneous.h>
#include <Interpreters/Aliases.h>
#include <Core/NamesAndTypes.h>
#include <Parsers/IAST_fwd.h>
namespace DB
{
class ActionsDAG;
using ActionsDAGPtr = std::shared_ptr<ActionsDAG>;
using Aliases = std::unordered_map<String, ASTPtr>;
/// Interpolate description
struct InterpolateDescription
{

View File

@ -5,13 +5,16 @@
#if USE_ICU
#include <unicode/ucnv.h>
#define CHUNK_SIZE 1024
static const char * TARGET_CHARSET = "utf8";
#endif
namespace DB
{
#if USE_ICU
static constexpr auto CHUNK_SIZE = 1024;
static constexpr auto TARGET_CHARSET = "utf8";
#endif
namespace ErrorCodes
{
extern const int UNKNOWN_EXCEPTION;

View File

@ -1,77 +1,80 @@
#pragma once
#define DBMS_MIN_REVISION_WITH_CLIENT_INFO 54032
#define DBMS_MIN_REVISION_WITH_SERVER_TIMEZONE 54058
#define DBMS_MIN_REVISION_WITH_QUOTA_KEY_IN_CLIENT_INFO 54060
#define DBMS_MIN_REVISION_WITH_TABLES_STATUS 54226
#define DBMS_MIN_REVISION_WITH_TIME_ZONE_PARAMETER_IN_DATETIME_DATA_TYPE 54337
#define DBMS_MIN_REVISION_WITH_SERVER_DISPLAY_NAME 54372
#define DBMS_MIN_REVISION_WITH_VERSION_PATCH 54401
#define DBMS_MIN_REVISION_WITH_SERVER_LOGS 54406
namespace DB
{
static constexpr auto DBMS_MIN_REVISION_WITH_CLIENT_INFO = 54032;
static constexpr auto DBMS_MIN_REVISION_WITH_SERVER_TIMEZONE = 54058;
static constexpr auto DBMS_MIN_REVISION_WITH_QUOTA_KEY_IN_CLIENT_INFO = 54060;
static constexpr auto DBMS_MIN_REVISION_WITH_TABLES_STATUS = 54226;
static constexpr auto DBMS_MIN_REVISION_WITH_TIME_ZONE_PARAMETER_IN_DATETIME_DATA_TYPE = 54337;
static constexpr auto DBMS_MIN_REVISION_WITH_SERVER_DISPLAY_NAME = 54372;
static constexpr auto DBMS_MIN_REVISION_WITH_VERSION_PATCH = 54401;
static constexpr auto DBMS_MIN_REVISION_WITH_SERVER_LOGS = 54406;
/// Minimum revision with exactly the same set of aggregation methods and rules to select them.
/// Two-level (bucketed) aggregation is incompatible if servers are inconsistent in these rules
/// (keys will be placed in different buckets and result will not be fully aggregated).
#define DBMS_MIN_REVISION_WITH_CURRENT_AGGREGATION_VARIANT_SELECTION_METHOD 54448
#define DBMS_MIN_MAJOR_VERSION_WITH_CURRENT_AGGREGATION_VARIANT_SELECTION_METHOD 21
#define DBMS_MIN_MINOR_VERSION_WITH_CURRENT_AGGREGATION_VARIANT_SELECTION_METHOD 4
#define DBMS_MIN_REVISION_WITH_COLUMN_DEFAULTS_METADATA 54410
static constexpr auto DBMS_MIN_REVISION_WITH_CURRENT_AGGREGATION_VARIANT_SELECTION_METHOD = 54448;
static constexpr auto DBMS_MIN_MAJOR_VERSION_WITH_CURRENT_AGGREGATION_VARIANT_SELECTION_METHOD = 21;
static constexpr auto DBMS_MIN_MINOR_VERSION_WITH_CURRENT_AGGREGATION_VARIANT_SELECTION_METHOD = 4;
static constexpr auto DBMS_MIN_REVISION_WITH_COLUMN_DEFAULTS_METADATA = 54410;
#define DBMS_MIN_REVISION_WITH_LOW_CARDINALITY_TYPE 54405
#define DBMS_MIN_REVISION_WITH_CLIENT_WRITE_INFO 54420
static constexpr auto DBMS_MIN_REVISION_WITH_LOW_CARDINALITY_TYPE = 54405;
static constexpr auto DBMS_MIN_REVISION_WITH_CLIENT_WRITE_INFO = 54420;
/// Minimum revision supporting SettingsBinaryFormat::STRINGS.
#define DBMS_MIN_REVISION_WITH_SETTINGS_SERIALIZED_AS_STRINGS 54429
#define DBMS_MIN_REVISION_WITH_SCALARS 54429
static constexpr auto DBMS_MIN_REVISION_WITH_SETTINGS_SERIALIZED_AS_STRINGS = 54429;
static constexpr auto DBMS_MIN_REVISION_WITH_SCALARS = 54429;
/// Minimum revision supporting OpenTelemetry
#define DBMS_MIN_REVISION_WITH_OPENTELEMETRY 54442
static constexpr auto DBMS_MIN_REVISION_WITH_OPENTELEMETRY = 54442;
#define DBMS_MIN_REVISION_WITH_AGGREGATE_FUNCTIONS_VERSIONING 54452
static constexpr auto DBMS_MIN_REVISION_WITH_AGGREGATE_FUNCTIONS_VERSIONING = 54452;
#define DBMS_CLUSTER_PROCESSING_PROTOCOL_VERSION 1
static constexpr auto DBMS_CLUSTER_PROCESSING_PROTOCOL_VERSION = 1;
#define DBMS_PARALLEL_REPLICAS_PROTOCOL_VERSION 3
#define DBMS_MIN_REVISION_WITH_PARALLEL_REPLICAS 54453
static constexpr auto DBMS_PARALLEL_REPLICAS_PROTOCOL_VERSION = 3;
static constexpr auto DBMS_MIN_REVISION_WITH_PARALLEL_REPLICAS = 54453;
#define DBMS_MERGE_TREE_PART_INFO_VERSION 1
static constexpr auto DBMS_MERGE_TREE_PART_INFO_VERSION = 1;
#define DBMS_MIN_REVISION_WITH_INTERSERVER_SECRET 54441
static constexpr auto DBMS_MIN_REVISION_WITH_INTERSERVER_SECRET = 54441;
#define DBMS_MIN_REVISION_WITH_X_FORWARDED_FOR_IN_CLIENT_INFO 54443
#define DBMS_MIN_REVISION_WITH_REFERER_IN_CLIENT_INFO 54447
static constexpr auto DBMS_MIN_REVISION_WITH_X_FORWARDED_FOR_IN_CLIENT_INFO = 54443;
static constexpr auto DBMS_MIN_REVISION_WITH_REFERER_IN_CLIENT_INFO = 54447;
#define DBMS_MIN_PROTOCOL_VERSION_WITH_DISTRIBUTED_DEPTH 54448
static constexpr auto DBMS_MIN_PROTOCOL_VERSION_WITH_DISTRIBUTED_DEPTH = 54448;
#define DBMS_MIN_PROTOCOL_VERSION_WITH_INCREMENTAL_PROFILE_EVENTS 54451
static constexpr auto DBMS_MIN_PROTOCOL_VERSION_WITH_INCREMENTAL_PROFILE_EVENTS = 54451;
#define DBMS_MIN_REVISION_WITH_CUSTOM_SERIALIZATION 54454
static constexpr auto DBMS_MIN_REVISION_WITH_CUSTOM_SERIALIZATION = 54454;
#define DBMS_MIN_PROTOCOL_VERSION_WITH_INITIAL_QUERY_START_TIME 54449
static constexpr auto DBMS_MIN_PROTOCOL_VERSION_WITH_INITIAL_QUERY_START_TIME = 54449;
#define DBMS_MIN_PROTOCOL_VERSION_WITH_PROFILE_EVENTS_IN_INSERT 54456
static constexpr auto DBMS_MIN_PROTOCOL_VERSION_WITH_PROFILE_EVENTS_IN_INSERT = 54456;
#define DBMS_MIN_PROTOCOL_VERSION_WITH_VIEW_IF_PERMITTED 54457
static constexpr auto DBMS_MIN_PROTOCOL_VERSION_WITH_VIEW_IF_PERMITTED = 54457;
#define DBMS_MIN_PROTOCOL_VERSION_WITH_ADDENDUM 54458
static constexpr auto DBMS_MIN_PROTOCOL_VERSION_WITH_ADDENDUM = 54458;
#define DBMS_MIN_PROTOCOL_VERSION_WITH_QUOTA_KEY 54458
static constexpr auto DBMS_MIN_PROTOCOL_VERSION_WITH_QUOTA_KEY = 54458;
#define DBMS_MIN_PROTOCOL_VERSION_WITH_PARAMETERS 54459
static constexpr auto DBMS_MIN_PROTOCOL_VERSION_WITH_PARAMETERS = 54459;
/// The server will send query elapsed run time in the Progress packet.
#define DBMS_MIN_PROTOCOL_VERSION_WITH_SERVER_QUERY_TIME_IN_PROGRESS 54460
static constexpr auto DBMS_MIN_PROTOCOL_VERSION_WITH_SERVER_QUERY_TIME_IN_PROGRESS = 54460;
#define DBMS_MIN_PROTOCOL_VERSION_WITH_PASSWORD_COMPLEXITY_RULES 54461
static constexpr auto DBMS_MIN_PROTOCOL_VERSION_WITH_PASSWORD_COMPLEXITY_RULES = 54461;
#define DBMS_MIN_REVISION_WITH_INTERSERVER_SECRET_V2 54462
static constexpr auto DBMS_MIN_REVISION_WITH_INTERSERVER_SECRET_V2 = 54462;
#define DBMS_MIN_PROTOCOL_VERSION_WITH_TOTAL_BYTES_IN_PROGRESS 54463
static constexpr auto DBMS_MIN_PROTOCOL_VERSION_WITH_TOTAL_BYTES_IN_PROGRESS = 54463;
#define DBMS_MIN_PROTOCOL_VERSION_WITH_TIMEZONE_UPDATES 54464
static constexpr auto DBMS_MIN_PROTOCOL_VERSION_WITH_TIMEZONE_UPDATES = 54464;
#define DBMS_MIN_REVISION_WITH_SPARSE_SERIALIZATION 54465
static constexpr auto DBMS_MIN_REVISION_WITH_SPARSE_SERIALIZATION = 54465;
#define DBMS_MIN_REVISION_WITH_SSH_AUTHENTICATION 54466
static constexpr auto DBMS_MIN_REVISION_WITH_SSH_AUTHENTICATION = 54466;
/// Version of ClickHouse TCP protocol.
///
@ -80,4 +83,6 @@
/// NOTE: DBMS_TCP_PROTOCOL_VERSION has nothing in common with VERSION_REVISION,
/// the latter is just a number for server version (one number instead of commit SHA)
/// for simplicity (sometimes it may be more convenient in some use cases).
#define DBMS_TCP_PROTOCOL_VERSION 54466
static constexpr auto DBMS_TCP_PROTOCOL_VERSION = 54466;
}

View File

@ -157,7 +157,7 @@ class IColumn;
M(Bool, allow_suspicious_fixed_string_types, false, "In CREATE TABLE statement allows creating columns of type FixedString(n) with n > 256. FixedString with length >= 256 is suspicious and most likely indicates misusage", 0) \
M(Bool, allow_suspicious_indices, false, "Reject primary/secondary indexes and sorting keys with identical expressions", 0) \
M(Bool, allow_suspicious_ttl_expressions, false, "Reject TTL expressions that don't depend on any of table's columns. It indicates a user error most of the time.", 0) \
M(Bool, compile_expressions, false, "Compile some scalar functions and operators to native code.", 0) \
M(Bool, compile_expressions, true, "Compile some scalar functions and operators to native code.", 0) \
M(UInt64, min_count_to_compile_expression, 3, "The number of identical expressions before they are JIT-compiled", 0) \
M(Bool, compile_aggregate_expressions, true, "Compile aggregate functions to native code.", 0) \
M(UInt64, min_count_to_compile_aggregate_expression, 3, "The number of identical aggregate expressions before they are JIT-compiled", 0) \
@ -584,6 +584,8 @@ class IColumn;
M(Bool, enable_early_constant_folding, true, "Enable query optimization where we analyze function and subqueries results and rewrite query if there're constants there", 0) \
M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views if the block is not a duplicate for the table. Use true to always deduplicate in dependent tables.", 0) \
M(Bool, materialized_views_ignore_errors, false, "Allows to ignore errors for MATERIALIZED VIEW, and deliver original block to the table regardless of MVs", 0) \
M(Bool, allow_experimental_refreshable_materialized_view, false, "Allow refreshable materialized views (CREATE MATERIALIZED VIEW <name> REFRESH ...).", 0) \
M(Bool, stop_refreshable_materialized_views_on_startup, false, "On server startup, prevent scheduling of refreshable materialized views, as if with SYSTEM STOP VIEWS. You can manually start them with SYSTEM START VIEWS or SYSTEM START VIEW <name> afterwards. Also applies to newly created views. Has no effect on non-refreshable materialized views.", 0) \
M(Bool, use_compact_format_in_distributed_parts_names, true, "Changes format of directories names for distributed table insert parts.", 0) \
M(Bool, validate_polygons, true, "Throw exception if polygon is invalid in function pointInPolygon (e.g. self-tangent, self-intersecting). If the setting is false, the function will accept invalid polygons but may silently return wrong result.", 0) \
M(UInt64, max_parser_depth, DBMS_DEFAULT_MAX_PARSER_DEPTH, "Maximum parser depth (recursion depth of recursive descend parser).", 0) \

View File

@ -98,6 +98,8 @@ IMPLEMENT_SETTING_AUTO_ENUM(DefaultDatabaseEngine, ErrorCodes::BAD_ARGUMENTS)
IMPLEMENT_SETTING_AUTO_ENUM(DefaultTableEngine, ErrorCodes::BAD_ARGUMENTS)
IMPLEMENT_SETTING_AUTO_ENUM(CleanDeletedRows, ErrorCodes::BAD_ARGUMENTS)
IMPLEMENT_SETTING_MULTI_ENUM(MySQLDataTypesSupport, ErrorCodes::UNKNOWN_MYSQL_DATATYPES_SUPPORT_LEVEL,
{{"decimal", MySQLDataTypesSupport::DECIMAL},
{"datetime64", MySQLDataTypesSupport::DATETIME64},

View File

@ -140,6 +140,14 @@ enum class DefaultTableEngine
DECLARE_SETTING_ENUM(DefaultTableEngine)
/// Values of the CleanDeletedRows setting enum (declared as a setting enum right after the definition).
enum class CleanDeletedRows
{
Never = 0, /// Disable.
Always, /// NOTE(review): presumably the opposite of Never (cleanup always enabled) — confirm against the code that reads this setting.
};
DECLARE_SETTING_ENUM(CleanDeletedRows)
enum class MySQLDataTypesSupport
{
DECIMAL, // convert MySQL's decimal and number to ClickHouse Decimal when applicable

View File

@ -1,6 +1,7 @@
#include <Databases/DatabaseAtomic.h>
#include <Databases/DatabaseOnDisk.h>
#include <Databases/DatabaseReplicated.h>
#include <Databases/DatabaseFactory.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <IO/ReadBufferFromFile.h>
@ -622,4 +623,16 @@ void DatabaseAtomic::checkDetachedTableNotInUse(const UUID & uuid)
assertDetachedTableNotInUse(uuid);
}
/// Registers the "Atomic" database engine with the given factory.
/// The registered creator builds a DatabaseAtomic from the database name, metadata path,
/// UUID and context carried in DatabaseFactory::Arguments.
void registerDatabaseAtomic(DatabaseFactory & factory)
{
    auto create_fn = [](const DatabaseFactory::Arguments & args)
    {
        /// Spell out std::make_shared so name lookup does not depend on ADL
        /// or on a using-declaration that is not visible in this file.
        return std::make_shared<DatabaseAtomic>(
            args.database_name,
            args.metadata_path,
            args.uuid,
            args.context);
    };
    factory.registerDatabase("Atomic", create_fn);
}
}

View File

@ -1,4 +1,5 @@
#include <Databases/DatabaseDictionary.h>
#include <Databases/DatabaseFactory.h>
#include <Interpreters/Context.h>
#include <Interpreters/ExternalDictionariesLoader.h>
#include <Dictionaries/DictionaryStructure.h>
@ -140,4 +141,14 @@ void DatabaseDictionary::shutdown()
{
}
/// Registers the "Dictionary" database engine with the given factory.
/// The registered creator builds a DatabaseDictionary from the database name and context
/// carried in DatabaseFactory::Arguments.
void registerDatabaseDictionary(DatabaseFactory & factory)
{
    auto create_fn = [](const DatabaseFactory::Arguments & args)
    {
        /// Spell out std::make_shared so name lookup does not depend on ADL
        /// or on a using-declaration that is not visible in this file.
        return std::make_shared<DatabaseDictionary>(
            args.database_name,
            args.context);
    };
    factory.registerDatabase("Dictionary", create_fn);
}
}

View File

@ -1,60 +1,15 @@
#include <Databases/DatabaseFactory.h>
#include <filesystem>
#include <Databases/DatabaseAtomic.h>
#include <Databases/DatabaseDictionary.h>
#include <Databases/DatabaseFilesystem.h>
#include <Databases/DatabaseLazy.h>
#include <Databases/DatabaseMemory.h>
#include <Databases/DatabaseOrdinary.h>
#include <Databases/DatabaseFactory.h>
#include <Databases/DatabaseReplicated.h>
#include <Interpreters/Context.h>
#include <Interpreters/evaluateConstantExpression.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/queryToString.h>
#include <Storages/NamedCollectionsHelpers.h>
#include <Common/logger_useful.h>
#include <Common/Macros.h>
#include <Common/filesystemHelpers.h>
#include "config.h"
#if USE_MYSQL
# include <Core/MySQL/MySQLClient.h>
# include <Databases/MySQL/DatabaseMySQL.h>
# include <Databases/MySQL/MaterializedMySQLSettings.h>
# include <Storages/MySQL/MySQLHelpers.h>
# include <Storages/MySQL/MySQLSettings.h>
# include <Storages/StorageMySQL.h>
# include <Databases/MySQL/DatabaseMaterializedMySQL.h>
# include <mysqlxx/Pool.h>
#endif
#if USE_MYSQL || USE_LIBPQXX
#include <Common/parseRemoteDescription.h>
#include <Common/parseAddress.h>
#endif
#if USE_LIBPQXX
#include <Databases/PostgreSQL/DatabasePostgreSQL.h>
#include <Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h>
#include <Storages/PostgreSQL/MaterializedPostgreSQLSettings.h>
#include <Storages/StoragePostgreSQL.h>
#endif
#if USE_SQLITE
#include <Databases/SQLite/DatabaseSQLite.h>
#endif
#if USE_AWS_S3
#include <Databases/DatabaseS3.h>
#endif
#if USE_HDFS
#include <Databases/DatabaseHDFS.h>
#endif
#include <Common/logger_useful.h>
namespace fs = std::filesystem;
@ -67,7 +22,7 @@ namespace ErrorCodes
extern const int BAD_ARGUMENTS;
extern const int UNKNOWN_DATABASE_ENGINE;
extern const int CANNOT_CREATE_DATABASE;
extern const int NOT_IMPLEMENTED;
extern const int LOGICAL_ERROR;
}
void cckMetadataPathForOrdinary(const ASTCreateQuery & create, const String & metadata_path)
@ -103,8 +58,47 @@ void cckMetadataPathForOrdinary(const ASTCreateQuery & create, const String & me
}
/// Checks that the engine clause of a CREATE DATABASE query carries only the elements
/// its engine accepts. Three things are verified, in this order:
///   1. engine arguments are present only for engines that take arguments;
///   2. no engine parameters / PARTITION BY / PRIMARY KEY / ORDER BY / SAMPLE BY are given,
///      and SETTINGS are given only for engines that accept them;
///   3. table overrides are given only for engines that support them.
/// Throws BAD_ARGUMENTS (cases 1 and 3) or UNKNOWN_ELEMENT_IN_AST (case 2).
void validate(const ASTCreateQuery & create_query)
{
    auto * storage_def = create_query.storage;

    /// Engines that accept an argument list.
    static const std::unordered_set<std::string_view> engines_with_arguments{"MySQL", "MaterializeMySQL", "MaterializedMySQL",
        "Lazy", "Replicated", "PostgreSQL", "MaterializedPostgreSQL", "SQLite", "Filesystem", "S3", "HDFS"};

    const String & engine_name = storage_def->engine->name;

    if (storage_def->engine->arguments)
    {
        if (!engines_with_arguments.contains(engine_name))
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Database engine `{}` cannot have arguments", engine_name);
    }

    /// Clauses that no database engine accepts.
    const bool has_forbidden_clause = storage_def->engine->parameters
        || storage_def->partition_by
        || storage_def->primary_key
        || storage_def->order_by
        || storage_def->sample_by;

    /// SETTINGS are accepted by every *MySQL engine, Replicated and MaterializedPostgreSQL.
    const bool settings_allowed = endsWith(engine_name, "MySQL")
        || engine_name == "Replicated"
        || engine_name == "MaterializedPostgreSQL";

    const bool has_forbidden_settings = storage_def->settings && !settings_allowed;

    if (has_forbidden_clause || has_forbidden_settings)
        throw Exception(ErrorCodes::UNKNOWN_ELEMENT_IN_AST,
            "Database engine `{}` cannot have parameters, primary_key, order_by, sample_by, settings", engine_name);

    /// Engines that support table overrides.
    static const std::unordered_set<std::string_view> engines_with_table_overrides{"MaterializeMySQL", "MaterializedMySQL", "MaterializedPostgreSQL"};

    if (create_query.table_overrides)
    {
        if (!engines_with_table_overrides.contains(engine_name))
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Database engine `{}` cannot have table overrides", engine_name);
    }
}
DatabasePtr DatabaseFactory::get(const ASTCreateQuery & create, const String & metadata_path, ContextPtr context)
{
/// check if the database engine is a valid one before proceeding
if (!database_engines.contains(create.storage->engine->name))
throw Exception(ErrorCodes::UNKNOWN_DATABASE_ENGINE, "Unknown database engine: {}", create.storage->engine->name);
/// if the engine is found (i.e. registered with the factory instance), then validate if the
/// supplied engine arguments, settings and table overrides are valid for the engine.
validate(create);
cckMetadataPathForOrdinary(create, metadata_path);
DatabasePtr impl = getImpl(create, metadata_path, context);
@ -119,383 +113,42 @@ DatabasePtr DatabaseFactory::get(const ASTCreateQuery & create, const String & m
return impl;
}
template <typename ValueType>
static inline ValueType safeGetLiteralValue(const ASTPtr &ast, const String &engine_name)
void DatabaseFactory::registerDatabase(const std::string & name, CreatorFn creator_fn)
{
if (!ast || !ast->as<ASTLiteral>())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Database engine {} requested literal argument.", engine_name);
if (!database_engines.emplace(name, std::move(creator_fn)).second)
throw Exception(ErrorCodes::LOGICAL_ERROR, "DatabaseFactory: the database engine name '{}' is not unique", name);
}
return ast->as<ASTLiteral>()->value.safeGet<ValueType>();
DatabaseFactory & DatabaseFactory::instance()
{
static DatabaseFactory db_fact;
return db_fact;
}
DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String & metadata_path, ContextPtr context)
{
auto * engine_define = create.storage;
auto * storage = create.storage;
const String & database_name = create.getDatabase();
const String & engine_name = engine_define->engine->name;
const UUID & uuid = create.uuid;
static const std::unordered_set<std::string_view> database_engines{"Ordinary", "Atomic", "Memory",
"Dictionary", "Lazy", "Replicated", "MySQL", "MaterializeMySQL", "MaterializedMySQL",
"PostgreSQL", "MaterializedPostgreSQL", "SQLite", "Filesystem", "S3", "HDFS"};
if (!database_engines.contains(engine_name))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Database engine name `{}` does not exist", engine_name);
static const std::unordered_set<std::string_view> engines_with_arguments{"MySQL", "MaterializeMySQL", "MaterializedMySQL",
"Lazy", "Replicated", "PostgreSQL", "MaterializedPostgreSQL", "SQLite", "Filesystem", "S3", "HDFS"};
static const std::unordered_set<std::string_view> engines_with_table_overrides{"MaterializeMySQL", "MaterializedMySQL", "MaterializedPostgreSQL"};
bool engine_may_have_arguments = engines_with_arguments.contains(engine_name);
if (engine_define->engine->arguments && !engine_may_have_arguments)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Database engine `{}` cannot have arguments", engine_name);
bool has_unexpected_element = engine_define->engine->parameters || engine_define->partition_by ||
engine_define->primary_key || engine_define->order_by ||
engine_define->sample_by;
bool may_have_settings = endsWith(engine_name, "MySQL") || engine_name == "Replicated" || engine_name == "MaterializedPostgreSQL";
if (has_unexpected_element || (!may_have_settings && engine_define->settings))
throw Exception(ErrorCodes::UNKNOWN_ELEMENT_IN_AST,
"Database engine `{}` cannot have parameters, primary_key, order_by, sample_by, settings", engine_name);
if (create.table_overrides && !engines_with_table_overrides.contains(engine_name))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Database engine `{}` cannot have table overrides", engine_name);
if (engine_name == "Ordinary")
{
if (!create.attach && !context->getSettingsRef().allow_deprecated_database_ordinary)
throw Exception(ErrorCodes::UNKNOWN_DATABASE_ENGINE,
"Ordinary database engine is deprecated (see also allow_deprecated_database_ordinary setting)");
return std::make_shared<DatabaseOrdinary>(database_name, metadata_path, context);
}
if (engine_name == "Atomic")
return std::make_shared<DatabaseAtomic>(database_name, metadata_path, uuid, context);
else if (engine_name == "Memory")
return std::make_shared<DatabaseMemory>(database_name, context);
else if (engine_name == "Dictionary")
return std::make_shared<DatabaseDictionary>(database_name, context);
#if USE_MYSQL
else if (engine_name == "MySQL" || engine_name == "MaterializeMySQL" || engine_name == "MaterializedMySQL")
{
const ASTFunction * engine = engine_define->engine;
if (!engine->arguments)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Engine `{}` must have arguments", engine_name);
StorageMySQL::Configuration configuration;
ASTs & arguments = engine->arguments->children;
auto mysql_settings = std::make_unique<MySQLSettings>();
if (auto named_collection = tryGetNamedCollectionWithOverrides(arguments, context))
{
configuration = StorageMySQL::processNamedCollectionResult(*named_collection, *mysql_settings, context, false);
}
else
{
if (arguments.size() != 4)
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"MySQL database require mysql_hostname, mysql_database_name, mysql_username, mysql_password arguments.");
arguments[1] = evaluateConstantExpressionOrIdentifierAsLiteral(arguments[1], context);
const auto & host_port = safeGetLiteralValue<String>(arguments[0], engine_name);
if (engine_name == "MySQL")
{
size_t max_addresses = context->getSettingsRef().glob_expansion_max_elements;
configuration.addresses = parseRemoteDescriptionForExternalDatabase(host_port, max_addresses, 3306);
}
else
{
const auto & [remote_host, remote_port] = parseAddress(host_port, 3306);
configuration.host = remote_host;
configuration.port = remote_port;
}
configuration.database = safeGetLiteralValue<String>(arguments[1], engine_name);
configuration.username = safeGetLiteralValue<String>(arguments[2], engine_name);
configuration.password = safeGetLiteralValue<String>(arguments[3], engine_name);
}
try
{
if (engine_name == "MySQL")
{
mysql_settings->loadFromQueryContext(context, *engine_define);
if (engine_define->settings)
mysql_settings->loadFromQuery(*engine_define);
auto mysql_pool = createMySQLPoolWithFailover(configuration, *mysql_settings);
return std::make_shared<DatabaseMySQL>(
context, database_name, metadata_path, engine_define, configuration.database,
std::move(mysql_settings), std::move(mysql_pool), create.attach);
}
MySQLClient client(configuration.host, configuration.port, configuration.username, configuration.password);
auto mysql_pool = mysqlxx::Pool(configuration.database, configuration.host, configuration.username, configuration.password, configuration.port);
auto materialize_mode_settings = std::make_unique<MaterializedMySQLSettings>();
if (engine_define->settings)
materialize_mode_settings->loadFromQuery(*engine_define);
if (uuid == UUIDHelpers::Nil)
{
auto print_create_ast = create.clone();
print_create_ast->as<ASTCreateQuery>()->attach = false;
throw Exception(ErrorCodes::NOT_IMPLEMENTED,
"The MaterializedMySQL database engine no longer supports Ordinary databases. To re-create the database, delete "
"the old one by executing \"rm -rf {}{{,.sql}}\", then re-create the database with the following query: {}",
metadata_path,
queryToString(print_create_ast));
}
return std::make_shared<DatabaseMaterializedMySQL>(
context, database_name, metadata_path, uuid, configuration.database, std::move(mysql_pool),
std::move(client), std::move(materialize_mode_settings));
}
catch (...)
{
const auto & exception_message = getCurrentExceptionMessage(true);
throw Exception(ErrorCodes::CANNOT_CREATE_DATABASE, "Cannot create MySQL database, because {}", exception_message);
}
}
#endif
else if (engine_name == "Lazy")
{
const ASTFunction * engine = engine_define->engine;
if (!engine->arguments || engine->arguments->children.size() != 1)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Lazy database require cache_expiration_time_seconds argument");
const auto & arguments = engine->arguments->children;
const auto cache_expiration_time_seconds = safeGetLiteralValue<UInt64>(arguments[0], "Lazy");
return std::make_shared<DatabaseLazy>(database_name, metadata_path, cache_expiration_time_seconds, context);
}
else if (engine_name == "Replicated")
{
const ASTFunction * engine = engine_define->engine;
if (!engine->arguments || engine->arguments->children.size() != 3)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Replicated database requires 3 arguments: zookeeper path, shard name and replica name");
auto & arguments = engine->arguments->children;
for (auto & engine_arg : arguments)
engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, context);
String zookeeper_path = safeGetLiteralValue<String>(arguments[0], "Replicated");
String shard_name = safeGetLiteralValue<String>(arguments[1], "Replicated");
String replica_name = safeGetLiteralValue<String>(arguments[2], "Replicated");
zookeeper_path = context->getMacros()->expand(zookeeper_path);
shard_name = context->getMacros()->expand(shard_name);
replica_name = context->getMacros()->expand(replica_name);
DatabaseReplicatedSettings database_replicated_settings{};
if (engine_define->settings)
database_replicated_settings.loadFromQuery(*engine_define);
return std::make_shared<DatabaseReplicated>(database_name, metadata_path, uuid,
zookeeper_path, shard_name, replica_name,
std::move(database_replicated_settings), context);
}
#if USE_LIBPQXX
else if (engine_name == "PostgreSQL")
{
const ASTFunction * engine = engine_define->engine;
if (!engine->arguments)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Engine `{}` must have arguments", engine_name);
ASTs & engine_args = engine->arguments->children;
auto use_table_cache = false;
StoragePostgreSQL::Configuration configuration;
if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, context))
{
configuration = StoragePostgreSQL::processNamedCollectionResult(*named_collection, context, false);
use_table_cache = named_collection->getOrDefault<UInt64>("use_table_cache", 0);
}
else
{
if (engine_args.size() < 4 || engine_args.size() > 6)
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"PostgreSQL Database require `host:port`, `database_name`, `username`, `password`"
"[, `schema` = "", `use_table_cache` = 0");
for (auto & engine_arg : engine_args)
engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, context);
const auto & host_port = safeGetLiteralValue<String>(engine_args[0], engine_name);
size_t max_addresses = context->getSettingsRef().glob_expansion_max_elements;
configuration.addresses = parseRemoteDescriptionForExternalDatabase(host_port, max_addresses, 5432);
configuration.database = safeGetLiteralValue<String>(engine_args[1], engine_name);
configuration.username = safeGetLiteralValue<String>(engine_args[2], engine_name);
configuration.password = safeGetLiteralValue<String>(engine_args[3], engine_name);
bool is_deprecated_syntax = false;
if (engine_args.size() >= 5)
{
auto arg_value = engine_args[4]->as<ASTLiteral>()->value;
if (arg_value.getType() == Field::Types::Which::String)
{
configuration.schema = safeGetLiteralValue<String>(engine_args[4], engine_name);
}
else
{
use_table_cache = safeGetLiteralValue<UInt8>(engine_args[4], engine_name);
LOG_WARNING(&Poco::Logger::get("DatabaseFactory"), "A deprecated syntax of PostgreSQL database engine is used");
is_deprecated_syntax = true;
}
}
if (!is_deprecated_syntax && engine_args.size() >= 6)
use_table_cache = safeGetLiteralValue<UInt8>(engine_args[5], engine_name);
}
const auto & settings = context->getSettingsRef();
auto pool = std::make_shared<postgres::PoolWithFailover>(
configuration,
settings.postgresql_connection_pool_size,
settings.postgresql_connection_pool_wait_timeout,
POSTGRESQL_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES,
settings.postgresql_connection_pool_auto_close_connection);
return std::make_shared<DatabasePostgreSQL>(
context, metadata_path, engine_define, database_name, configuration, pool, use_table_cache);
}
else if (engine_name == "MaterializedPostgreSQL")
{
const ASTFunction * engine = engine_define->engine;
if (!engine->arguments)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Engine `{}` must have arguments", engine_name);
ASTs & engine_args = engine->arguments->children;
StoragePostgreSQL::Configuration configuration;
if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, context))
{
configuration = StoragePostgreSQL::processNamedCollectionResult(*named_collection, context, false);
}
else
{
if (engine_args.size() != 4)
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"MaterializedPostgreSQL Database require `host:port`, `database_name`, `username`, `password`.");
for (auto & engine_arg : engine_args)
engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, context);
auto parsed_host_port = parseAddress(safeGetLiteralValue<String>(engine_args[0], engine_name), 5432);
configuration.host = parsed_host_port.first;
configuration.port = parsed_host_port.second;
configuration.database = safeGetLiteralValue<String>(engine_args[1], engine_name);
configuration.username = safeGetLiteralValue<String>(engine_args[2], engine_name);
configuration.password = safeGetLiteralValue<String>(engine_args[3], engine_name);
}
auto connection_info = postgres::formatConnectionString(
configuration.database, configuration.host, configuration.port, configuration.username, configuration.password);
auto postgresql_replica_settings = std::make_unique<MaterializedPostgreSQLSettings>();
if (engine_define->settings)
postgresql_replica_settings->loadFromQuery(*engine_define);
return std::make_shared<DatabaseMaterializedPostgreSQL>(
context, metadata_path, uuid, create.attach,
database_name, configuration.database, connection_info,
std::move(postgresql_replica_settings));
}
#endif
#if USE_SQLITE
else if (engine_name == "SQLite")
{
const ASTFunction * engine = engine_define->engine;
if (!engine->arguments || engine->arguments->children.size() != 1)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "SQLite database requires 1 argument: database path");
const auto & arguments = engine->arguments->children;
String database_path = safeGetLiteralValue<String>(arguments[0], "SQLite");
return std::make_shared<DatabaseSQLite>(context, engine_define, create.attach, database_path);
}
#endif
else if (engine_name == "Filesystem")
{
const ASTFunction * engine = engine_define->engine;
/// If init_path is empty, then the current path will be used
std::string init_path;
if (engine->arguments && !engine->arguments->children.empty())
{
if (engine->arguments->children.size() != 1)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Filesystem database requires at most 1 argument: filesystem_path");
const auto & arguments = engine->arguments->children;
init_path = safeGetLiteralValue<String>(arguments[0], engine_name);
}
return std::make_shared<DatabaseFilesystem>(database_name, init_path, context);
}
#if USE_AWS_S3
else if (engine_name == "S3")
{
const ASTFunction * engine = engine_define->engine;
DatabaseS3::Configuration config;
if (engine->arguments && !engine->arguments->children.empty())
{
ASTs & engine_args = engine->arguments->children;
config = DatabaseS3::parseArguments(engine_args, context);
}
return std::make_shared<DatabaseS3>(database_name, config, context);
}
#endif
#if USE_HDFS
else if (engine_name == "HDFS")
{
const ASTFunction * engine = engine_define->engine;
/// If source_url is empty, then table name must contain full url
std::string source_url;
if (engine->arguments && !engine->arguments->children.empty())
{
if (engine->arguments->children.size() != 1)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "HDFS database requires at most 1 argument: source_url");
const auto & arguments = engine->arguments->children;
source_url = safeGetLiteralValue<String>(arguments[0], engine_name);
}
return std::make_shared<DatabaseHDFS>(database_name, source_url, context);
}
#endif
throw Exception(ErrorCodes::UNKNOWN_DATABASE_ENGINE, "Unknown database engine: {}", engine_name);
const String & engine_name = storage->engine->name;
bool has_engine_args = false;
if (storage->engine->arguments)
has_engine_args = true;
ASTs empty_engine_args;
Arguments arguments{
.engine_name = engine_name,
.engine_args = has_engine_args ? storage->engine->arguments->children : empty_engine_args,
.create_query = create,
.database_name = database_name,
.metadata_path = metadata_path,
.uuid = create.uuid,
.context = context};
// creator_fn creates and returns a DatabasePtr with the supplied arguments
auto creator_fn = database_engines.at(engine_name);
return creator_fn(arguments);
}
}

View File

@ -2,18 +2,60 @@
#include <Interpreters/Context_fwd.h>
#include <Databases/IDatabase.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/ASTLiteral.h>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
class ASTCreateQuery;
class DatabaseFactory
template <typename ValueType>
static inline ValueType safeGetLiteralValue(const ASTPtr &ast, const String &engine_name)
{
if (!ast || !ast->as<ASTLiteral>())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Database engine {} requested literal argument.", engine_name);
return ast->as<ASTLiteral>()->value.safeGet<ValueType>();
}
/// Factory that creates databases from a CREATE query.
/// It keeps a map from engine name to a creator function (see `database_engines`);
/// engines add themselves through `registerDatabase`.
class DatabaseFactory : private boost::noncopyable
{
public:
static DatabasePtr get(const ASTCreateQuery & create, const String & metadata_path, ContextPtr context);
static DatabasePtr getImpl(const ASTCreateQuery & create, const String & metadata_path, ContextPtr context);
/// Accessor for the factory object that engines register into.
static DatabaseFactory & instance();
/// Bundle of inputs handed to a creator function.
/// Almost every member is a reference, so the referenced objects must outlive the Arguments value.
struct Arguments
{
const String & engine_name;
ASTs & engine_args;
ASTStorage * storage;
const ASTCreateQuery & create_query;
const String & database_name;
const String & metadata_path;
const UUID & uuid;
ContextPtr & context;
};
DatabasePtr get(const ASTCreateQuery & create, const String & metadata_path, ContextPtr context);
/// A creator function builds and returns a DatabasePtr from the supplied arguments.
using CreatorFn = std::function<DatabasePtr(const Arguments & arguments)>;
/// Engine name -> creator function.
using DatabaseEngines = std::unordered_map<std::string, CreatorFn>;
/// Registers `creator_fn` under the engine name `name` (the definition is outside this header).
void registerDatabase(const std::string & name, CreatorFn creator_fn);
/// Read-only view of all registered engines.
const DatabaseEngines & getDatabaseEngines() const { return database_engines; }
private:
DatabaseEngines database_engines;
DatabasePtr getImpl(const ASTCreateQuery & create, const String & metadata_path, ContextPtr context);
};
}

View File

@ -1,3 +1,4 @@
#include <Databases/DatabaseFactory.h>
#include <Databases/DatabaseFilesystem.h>
#include <IO/Operators.h>
@ -237,4 +238,28 @@ DatabaseTablesIteratorPtr DatabaseFilesystem::getTablesIterator(ContextPtr, cons
return std::make_unique<DatabaseTablesSnapshotIterator>(Tables{}, getDatabaseName());
}
/// Registers the "Filesystem" database engine.
/// The engine takes at most one argument: the filesystem path.
void registerDatabaseFilesystem(DatabaseFactory & factory)
{
    factory.registerDatabase("Filesystem", [](const DatabaseFactory::Arguments & args)
    {
        auto * storage_def = args.create_query.storage;
        const ASTFunction * engine_func = storage_def->engine;
        const String & name = storage_def->engine->name;

        /// If init_path is empty, then the current path will be used
        std::string init_path;

        if (const auto & engine_arguments = engine_func->arguments; engine_arguments && !engine_arguments->children.empty())
        {
            const auto & children = engine_arguments->children;
            if (children.size() != 1)
                throw Exception(ErrorCodes::BAD_ARGUMENTS, "Filesystem database requires at most 1 argument: filesystem_path");

            init_path = safeGetLiteralValue<String>(children[0], name);
        }

        return std::make_shared<DatabaseFilesystem>(args.database_name, init_path, args.context);
    });
}
}

View File

@ -2,6 +2,7 @@
#if USE_HDFS
#include <Databases/DatabaseFactory.h>
#include <Databases/DatabaseHDFS.h>
#include <Interpreters/Context.h>
@ -237,6 +238,30 @@ DatabaseTablesIteratorPtr DatabaseHDFS::getTablesIterator(ContextPtr, const Filt
return std::make_unique<DatabaseTablesSnapshotIterator>(Tables{}, getDatabaseName());
}
/// Registers the "HDFS" database engine.
/// The engine takes at most one argument: the source URL.
void registerDatabaseHDFS(DatabaseFactory & factory)
{
    factory.registerDatabase("HDFS", [](const DatabaseFactory::Arguments & args)
    {
        auto * storage_def = args.create_query.storage;
        const ASTFunction * engine_func = storage_def->engine;
        const String & name = storage_def->engine->name;

        /// If source_url is empty, then table name must contain full url
        std::string source_url;

        if (const auto & engine_arguments = engine_func->arguments; engine_arguments && !engine_arguments->children.empty())
        {
            const auto & children = engine_arguments->children;
            if (children.size() != 1)
                throw Exception(ErrorCodes::BAD_ARGUMENTS, "HDFS database requires at most 1 argument: source_url");

            source_url = safeGetLiteralValue<String>(children[0], name);
        }

        return std::make_shared<DatabaseHDFS>(args.database_name, source_url, args.context);
    });
}
} // DB
#endif

View File

@ -1,4 +1,5 @@
#include <Core/Settings.h>
#include <Databases/DatabaseFactory.h>
#include <Databases/DatabaseLazy.h>
#include <Databases/DatabaseOnDisk.h>
#include <Databases/DatabasesCommon.h>
@ -7,6 +8,7 @@
#include <IO/WriteBufferFromFile.h>
#include <IO/WriteHelpers.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/ASTFunction.h>
#include <Storages/IStorage.h>
#include <Common/escapeForFileName.h>
@ -34,6 +36,7 @@ namespace ErrorCodes
extern const int UNKNOWN_TABLE;
extern const int UNSUPPORTED_METHOD;
extern const int LOGICAL_ERROR;
extern const int BAD_ARGUMENTS;
}
@ -354,4 +357,26 @@ const StoragePtr & DatabaseLazyIterator::table() const
return current_storage;
}
/// Registers the "Lazy" database engine.
/// The engine requires exactly one argument: cache_expiration_time_seconds.
void registerDatabaseLazy(DatabaseFactory & factory)
{
    auto lazy_creator = [](const DatabaseFactory::Arguments & args)
    {
        const ASTFunction * engine_func = args.create_query.storage->engine;

        const bool has_single_argument = engine_func->arguments && engine_func->arguments->children.size() == 1;
        if (!has_single_argument)
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Lazy database require cache_expiration_time_seconds argument");

        const auto expiration_seconds = safeGetLiteralValue<UInt64>(engine_func->arguments->children[0], "Lazy");

        return std::make_shared<DatabaseLazy>(args.database_name, args.metadata_path, expiration_seconds, args.context);
    };

    factory.registerDatabase("Lazy", lazy_creator);
}
}

View File

@ -1,5 +1,6 @@
#include <base/scope_guard.h>
#include <Common/logger_useful.h>
#include <Databases/DatabaseFactory.h>
#include <Databases/DatabaseMemory.h>
#include <Databases/DatabasesCommon.h>
#include <Databases/DDLDependencyVisitor.h>
@ -209,4 +210,15 @@ std::vector<std::pair<ASTPtr, StoragePtr>> DatabaseMemory::getTablesForBackup(co
return res;
}
/// Registers the "Memory" database engine; it takes no engine arguments.
void registerDatabaseMemory(DatabaseFactory & factory)
{
    factory.registerDatabase("Memory", [](const DatabaseFactory::Arguments & args)
    {
        return std::make_shared<DatabaseMemory>(args.database_name, args.context);
    });
}
}

View File

@ -1,6 +1,7 @@
#include <filesystem>
#include <Core/Settings.h>
#include <Databases/DatabaseFactory.h>
#include <Databases/DatabaseOnDisk.h>
#include <Databases/DatabaseOrdinary.h>
#include <Databases/DatabasesCommon.h>
@ -37,6 +38,7 @@ namespace DB
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int UNKNOWN_DATABASE_ENGINE;
}
static constexpr size_t METADATA_FILE_BUFFER_SIZE = 32768;
@ -321,4 +323,19 @@ void DatabaseOrdinary::commitAlterTable(const StorageID &, const String & table_
}
}
/// Registers the "Ordinary" database engine.
/// Creating (as opposed to attaching) such a database is rejected unless
/// the allow_deprecated_database_ordinary setting is enabled.
void registerDatabaseOrdinary(DatabaseFactory & factory)
{
    factory.registerDatabase("Ordinary", [](const DatabaseFactory::Arguments & args)
    {
        /// The setting is consulted only for non-ATTACH queries.
        const bool creation_permitted
            = args.create_query.attach || args.context->getSettingsRef().allow_deprecated_database_ordinary;

        if (!creation_permitted)
            throw Exception(
                ErrorCodes::UNKNOWN_DATABASE_ENGINE,
                "Ordinary database engine is deprecated (see also allow_deprecated_database_ordinary setting)");

        return std::make_shared<DatabaseOrdinary>(args.database_name, args.metadata_path, args.context);
    });
}
}

View File

@ -13,6 +13,7 @@
#include <Common/ZooKeeper/Types.h>
#include <Common/ZooKeeper/ZooKeeper.h>
#include <Common/PoolId.h>
#include <Databases/DatabaseFactory.h>
#include <Databases/DatabaseReplicated.h>
#include <Databases/DatabaseReplicatedWorker.h>
#include <Databases/DDLDependencyVisitor.h>
@ -1054,7 +1055,7 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep
for (auto & [_, intermediate, to] : replicated_tables_to_rename)
rename_table(intermediate, to);
LOG_DEBUG(log, "Renames completed succesessfully");
LOG_DEBUG(log, "Renames completed successfully");
for (const auto & id : dropped_tables)
DatabaseCatalog::instance().waitTableFinallyDropped(id);
@ -1652,4 +1653,41 @@ bool DatabaseReplicated::shouldReplicateQuery(const ContextPtr & query_context,
return true;
}
/// Registers the "Replicated" database engine.
/// The engine requires three arguments: zookeeper path, shard name and replica name.
/// Macros in all three are expanded before the database object is built.
void registerDatabaseReplicated(DatabaseFactory & factory)
{
    auto replicated_creator = [](const DatabaseFactory::Arguments & args)
    {
        auto * storage_def = args.create_query.storage;
        const ASTFunction * engine_func = storage_def->engine;

        const bool has_three_arguments = engine_func->arguments && engine_func->arguments->children.size() == 3;
        if (!has_three_arguments)
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Replicated database requires 3 arguments: zookeeper path, shard name and replica name");

        /// Replace each argument in place with its evaluated literal form.
        auto & engine_arguments = engine_func->arguments->children;
        for (auto & argument : engine_arguments)
            argument = evaluateConstantExpressionOrIdentifierAsLiteral(argument, args.context);

        /// Read all three literals first, then expand macros in the same order.
        String zookeeper_path = safeGetLiteralValue<String>(engine_arguments[0], "Replicated");
        String shard_name = safeGetLiteralValue<String>(engine_arguments[1], "Replicated");
        String replica_name = safeGetLiteralValue<String>(engine_arguments[2], "Replicated");

        zookeeper_path = args.context->getMacros()->expand(zookeeper_path);
        shard_name = args.context->getMacros()->expand(shard_name);
        replica_name = args.context->getMacros()->expand(replica_name);

        DatabaseReplicatedSettings replicated_settings{};
        if (storage_def->settings)
            replicated_settings.loadFromQuery(*storage_def);

        return std::make_shared<DatabaseReplicated>(
            args.database_name,
            args.metadata_path,
            args.uuid,
            zookeeper_path,
            shard_name,
            replica_name,
            std::move(replicated_settings),
            args.context);
    };

    factory.registerDatabase("Replicated", replicated_creator);
}
}

View File

@ -2,6 +2,7 @@
#if USE_AWS_S3
#include <Databases/DatabaseFactory.h>
#include <Databases/DatabaseS3.h>
#include <Interpreters/Context.h>
@ -307,6 +308,24 @@ DatabaseTablesIteratorPtr DatabaseS3::getTablesIterator(ContextPtr, const Filter
return std::make_unique<DatabaseTablesSnapshotIterator>(Tables{}, getDatabaseName());
}
}
/// Registers the "S3" database engine.
/// Engine arguments are optional; when present they are parsed by DatabaseS3::parseArguments.
void registerDatabaseS3(DatabaseFactory & factory)
{
    factory.registerDatabase("S3", [](const DatabaseFactory::Arguments & args)
    {
        const ASTFunction * engine_func = args.create_query.storage->engine;

        /// Stays default-constructed when no arguments were given.
        DatabaseS3::Configuration s3_config;

        const bool has_arguments = engine_func->arguments && !engine_func->arguments->children.empty();
        if (has_arguments)
        {
            ASTs & s3_arguments = engine_func->arguments->children;
            s3_config = DatabaseS3::parseArguments(s3_arguments, args.context);
        }

        return std::make_shared<DatabaseS3>(args.database_name, s3_config, args.context);
    });
}
}
#endif

View File

@ -65,6 +65,11 @@ void applyMetadataChangesToCreateQuery(const ASTPtr & query, const StorageInMemo
query->replace(ast_create_query.select, metadata.select.select_query);
}
if (metadata.refresh)
{
query->replace(ast_create_query.refresh_strategy, metadata.refresh);
}
/// MaterializedView, Dictionary are types of CREATE query without storage.
if (ast_create_query.storage)
{

View File

@ -2,13 +2,20 @@
#if USE_MYSQL
# include <Common/parseAddress.h>
# include <Common/parseRemoteDescription.h>
# include <Databases/MySQL/DatabaseMaterializedMySQL.h>
# include <Interpreters/Context.h>
# include <Interpreters/evaluateConstantExpression.h>
# include <Databases/DatabaseFactory.h>
# include <Databases/MySQL/DatabaseMaterializedTablesIterator.h>
# include <Databases/MySQL/MaterializedMySQLSyncThread.h>
# include <Parsers/ASTCreateQuery.h>
# include <Parsers/ASTFunction.h>
# include <Parsers/queryToString.h>
# include <Storages/StorageMySQL.h>
# include <Storages/StorageMaterializedMySQL.h>
# include <Storages/NamedCollectionsHelpers.h>
# include <Common/setThreadName.h>
# include <Common/PoolId.h>
# include <filesystem>
@ -21,6 +28,7 @@ namespace DB
namespace ErrorCodes
{
extern const int NOT_IMPLEMENTED;
extern const int BAD_ARGUMENTS;
}
DatabaseMaterializedMySQL::DatabaseMaterializedMySQL(
@ -179,6 +187,86 @@ void DatabaseMaterializedMySQL::stopReplication()
started_up = false;
}
/// Registers the MaterializedMySQL database engine under both spellings,
/// "MaterializeMySQL" and "MaterializedMySQL".
///
/// Connection parameters come either from a named collection or from four positional
/// arguments: host:port, database name, user name and password.
/// Throws BAD_ARGUMENTS on missing or wrong arguments and NOT_IMPLEMENTED when the
/// database has no UUID.
void registerDatabaseMaterializedMySQL(DatabaseFactory & factory)
{
    auto create_fn = [](const DatabaseFactory::Arguments & args)
    {
        auto * engine_define = args.create_query.storage;
        const ASTFunction * engine = engine_define->engine;
        const String & engine_name = engine_define->engine->name;

        if (!engine->arguments)
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Engine `{}` must have arguments", engine_name);

        StorageMySQL::Configuration configuration;
        ASTs & arguments = engine->arguments->children;
        auto mysql_settings = std::make_unique<MySQLSettings>();

        if (auto named_collection = tryGetNamedCollectionWithOverrides(arguments, args.context))
        {
            configuration = StorageMySQL::processNamedCollectionResult(*named_collection, *mysql_settings, args.context, false);
        }
        else
        {
            if (arguments.size() != 4)
                throw Exception(
                    ErrorCodes::BAD_ARGUMENTS,
                    "MySQL database require mysql_hostname, mysql_database_name, mysql_username, mysql_password arguments.");

            /// Only the database name may be given as an identifier or constant expression.
            arguments[1] = evaluateConstantExpressionOrIdentifierAsLiteral(arguments[1], args.context);
            const auto & host_port = safeGetLiteralValue<String>(arguments[0], engine_name);

            /// This creator is registered only as MaterializeMySQL / MaterializedMySQL, so the
            /// engine name is never "MySQL" here: the address is always a single host:port
            /// (default port 3306), which the client and pool below read from host/port.
            const auto & [remote_host, remote_port] = parseAddress(host_port, 3306);
            configuration.host = remote_host;
            configuration.port = remote_port;

            configuration.database = safeGetLiteralValue<String>(arguments[1], engine_name);
            configuration.username = safeGetLiteralValue<String>(arguments[2], engine_name);
            configuration.password = safeGetLiteralValue<String>(arguments[3], engine_name);
        }

        MySQLClient client(configuration.host, configuration.port, configuration.username, configuration.password);
        auto mysql_pool
            = mysqlxx::Pool(configuration.database, configuration.host, configuration.username, configuration.password, configuration.port);

        auto materialize_mode_settings = std::make_unique<MaterializedMySQLSettings>();

        if (engine_define->settings)
            materialize_mode_settings->loadFromQuery(*engine_define);

        /// A Nil UUID is rejected; the error message carries a CREATE query (attach flag cleared)
        /// that the user can run to re-create the database.
        if (args.uuid == UUIDHelpers::Nil)
        {
            auto print_create_ast = args.create_query.clone();
            print_create_ast->as<ASTCreateQuery>()->attach = false;
            throw Exception(
                ErrorCodes::NOT_IMPLEMENTED,
                "The MaterializedMySQL database engine no longer supports Ordinary databases. To re-create the database, delete "
                "the old one by executing \"rm -rf {}{{,.sql}}\", then re-create the database with the following query: {}",
                args.metadata_path,
                queryToString(print_create_ast));
        }

        return std::make_shared<DatabaseMaterializedMySQL>(
            args.context,
            args.database_name,
            args.metadata_path,
            args.uuid,
            configuration.database,
            std::move(mysql_pool),
            std::move(client),
            std::move(materialize_mode_settings));
    };

    factory.registerDatabase("MaterializeMySQL", create_fn);
    factory.registerDatabase("MaterializedMySQL", create_fn);
}
}
#endif

View File

@ -2,6 +2,7 @@
#if USE_MYSQL
# include <string>
# include <Databases/DatabaseFactory.h>
# include <DataTypes/DataTypeDateTime.h>
# include <DataTypes/DataTypeNullable.h>
# include <DataTypes/DataTypeString.h>
@ -14,6 +15,7 @@
# include <QueryPipeline/QueryPipelineBuilder.h>
# include <IO/Operators.h>
# include <Interpreters/Context.h>
# include <Interpreters/evaluateConstantExpression.h>
# include <Parsers/ASTCreateQuery.h>
# include <Parsers/ASTFunction.h>
# include <Parsers/ParserCreateQuery.h>
@ -21,8 +23,11 @@
# include <Parsers/queryToString.h>
# include <Storages/StorageMySQL.h>
# include <Storages/MySQL/MySQLSettings.h>
# include <Storages/MySQL/MySQLHelpers.h>
# include <Storages/NamedCollectionsHelpers.h>
# include <Common/escapeForFileName.h>
# include <Common/parseAddress.h>
# include <Common/parseRemoteDescription.h>
# include <Common/setThreadName.h>
# include <filesystem>
# include <Common/filesystemHelpers.h>
@ -41,6 +46,8 @@ namespace ErrorCodes
extern const int TABLE_IS_DROPPED;
extern const int TABLE_ALREADY_EXISTS;
extern const int UNEXPECTED_AST_STRUCTURE;
extern const int CANNOT_CREATE_DATABASE;
extern const int BAD_ARGUMENTS;
}
constexpr static const auto suffix = ".remove_flag";
@ -504,6 +511,77 @@ void DatabaseMySQL::createTable(ContextPtr local_context, const String & table_n
attachTable(local_context, table_name, storage, {});
}
/// Registers the "MySQL" database engine.
///
/// Connection parameters come either from a named collection or from four positional
/// arguments: host:port description, database name, user name and password.
/// Throws BAD_ARGUMENTS on missing or wrong arguments; any failure while constructing
/// the database object is rethrown as CANNOT_CREATE_DATABASE.
void registerDatabaseMySQL(DatabaseFactory & factory)
{
    auto create_fn = [](const DatabaseFactory::Arguments & args)
    {
        auto * engine_define = args.create_query.storage;
        const ASTFunction * engine = engine_define->engine;
        const String & engine_name = engine_define->engine->name;

        if (!engine->arguments)
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Engine `{}` must have arguments", engine_name);

        StorageMySQL::Configuration configuration;
        ASTs & arguments = engine->arguments->children;
        auto mysql_settings = std::make_unique<MySQLSettings>();

        if (auto named_collection = tryGetNamedCollectionWithOverrides(arguments, args.context))
        {
            configuration = StorageMySQL::processNamedCollectionResult(*named_collection, *mysql_settings, args.context, false);
        }
        else
        {
            if (arguments.size() != 4)
                throw Exception(
                    ErrorCodes::BAD_ARGUMENTS,
                    "MySQL database require mysql_hostname, mysql_database_name, mysql_username, mysql_password arguments.");

            /// Only the database name may be given as an identifier or constant expression.
            arguments[1] = evaluateConstantExpressionOrIdentifierAsLiteral(arguments[1], args.context);
            const auto & host_port = safeGetLiteralValue<String>(arguments[0], engine_name);

            /// This creator is registered only under the name "MySQL", so the address is always
            /// treated as a remote description (default port 3306), capped at
            /// glob_expansion_max_elements addresses.
            size_t max_addresses = args.context->getSettingsRef().glob_expansion_max_elements;
            configuration.addresses = parseRemoteDescriptionForExternalDatabase(host_port, max_addresses, 3306);

            configuration.database = safeGetLiteralValue<String>(arguments[1], engine_name);
            configuration.username = safeGetLiteralValue<String>(arguments[2], engine_name);
            configuration.password = safeGetLiteralValue<String>(arguments[3], engine_name);
        }

        /// Settings from the query context are loaded first, then the SETTINGS clause of the query.
        mysql_settings->loadFromQueryContext(args.context, *engine_define);
        if (engine_define->settings)
            mysql_settings->loadFromQuery(*engine_define);

        auto mysql_pool = createMySQLPoolWithFailover(configuration, *mysql_settings);

        try
        {
            return std::make_shared<DatabaseMySQL>(
                args.context,
                args.database_name,
                args.metadata_path,
                engine_define,
                configuration.database,
                std::move(mysql_settings),
                std::move(mysql_pool),
                args.create_query.attach);
        }
        catch (...)
        {
            const auto & exception_message = getCurrentExceptionMessage(true);
            throw Exception(ErrorCodes::CANNOT_CREATE_DATABASE, "Cannot create MySQL database, because {}", exception_message);
        }
    };

    factory.registerDatabase("MySQL", create_fn);
}
}
#endif

View File

@ -8,23 +8,25 @@
#include <Common/logger_useful.h>
#include <Common/Macros.h>
#include <Common/PoolId.h>
#include <Common/parseAddress.h>
#include <Common/parseRemoteDescription.h>
#include <Core/UUID.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeArray.h>
#include <Databases/DatabaseOrdinary.h>
#include <Databases/DatabaseAtomic.h>
#include <Databases/DatabaseFactory.h>
#include <Storages/NamedCollectionsHelpers.h>
#include <Storages/StoragePostgreSQL.h>
#include <Storages/AlterCommands.h>
#include <Interpreters/Context.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ParserCreateQuery.h>
#include <Parsers/parseQuery.h>
#include <Parsers/queryToString.h>
#include <Interpreters/evaluateConstantExpression.h>
#include <Interpreters/InterpreterAlterQuery.h>
#include <Common/escapeForFileName.h>
#include <Poco/DirectoryIterator.h>
#include <Poco/File.h>
namespace DB
{
@ -471,6 +473,59 @@ DatabaseTablesIteratorPtr DatabaseMaterializedPostgreSQL::getTablesIterator(
return DatabaseAtomic::getTablesIterator(StorageMaterializedPostgreSQL::makeNestedTableContext(local_context), filter_by_table_name);
}
/// Registers the "MaterializedPostgreSQL" database engine.
///
/// Connection parameters come either from a named collection or from exactly four
/// positional arguments: host:port, database name, user name and password.
/// Throws BAD_ARGUMENTS when the engine has no arguments or the argument count is wrong.
void registerDatabaseMaterializedPostgreSQL(DatabaseFactory & factory)
{
    auto create_fn = [](const DatabaseFactory::Arguments & args)
    {
        auto * engine_define = args.create_query.storage;
        const ASTFunction * engine = engine_define->engine;
        const String & engine_name = engine_define->engine->name;

        /// Must be checked before `engine->arguments` is dereferenced below.
        if (!engine->arguments)
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Engine `{}` must have arguments", engine_name);

        ASTs & engine_args = engine->arguments->children;

        StoragePostgreSQL::Configuration configuration;

        if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, args.context))
        {
            configuration = StoragePostgreSQL::processNamedCollectionResult(*named_collection, args.context, false);
        }
        else
        {
            if (engine_args.size() != 4)
                throw Exception(ErrorCodes::BAD_ARGUMENTS,
                                "MaterializedPostgreSQL Database require `host:port`, `database_name`, `username`, `password`.");

            /// Replace each argument in place with its evaluated literal form.
            for (auto & engine_arg : engine_args)
                engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, args.context);

            /// 5432 is the port used when the address does not specify one.
            auto parsed_host_port = parseAddress(safeGetLiteralValue<String>(engine_args[0], engine_name), 5432);

            configuration.host = parsed_host_port.first;
            configuration.port = parsed_host_port.second;
            configuration.database = safeGetLiteralValue<String>(engine_args[1], engine_name);
            configuration.username = safeGetLiteralValue<String>(engine_args[2], engine_name);
            configuration.password = safeGetLiteralValue<String>(engine_args[3], engine_name);
        }

        auto connection_info = postgres::formatConnectionString(
            configuration.database, configuration.host, configuration.port, configuration.username, configuration.password);

        auto postgresql_replica_settings = std::make_unique<MaterializedPostgreSQLSettings>();
        if (engine_define->settings)
            postgresql_replica_settings->loadFromQuery(*engine_define);

        return std::make_shared<DatabaseMaterializedPostgreSQL>(
            args.context, args.metadata_path, args.uuid, args.create_query.attach,
            args.database_name, configuration.database, connection_info,
            std::move(postgresql_replica_settings));
    };

    factory.registerDatabase("MaterializedPostgreSQL", create_fn);
}
}
#endif

View File

@ -6,14 +6,18 @@
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeArray.h>
#include <Storages/NamedCollectionsHelpers.h>
#include <Storages/StoragePostgreSQL.h>
#include <Interpreters/Context.h>
#include <Interpreters/evaluateConstantExpression.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ParserCreateQuery.h>
#include <Parsers/parseQuery.h>
#include <Parsers/queryToString.h>
#include <Common/escapeForFileName.h>
#include <Common/parseRemoteDescription.h>
#include <Databases/DatabaseFactory.h>
#include <Databases/PostgreSQL/fetchPostgreSQLTableStructure.h>
#include <Common/quoteString.h>
#include <Common/filesystemHelpers.h>
@ -478,6 +482,83 @@ ASTPtr DatabasePostgreSQL::getColumnDeclaration(const DataTypePtr & data_type) c
return std::make_shared<ASTIdentifier>(data_type->getName());
}
/// Registers the "PostgreSQL" database engine.
///
/// Connection parameters come either from a named collection or from positional arguments:
/// host:port, database name, user name, password and, optionally, schema and use_table_cache.
/// Throws BAD_ARGUMENTS when the engine has no arguments or the argument count is not 4..6.
void registerDatabasePostgreSQL(DatabaseFactory & factory)
{
    auto create_fn = [](const DatabaseFactory::Arguments & args)
    {
        auto * engine_define = args.create_query.storage;
        const ASTFunction * engine = engine_define->engine;
        const String & engine_name = engine_define->engine->name;

        /// Must be checked before `engine->arguments` is dereferenced below.
        if (!engine->arguments)
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Engine `{}` must have arguments", engine_name);

        ASTs & engine_args = engine->arguments->children;

        auto use_table_cache = false;
        StoragePostgreSQL::Configuration configuration;

        if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, args.context))
        {
            configuration = StoragePostgreSQL::processNamedCollectionResult(*named_collection, args.context, false);
            use_table_cache = named_collection->getOrDefault<UInt64>("use_table_cache", 0);
        }
        else
        {
            if (engine_args.size() < 4 || engine_args.size() > 6)
                throw Exception(ErrorCodes::BAD_ARGUMENTS,
                                "PostgreSQL Database require `host:port`, `database_name`, `username`, `password`"
                                "[, `schema` = \"\", `use_table_cache` = 0]");

            /// Replace each argument in place with its evaluated literal form.
            for (auto & engine_arg : engine_args)
                engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, args.context);

            const auto & host_port = safeGetLiteralValue<String>(engine_args[0], engine_name);
            size_t max_addresses = args.context->getSettingsRef().glob_expansion_max_elements;

            /// 5432 is the port used when the description does not specify one.
            configuration.addresses = parseRemoteDescriptionForExternalDatabase(host_port, max_addresses, 5432);
            configuration.database = safeGetLiteralValue<String>(engine_args[1], engine_name);
            configuration.username = safeGetLiteralValue<String>(engine_args[2], engine_name);
            configuration.password = safeGetLiteralValue<String>(engine_args[3], engine_name);

            /// The fifth argument is the schema when it is a string. A non-string fifth argument
            /// is the deprecated form where use_table_cache is passed in that position.
            bool is_deprecated_syntax = false;
            if (engine_args.size() >= 5)
            {
                const auto & arg_value = engine_args[4]->as<ASTLiteral>()->value;
                if (arg_value.getType() == Field::Types::Which::String)
                {
                    configuration.schema = safeGetLiteralValue<String>(engine_args[4], engine_name);
                }
                else
                {
                    use_table_cache = safeGetLiteralValue<UInt8>(engine_args[4], engine_name);
                    LOG_WARNING(&Poco::Logger::get("DatabaseFactory"), "A deprecated syntax of PostgreSQL database engine is used");
                    is_deprecated_syntax = true;
                }
            }

            /// With the deprecated syntax a sixth argument, if present, is ignored.
            if (!is_deprecated_syntax && engine_args.size() >= 6)
                use_table_cache = safeGetLiteralValue<UInt8>(engine_args[5], engine_name);
        }

        const auto & settings = args.context->getSettingsRef();
        auto pool = std::make_shared<postgres::PoolWithFailover>(
            configuration,
            settings.postgresql_connection_pool_size,
            settings.postgresql_connection_pool_wait_timeout,
            POSTGRESQL_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES,
            settings.postgresql_connection_pool_auto_close_connection);

        return std::make_shared<DatabasePostgreSQL>(
            args.context,
            args.metadata_path,
            engine_define,
            args.database_name,
            configuration,
            pool,
            use_table_cache);
    };

    factory.registerDatabase("PostgreSQL", create_fn);
}
}
#endif

View File

@ -5,11 +5,11 @@
#include <Common/logger_useful.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeNullable.h>
#include <Databases/DatabaseFactory.h>
#include <Databases/SQLite/fetchSQLiteTableStructure.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/ASTColumnDeclaration.h>
#include <Parsers/ASTFunction.h>
#include <Interpreters/Context.h>
#include <Storages/StorageSQLite.h>
#include <Databases/SQLite/SQLiteUtils.h>
@ -21,6 +21,7 @@ namespace ErrorCodes
{
extern const int SQLITE_ENGINE_ERROR;
extern const int UNKNOWN_TABLE;
extern const int BAD_ARGUMENTS;
}
DatabaseSQLite::DatabaseSQLite(
@ -201,6 +202,24 @@ ASTPtr DatabaseSQLite::getCreateTableQueryImpl(const String & table_name, Contex
return create_table_query;
}
/// Registers the "SQLite" database engine.
/// The engine requires exactly one argument: the database path.
void registerDatabaseSQLite(DatabaseFactory & factory)
{
    factory.registerDatabase("SQLite", [](const DatabaseFactory::Arguments & args)
    {
        auto * storage_def = args.create_query.storage;
        const ASTFunction * engine_func = storage_def->engine;

        const bool has_single_argument = engine_func->arguments && engine_func->arguments->children.size() == 1;
        if (!has_single_argument)
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "SQLite database requires 1 argument: database path");

        String database_path = safeGetLiteralValue<String>(engine_func->arguments->children[0], "SQLite");

        return std::make_shared<DatabaseSQLite>(args.context, storage_def, args.create_query.attach, database_path);
    });
}
}
#endif

View File

@ -60,7 +60,7 @@ public:
/// Removes all dependencies of "table_id", returns those dependencies.
std::vector<StorageID> removeDependencies(const StorageID & table_id, bool remove_isolated_tables = false);
/// Removes a table from the graph and removes all references to in from the graph (both from its dependencies and dependents).
/// Removes a table from the graph and removes all references to it from the graph (both from its dependencies and dependents).
bool removeTable(const StorageID & table_id);
/// Removes tables from the graph by a specified filter.

View File

@ -0,0 +1,72 @@
#include <Databases/DatabaseFactory.h>
#include <Databases/registerDatabases.h>
namespace DB
{
/// Forward declarations of the per-engine registration functions.
/// They are only declared here, not defined; registerDatabases() below calls each of them.

/// Engines that are declared unconditionally.
void registerDatabaseAtomic(DatabaseFactory & factory);
void registerDatabaseOrdinary(DatabaseFactory & factory);
void registerDatabaseDictionary(DatabaseFactory & factory);
void registerDatabaseMemory(DatabaseFactory & factory);
void registerDatabaseLazy(DatabaseFactory & factory);
void registerDatabaseFilesystem(DatabaseFactory & factory);
void registerDatabaseReplicated(DatabaseFactory & factory);
/// Engines below are declared only when the matching USE_* macro evaluates to non-zero.
/// The same guards are repeated around the calls in registerDatabases().
#if USE_MYSQL
void registerDatabaseMySQL(DatabaseFactory & factory);
void registerDatabaseMaterializedMySQL(DatabaseFactory & factory);
#endif
#if USE_LIBPQXX
void registerDatabasePostgreSQL(DatabaseFactory & factory);
void registerDatabaseMaterializedPostgreSQL(DatabaseFactory & factory);
#endif
#if USE_SQLITE
void registerDatabaseSQLite(DatabaseFactory & factory);
#endif
#if USE_AWS_S3
void registerDatabaseS3(DatabaseFactory & factory);
#endif
#if USE_HDFS
void registerDatabaseHDFS(DatabaseFactory & factory);
#endif
void registerDatabases()
{
auto & factory = DatabaseFactory::instance();
registerDatabaseAtomic(factory);
registerDatabaseOrdinary(factory);
registerDatabaseDictionary(factory);
registerDatabaseMemory(factory);
registerDatabaseLazy(factory);
registerDatabaseFilesystem(factory);
registerDatabaseReplicated(factory);
#if USE_MYSQL
registerDatabaseMySQL(factory);
registerDatabaseMaterializedMySQL(factory);
#endif
#if USE_LIBPQXX
registerDatabasePostgreSQL(factory);
registerDatabaseMaterializedPostgreSQL(factory);
#endif
#if USE_SQLITE
registerDatabaseSQLite(factory);
#endif
#if USE_AWS_S3
registerDatabaseS3(factory);
#endif
#if USE_HDFS
registerDatabaseHDFS(factory);
#endif
}
}

View File

@ -0,0 +1,6 @@
#pragma once
namespace DB
{
/// Registers the database engines enabled in this build with DatabaseFactory::instance().
void registerDatabases();
}

View File

@ -4,6 +4,9 @@
#include "GeodataProviders/HierarchiesProvider.h"
#include "GeodataProviders/NamesProvider.h"
namespace DB
{
std::unique_ptr<RegionsHierarchies> GeoDictionariesLoader::reloadRegionsHierarchies(const Poco::Util::AbstractConfiguration & config)
{
static constexpr auto config_key = "path_to_regions_hierarchy_file";
@ -27,3 +30,5 @@ std::unique_ptr<RegionsNames> GeoDictionariesLoader::reloadRegionsNames(const Po
auto data_provider = std::make_unique<RegionsNamesDataProvider>(directory);
return std::make_unique<RegionsNames>(std::move(data_provider));
}
}

View File

@ -6,6 +6,9 @@
#include <Poco/Util/AbstractConfiguration.h>
namespace DB
{
// Default implementation of geo dictionaries loader used by native server application
class GeoDictionariesLoader
{
@ -13,3 +16,5 @@ public:
static std::unique_ptr<RegionsHierarchies> reloadRegionsHierarchies(const Poco::Util::AbstractConfiguration & config);
static std::unique_ptr<RegionsNames> reloadRegionsNames(const Poco::Util::AbstractConfiguration & config);
};
}

View File

@ -3,6 +3,9 @@
#include <string>
#include "Types.h"
namespace DB
{
struct RegionEntry
{
RegionID id;
@ -17,3 +20,5 @@ struct RegionNameEntry
RegionID id;
std::string name;
};
}

View File

@ -9,6 +9,9 @@
namespace fs = std::filesystem;
namespace DB
{
bool RegionsHierarchyDataSource::isModified() const
{
return updates_tracker.isModified();
@ -17,7 +20,7 @@ bool RegionsHierarchyDataSource::isModified() const
IRegionsHierarchyReaderPtr RegionsHierarchyDataSource::createReader()
{
updates_tracker.fixCurrentVersion();
auto file_reader = std::make_shared<DB::ReadBufferFromFile>(path);
auto file_reader = std::make_shared<ReadBufferFromFile>(path);
return std::make_unique<RegionsHierarchyFormatReader>(std::move(file_reader));
}
@ -73,3 +76,5 @@ IRegionsHierarchyDataSourcePtr RegionsHierarchiesDataProvider::getHierarchySourc
throw Poco::Exception("Regions hierarchy '" + name + "' not found");
}
}

View File

@ -5,6 +5,8 @@
#include <unordered_map>
#include <Common/FileUpdatesTracker.h>
namespace DB
{
// Represents local file with regions hierarchy dump
class RegionsHierarchyDataSource : public IRegionsHierarchyDataSource
@ -50,3 +52,5 @@ public:
private:
void discoverFilesWithCustomHierarchies();
};
}

View File

@ -3,6 +3,8 @@
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
namespace DB
{
bool RegionsHierarchyFormatReader::readNext(RegionEntry & entry)
{
@ -15,11 +17,11 @@ bool RegionsHierarchyFormatReader::readNext(RegionEntry & entry)
Int32 read_parent_id = 0;
Int8 read_type = 0;
DB::readIntText(read_region_id, *input);
DB::assertChar('\t', *input);
DB::readIntText(read_parent_id, *input);
DB::assertChar('\t', *input);
DB::readIntText(read_type, *input);
readIntText(read_region_id, *input);
assertChar('\t', *input);
readIntText(read_parent_id, *input);
assertChar('\t', *input);
readIntText(read_type, *input);
/** Then there can be a newline (old version)
* or tab, the region's population, line feed (new version).
@ -29,11 +31,11 @@ bool RegionsHierarchyFormatReader::readNext(RegionEntry & entry)
{
++input->position();
UInt64 population_big = 0;
DB::readIntText(population_big, *input);
readIntText(population_big, *input);
population = population_big > std::numeric_limits<RegionPopulation>::max() ? std::numeric_limits<RegionPopulation>::max()
: static_cast<RegionPopulation>(population_big);
}
DB::assertChar('\n', *input);
assertChar('\n', *input);
if (read_region_id <= 0 || read_type < 0)
continue;
@ -55,3 +57,5 @@ bool RegionsHierarchyFormatReader::readNext(RegionEntry & entry)
return false;
}
}

View File

@ -3,15 +3,19 @@
#include <IO/ReadBuffer.h>
#include "IHierarchiesProvider.h"
namespace DB
{
// Reads regions hierarchy in geoexport format
class RegionsHierarchyFormatReader : public IRegionsHierarchyReader
{
private:
DB::ReadBufferPtr input;
ReadBufferPtr input;
public:
explicit RegionsHierarchyFormatReader(DB::ReadBufferPtr input_) : input(std::move(input_)) {}
explicit RegionsHierarchyFormatReader(ReadBufferPtr input_) : input(std::move(input_)) {}
bool readNext(RegionEntry & entry) override;
};
}

View File

@ -5,6 +5,8 @@
#include <vector>
#include "Entries.h"
namespace DB
{
// Iterates over all regions in data source
class IRegionsHierarchyReader
@ -46,3 +48,5 @@ public:
};
using IRegionsHierarchiesDataProviderPtr = std::shared_ptr<IRegionsHierarchiesDataProvider>;
}

View File

@ -3,6 +3,8 @@
#include <memory>
#include "Entries.h"
namespace DB
{
// Iterates over all name entries in data source
class ILanguageRegionsNamesReader
@ -49,3 +51,5 @@ public:
};
using IRegionsNamesDataProviderPtr = std::unique_ptr<IRegionsNamesDataProvider>;
}

View File

@ -2,6 +2,8 @@
#include <IO/ReadHelpers.h>
namespace DB
{
bool LanguageRegionsNamesFormatReader::readNext(RegionNameEntry & entry)
{
@ -10,10 +12,10 @@ bool LanguageRegionsNamesFormatReader::readNext(RegionNameEntry & entry)
Int32 read_region_id;
std::string region_name;
DB::readIntText(read_region_id, *input);
DB::assertChar('\t', *input);
DB::readString(region_name, *input);
DB::assertChar('\n', *input);
readIntText(read_region_id, *input);
assertChar('\t', *input);
readString(region_name, *input);
assertChar('\n', *input);
if (read_region_id <= 0)
continue;
@ -25,3 +27,5 @@ bool LanguageRegionsNamesFormatReader::readNext(RegionNameEntry & entry)
return false;
}
}

Some files were not shown because too many files have changed in this diff Show More