Merge pull request #1 from ClickHouse/master

pull master
This commit is contained in:
OnePiece 2021-08-25 15:57:32 +08:00 committed by GitHub
commit a7c58b0d8a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
230 changed files with 6912 additions and 518 deletions

View File

@ -9,7 +9,7 @@ assignees: ''
> You have to provide the following information whenever possible.
**Describe the bug**
**Describe what's wrong**
> A clear and concise description of what does not work as it is supposed to.

View File

@ -45,6 +45,7 @@ include (cmake/arch.cmake)
include (cmake/target.cmake)
include (cmake/tools.cmake)
include (cmake/analysis.cmake)
include (cmake/git_status.cmake)
# Ignore export() since we don't use it,
# but it gets broken with a global targets via link_libraries()

View File

@ -1,9 +1,11 @@
# Security Policy
## Supported Versions
## Security Announcements
Security fixes will be announced by posting them in the [security changelog](https://clickhouse.tech/docs/en/whats-new/security-changelog/)
The following versions of ClickHouse server are
currently being supported with security updates:
## Scope and Supported Versions
The following versions of ClickHouse server are currently being supported with security updates:
| Version | Supported |
| ------- | ------------------ |
@ -11,18 +13,49 @@ currently being supported with security updates:
| 18.x | :x: |
| 19.x | :x: |
| 20.1 | :x: |
| 20.3 | :white_check_mark: |
| 20.3 | :x: |
| 20.4 | :x: |
| 20.5 | :x: |
| 20.6 | :x: |
| 20.7 | :x: |
| 20.8 | :white_check_mark: |
| 20.8 | :x: |
| 20.9 | :x: |
| 20.10 | :x: |
| 20.11 | :white_check_mark: |
| 20.12 | :white_check_mark: |
| 21.1 | :white_check_mark: |
| 20.11 | :x: |
| 20.12 | :x: |
| 21.1 | :x: |
| 21.2 | :x: |
| 21.3 | ✅ |
| 21.4 | :x: |
| 21.5 | :x: |
| 21.6 | ✅ |
| 21.7 | ✅ |
| 21.8 | ✅ |
## Reporting a Vulnerability
We're extremely grateful for security researchers and users that report vulnerabilities to the ClickHouse Open Source Community. All reports are thoroughly investigated by developers.
To report a potential vulnerability in ClickHouse please send the details about it to [clickhouse-feedback@yandex-team.com](mailto:clickhouse-feedback@yandex-team.com).
### When Should I Report a Vulnerability?
- You think you discovered a potential security vulnerability in ClickHouse
- You are unsure how a vulnerability affects ClickHouse
### When Should I NOT Report a Vulnerability?
- You need help tuning ClickHouse components for security
- You need help applying security related updates
- Your issue is not security related
## Security Vulnerability Response
Each report is acknowledged and analyzed by ClickHouse maintainers within 5 working days.
As the security issue moves from triage, to identified fix, to release planning we will keep the reporter updated.
## Public Disclosure Timing
A public disclosure date is negotiated by the ClickHouse maintainers and the bug submitter. We prefer to fully disclose the bug as soon as possible once a user mitigation is available. It is reasonable to delay disclosure when the bug or the fix is not yet fully understood, the solution is not well-tested, or for vendor coordination. The timeframe for disclosure is from immediate (especially if it's already publicly known) to 90 days. For a vulnerability with a straightforward mitigation, we expect report date to disclosure date to be on the order of 7 days.

28
base/common/insertAtEnd.h Normal file
View File

@ -0,0 +1,28 @@
#pragma once
#include <vector>
/// Copies every element of `src` onto the back of `dest`, preserving their order.
/// An empty `src` leaves `dest` completely untouched.
template <typename T>
void insertAtEnd(std::vector<T> & dest, const std::vector<T> & src)
{
    if (!src.empty())
    {
        /// Make room for the combined contents up front, then append the whole range in one call.
        const auto combined_size = dest.size() + src.size();
        dest.reserve(combined_size);
        dest.insert(dest.end(), src.cbegin(), src.cend());
    }
}
/// Moves every element of `src` onto the back of `dest`, preserving their order.
/// A non-empty `src` is left empty: either its elements are moved out and it is cleared,
/// or (when `dest` is empty) the two vectors simply exchange their contents.
template <typename T>
void insertAtEnd(std::vector<T> & dest, std::vector<T> && src)
{
    if (src.empty())
        return;

    if (!dest.empty())
    {
        dest.reserve(dest.size() + src.size());

        auto moved_first = std::make_move_iterator(src.begin());
        auto moved_last = std::make_move_iterator(src.end());
        dest.insert(dest.end(), moved_first, moved_last);

        /// Drop the moved-from elements that remain in `src`.
        src.clear();
        return;
    }

    /// `dest` holds nothing worth keeping, so take over the contents of `src` wholesale.
    dest.swap(src);
}

10
base/common/unit.h Normal file
View File

@ -0,0 +1,10 @@
#pragma once
#include <cstddef>
/// Binary (power-of-two) size units, expressed in bytes.
/// Declared `inline` so that every translation unit including this header refers to the same entity.
inline constexpr size_t KiB = 1024;
inline constexpr size_t MiB = 1024 * KiB;
inline constexpr size_t GiB = 1024 * MiB;

/// User-defined literals for the units above: `4_KiB` is 4 * 1024 bytes, `2_MiB` is 2 * 1024 * 1024 bytes, etc.
/// They are declared without whitespace between `""` and the suffix: in the spaced form the suffix is an
/// ordinary identifier, and an identifier starting with an underscore followed by an uppercase letter is
/// reserved; that form of declaration is also deprecated.
constexpr size_t operator""_KiB(unsigned long long val) { return static_cast<size_t>(val * KiB); }
constexpr size_t operator""_MiB(unsigned long long val) { return static_cast<size_t>(val * MiB); }
constexpr size_t operator""_GiB(unsigned long long val) { return static_cast<size_t>(val * GiB); }

View File

@ -9,6 +9,7 @@
#include <cmath>
#include <cfloat>
#include <cassert>
#include <tuple>
#include <limits>
@ -39,6 +40,18 @@ static constexpr bool IntegralConcept() noexcept
return std::is_integral_v<T> || IsWideInteger<T>::value;
}
/// Compile-time trait: `IsTupleLike<T>::value` is true exactly when `std::tuple_size<T>::value`
/// is a well-formed expression, and false otherwise.
template <typename T>
class IsTupleLike
{
    /// Takes part in overload resolution only if `std::tuple_size<Candidate>::value` is valid (SFINAE).
    template <typename Candidate>
    static auto probe(Candidate * unused) -> decltype(std::tuple_size<Candidate>::value, int());

    /// Fallback picked for every other type; its `void` return type marks the negative answer.
    template <typename>
    static void probe(...);

    using ProbeResult = decltype(probe<T>(nullptr));

public:
    static constexpr bool value = !std::is_void_v<ProbeResult>;
};
}
namespace std
@ -227,6 +240,19 @@ struct integer<Bits, Signed>::_impl
self.items[i] = 0;
}
/// Fills `self.items` from a tuple-like object, element by element.
/// Item `i` receives `std::get<i>(tuple)`; items beyond the size of the tuple are set to zero.
/// The walk over indices is a compile-time recursion on the template parameter `i`,
/// which stops once `i` reaches `item_count`.
template <typename TupleLike, size_t i = 0>
constexpr static void wide_integer_from_tuple_like(integer<Bits, Signed> & self, const TupleLike & tuple) noexcept
{
    if constexpr (i < item_count)
    {
        constexpr bool tuple_has_element = i < std::tuple_size_v<TupleLike>;

        if constexpr (tuple_has_element)
            self.items[i] = std::get<i>(tuple);
        else
            self.items[i] = 0;

        constexpr size_t next_index = i + 1;
        wide_integer_from_tuple_like<TupleLike, next_index>(self, tuple);
    }
}
/**
* N.B. t is constructed from double, so max(t) = max(double) ~ 2^310
* the recursive call happens when t / 2^64 > 2^64, so there won't be more than 5 of them.
@ -966,6 +992,8 @@ constexpr integer<Bits, Signed>::integer(T rhs) noexcept
{
if constexpr (IsWideInteger<T>::value)
_impl::wide_integer_from_wide_integer(*this, rhs);
else if constexpr (IsTupleLike<T>::value)
_impl::wide_integer_from_tuple_like(*this, rhs);
else
_impl::wide_integer_from_builtin(*this, rhs);
}
@ -979,6 +1007,8 @@ constexpr integer<Bits, Signed>::integer(std::initializer_list<T> il) noexcept
{
if constexpr (IsWideInteger<T>::value)
_impl::wide_integer_from_wide_integer(*this, *il.begin());
else if constexpr (IsTupleLike<T>::value)
_impl::wide_integer_from_tuple_like(*this, *il.begin());
else
_impl::wide_integer_from_builtin(*this, *il.begin());
}
@ -1007,7 +1037,10 @@ template <size_t Bits, typename Signed>
template <typename T>
constexpr integer<Bits, Signed> & integer<Bits, Signed>::operator=(T rhs) noexcept
{
_impl::wide_integer_from_builtin(*this, rhs);
if constexpr (IsTupleLike<T>::value)
_impl::wide_integer_from_tuple_like(*this, rhs);
else
_impl::wide_integer_from_builtin(*this, rhs);
return *this;
}

17
cmake/git_status.cmake Normal file
View File

@ -0,0 +1,17 @@
# Print the status of the git repository (if git is available).
# This is useful for troubleshooting build failure reports.
#
# On success GIT_COMMIT_ID holds the hash of HEAD. If the source directory is not
# a git work tree (e.g. a release tarball) or git fails for any other reason,
# the failure is reported instead of printing an empty hash.

find_package(Git)

if (Git_FOUND)
    execute_process(
        COMMAND ${GIT_EXECUTABLE} rev-parse HEAD
        WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
        RESULT_VARIABLE GIT_REV_PARSE_RESULT
        OUTPUT_VARIABLE GIT_COMMIT_ID
        ERROR_VARIABLE GIT_REV_PARSE_ERROR
        OUTPUT_STRIP_TRAILING_WHITESPACE
        ERROR_STRIP_TRAILING_WHITESPACE)

    if (GIT_REV_PARSE_RESULT EQUAL 0)
        message(STATUS "HEAD's commit hash ${GIT_COMMIT_ID}")

        # Output is deliberately not captured: it goes straight to the configure log.
        execute_process(
            COMMAND ${GIT_EXECUTABLE} status
            WORKING_DIRECTORY ${CMAKE_SOURCE_DIR})
    else()
        message(STATUS "Could not determine HEAD's commit hash: ${GIT_REV_PARSE_ERROR}")
    endif()
else()
    message(STATUS "The git program could not be found.")
endif()

2
contrib/nanodbc vendored

@ -1 +1 @@
Subproject commit 9fc459675515d491401727ec67fca38db721f28c
Subproject commit df52a1232dfa182f9af60974d001b91823afe9bc

2
contrib/replxx vendored

@ -1 +1 @@
Subproject commit c81be6c68b146f15f2096b7ef80e3f21fe27004c
Subproject commit f97765df14f4a6236d69b8f14b53ef2051ebd95a

View File

@ -2,6 +2,8 @@ FROM ubuntu:20.04
ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=11
RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list
RUN apt-get update \
&& apt-get install ca-certificates lsb-release wget gnupg apt-transport-https \
--yes --no-install-recommends --verbose-versions \

View File

@ -3,6 +3,8 @@ FROM ubuntu:18.04
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=21.10.1.*
RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list
RUN apt-get update \
&& apt-get install --yes --no-install-recommends \
apt-transport-https \

View File

@ -3,6 +3,8 @@ FROM ubuntu:20.04
ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=11
RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list
RUN apt-get update \
&& apt-get install \
apt-transport-https \

View File

@ -3,6 +3,8 @@ FROM ubuntu:20.04
ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=11
RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list
RUN apt-get update \
&& apt-get install ca-certificates lsb-release wget gnupg apt-transport-https \
--yes --no-install-recommends --verbose-versions \

View File

@ -173,9 +173,6 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ
cmake_flags.append('-DUSE_GTEST=1')
cmake_flags.append('-DENABLE_TESTS=1')
cmake_flags.append('-DENABLE_EXAMPLES=1')
cmake_flags.append('-DENABLE_FUZZING=1')
# For fuzzing needs
cmake_flags.append('-DUSE_YAML_CPP=1')
# Don't stop on first error to find more clang-tidy errors in one run.
result.append('NINJA_FLAGS=-k0')

View File

@ -5,6 +5,8 @@ RUN export CODENAME="$(lsb_release --codename --short | tr 'A-Z' 'a-z')" \
&& wget -nv -O /tmp/arrow-keyring.deb "https://apache.jfrog.io/artifactory/arrow/ubuntu/apache-arrow-apt-source-latest-${CODENAME}.deb" \
&& dpkg -i /tmp/arrow-keyring.deb
RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list
# Libraries from OS are only needed to test the "unbundled" build (that is not used in production).
RUN apt-get update \
&& apt-get install \

View File

@ -26,6 +26,8 @@ ARG DEBIAN_FRONTEND=noninteractive
# installed to prevent picking those uid / gid by some unrelated software.
# The same uid / gid (101) is used both for alpine and ubuntu.
RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list
RUN groupadd -r clickhouse --gid=101 \
&& useradd -r -g clickhouse --uid=101 --home-dir=/var/lib/clickhouse --shell=/bin/bash clickhouse \
&& apt-get update \

View File

@ -3,6 +3,8 @@ FROM ubuntu:20.04
ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=11
RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list
RUN apt-get update \
&& apt-get install ca-certificates lsb-release wget gnupg apt-transport-https \
--yes --no-install-recommends --verbose-versions \

View File

@ -2,6 +2,8 @@
# docker run --volume=path_to_repo:/repo_folder --volume=path_to_result:/test_output yandex/clickhouse-codebrowser
FROM yandex/clickhouse-binary-builder
RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list
RUN apt-get update && apt-get --yes --allow-unauthenticated install clang-9 libllvm9 libclang-9-dev
# repo versions doesn't work correctly with C++17

View File

@ -3,6 +3,8 @@ FROM ubuntu:20.04
ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=11
RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list
RUN apt-get update \
&& apt-get install ca-certificates lsb-release wget gnupg apt-transport-https \
--yes --no-install-recommends --verbose-versions \

View File

@ -393,6 +393,9 @@ function run_tests
01853_s2_cells_intersect
01854_s2_cap_contains
01854_s2_cap_union
# needs s3
01944_insert_partition_by
)
time clickhouse-test --hung-check -j 8 --order=random --use-skip-list \

View File

@ -5,6 +5,8 @@ ENV LANG=C.UTF-8
ENV TZ=Europe/Moscow
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list
RUN apt-get update \
&& DEBIAN_FRONTEND=noninteractive apt-get install --yes --no-install-recommends \
ca-certificates \

View File

@ -1,6 +1,8 @@
# docker build -t yandex/clickhouse-integration-tests-runner .
FROM ubuntu:20.04
RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list
RUN apt-get update \
&& env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
ca-certificates \
@ -77,8 +79,9 @@ RUN python3 -m pip install \
pytest-timeout \
pytest-xdist \
pytest-repeat \
pytz \
redis \
tzlocal \
tzlocal==2.1 \
urllib3 \
requests-kerberos \
pyhdfs

View File

@ -5,6 +5,8 @@ ENV LANG=C.UTF-8
ENV TZ=Europe/Moscow
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list
RUN apt-get update \
&& DEBIAN_FRONTEND=noninteractive apt-get install --yes --no-install-recommends \
bash \

View File

@ -1,6 +1,8 @@
# docker build -t yandex/clickhouse-sqlancer-test .
FROM ubuntu:20.04
RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list
RUN apt-get update --yes && env DEBIAN_FRONTEND=noninteractive apt-get install wget unzip git openjdk-14-jdk maven python3 --yes --no-install-recommends
RUN wget https://github.com/sqlancer/sqlancer/archive/master.zip -O /sqlancer.zip
RUN mkdir /sqlancer && \

View File

@ -1,6 +1,8 @@
# docker build -t yandex/clickhouse-style-test .
FROM ubuntu:20.04
RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list
RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
shellcheck \
libxml2-utils \

View File

@ -1,6 +1,8 @@
# docker build -t yandex/clickhouse-testflows-runner .
FROM ubuntu:20.04
RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list
RUN apt-get update \
&& env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
ca-certificates \
@ -35,7 +37,7 @@ RUN apt-get update \
ENV TZ=Europe/Moscow
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
RUN pip3 install urllib3 testflows==1.7.20 docker-compose==1.29.1 docker==5.0.0 dicttoxml kazoo tzlocal python-dateutil numpy
RUN pip3 install urllib3 testflows==1.7.20 docker-compose==1.29.1 docker==5.0.0 dicttoxml kazoo tzlocal==2.1 pytz python-dateutil numpy
ENV DOCKER_CHANNEL stable
ENV DOCKER_VERSION 20.10.6

View File

@ -23,3 +23,5 @@ You can also use the following database engines:
- [PostgreSQL](../../engines/database-engines/postgresql.md)
- [Replicated](../../engines/database-engines/replicated.md)
- [SQLite](../../engines/database-engines/sqlite.md)

View File

@ -1,6 +1,6 @@
---
toc_priority: 29
toc_title: "[experimental] MaterializedMySQL"
toc_title: MaterializedMySQL
---
# [experimental] MaterializedMySQL {#materialized-mysql}

View File

@ -0,0 +1,80 @@
---
toc_priority: 32
toc_title: SQLite
---
# SQLite {#sqlite}
Allows connecting to an [SQLite](https://www.sqlite.org/index.html) database and performing `INSERT` and `SELECT` queries to exchange data between ClickHouse and SQLite.
## Creating a Database {#creating-a-database}
``` sql
CREATE DATABASE sqlite_database
ENGINE = SQLite('db_path')
```
**Engine Parameters**
- `db_path` — Path to a file with SQLite database.
## Data Types Support {#data_types-support}
| SQLite | ClickHouse |
|---------------|---------------------------------------------------------|
| INTEGER | [Int32](../../sql-reference/data-types/int-uint.md) |
| REAL | [Float32](../../sql-reference/data-types/float.md) |
| TEXT | [String](../../sql-reference/data-types/string.md) |
| BLOB | [String](../../sql-reference/data-types/string.md) |
## Specifics and Recommendations {#specifics-and-recommendations}
SQLite stores the entire database (definitions, tables, indices, and the data itself) as a single cross-platform file on a host machine. During writing SQLite locks the entire database file, therefore write operations are performed sequentially. Read operations can be multitasked.
SQLite does not require service management (such as startup scripts) or access control based on `GRANT` and passwords. Access control is handled by means of file-system permissions given to the database file itself.
## Usage Example {#usage-example}
Database in ClickHouse, connected to the SQLite:
``` sql
CREATE DATABASE sqlite_db ENGINE = SQLite('sqlite.db');
SHOW TABLES FROM sqlite_db;
```
``` text
┌──name───┐
│ table1 │
│ table2 │
└─────────┘
```
Shows the data from the table:
``` sql
SELECT * FROM sqlite_db.table1;
```
``` text
┌─col1──┬─col2─┐
│ line1 │ 1 │
│ line2 │ 2 │
│ line3 │ 3 │
└───────┴──────┘
```
Inserting data into SQLite table from ClickHouse table:
``` sql
CREATE TABLE clickhouse_table(`col1` String,`col2` Int16) ENGINE = MergeTree() ORDER BY col2;
INSERT INTO clickhouse_table VALUES ('text',10);
INSERT INTO sqlite_db.table1 SELECT * FROM clickhouse_table;
SELECT * FROM sqlite_db.table1;
```
``` text
┌─col1──┬─col2─┐
│ line1 │ 1 │
│ line2 │ 2 │
│ line3 │ 3 │
│ text │ 10 │
└───────┴──────┘
```

View File

@ -19,3 +19,4 @@ List of supported integrations:
- [EmbeddedRocksDB](../../../engines/table-engines/integrations/embedded-rocksdb.md)
- [RabbitMQ](../../../engines/table-engines/integrations/rabbitmq.md)
- [PostgreSQL](../../../engines/table-engines/integrations/postgresql.md)
- [SQLite](../../../engines/table-engines/integrations/sqlite.md)

View File

@ -0,0 +1,59 @@
---
toc_priority: 7
toc_title: SQLite
---
# SQLite {#sqlite}
The engine allows importing and exporting data to and from SQLite and supports queries to SQLite tables directly from ClickHouse.
## Creating a Table {#creating-a-table}
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name
(
name1 [type1],
name2 [type2], ...
) ENGINE = SQLite('db_path', 'table')
```
**Engine Parameters**
- `db_path` — Path to SQLite file with a database.
- `table` — Name of a table in the SQLite database.
## Usage Example {#usage-example}
Shows a query creating the SQLite table:
```sql
SHOW CREATE TABLE sqlite_db.table2;
```
``` text
CREATE TABLE SQLite.table2
(
`col1` Nullable(Int32),
`col2` Nullable(String)
)
ENGINE = SQLite('sqlite.db','table2');
```
Returns the data from the table:
``` sql
SELECT * FROM sqlite_db.table2 ORDER BY col1;
```
```text
┌─col1─┬─col2──┐
│ 1 │ text1 │
│ 2 │ text2 │
│ 3 │ text3 │
└──────┴───────┘
```
**See Also**
- [SQLite](../../../engines/database-engines/sqlite.md) engine
- [sqlite](../../../sql-reference/table-functions/sqlite.md) table function

View File

@ -99,7 +99,9 @@ For a description of parameters, see the [CREATE query description](../../../sql
- `use_minimalistic_part_header_in_zookeeper` — Storage method of the data parts headers in ZooKeeper. If `use_minimalistic_part_header_in_zookeeper=1`, then ZooKeeper stores less data. For more information, see the [setting description](../../../operations/server-configuration-parameters/settings.md#server-settings-use_minimalistic_part_header_in_zookeeper) in “Server configuration parameters”.
- `min_merge_bytes_to_use_direct_io` — The minimum data volume for merge operation that is required for using direct I/O access to the storage disk. When merging data parts, ClickHouse calculates the total storage volume of all the data to be merged. If the volume exceeds `min_merge_bytes_to_use_direct_io` bytes, ClickHouse reads and writes the data to the storage disk using the direct I/O interface (`O_DIRECT` option). If `min_merge_bytes_to_use_direct_io = 0`, then direct I/O is disabled. Default value: `10 * 1024 * 1024 * 1024` bytes.
<a name="mergetree_setting-merge_with_ttl_timeout"></a>
- `merge_with_ttl_timeout` — Minimum delay in seconds before repeating a merge with TTL. Default value: 86400 (1 day).
- `merge_with_ttl_timeout` — Minimum delay in seconds before repeating a merge with delete TTL. Default value: `14400` seconds (4 hours).
- `merge_with_recompression_ttl_timeout` — Minimum delay in seconds before repeating a merge with recompression TTL. Default value: `14400` seconds (4 hours).
- `try_fetch_recompressed_part_timeout` — Timeout (in seconds) before starting merge with recompression. During this time ClickHouse tries to fetch recompressed part from replica which assigned this merge with recompression. Default value: `7200` seconds (2 hours).
- `write_final_mark` — Enables or disables writing the final index mark at the end of data part (after the last byte). Default value: 1. Don't turn it off.
- `merge_max_block_size` — Maximum number of rows in block for merge operations. Default value: 8192.
- `storage_policy` — Storage policy. See [Using Multiple Block Devices for Data Storage](#table_engine-mergetree-multiple-volumes).
@ -333,7 +335,7 @@ SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234
The optional `false_positive` parameter is the probability of receiving a false positive response from the filter. Possible values: (0, 1). Default value: 0.025.
Supported data types: `Int*`, `UInt*`, `Float*`, `Enum`, `Date`, `DateTime`, `String`, `FixedString`, `Array`, `LowCardinality`, `Nullable`.
Supported data types: `Int*`, `UInt*`, `Float*`, `Enum`, `Date`, `DateTime`, `String`, `FixedString`, `Array`, `LowCardinality`, `Nullable`, `UUID`.
The following functions can use it: [equals](../../../sql-reference/functions/comparison-functions.md), [notEquals](../../../sql-reference/functions/comparison-functions.md), [in](../../../sql-reference/functions/in-functions.md), [notIn](../../../sql-reference/functions/in-functions.md), [has](../../../sql-reference/functions/array-functions.md).
@ -416,18 +418,20 @@ Reading from a table is automatically parallelized.
Determines the lifetime of values.
The `TTL` clause can be set for the whole table and for each individual column. Table-level TTL can also specify logic of automatic move of data between disks and volumes.
The `TTL` clause can be set for the whole table and for each individual column. Table-level `TTL` can also specify the logic of automatically moving data between disks and volumes, or recompressing parts where all the data has expired.
Expressions must evaluate to [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md) data type.
Example:
**Syntax**
Setting time-to-live for a column:
``` sql
TTL time_column
TTL time_column + interval
```
To define `interval`, use [time interval](../../../sql-reference/operators/index.md#operators-datetime) operators.
To define `interval`, use [time interval](../../../sql-reference/operators/index.md#operators-datetime) operators, for example:
``` sql
TTL date_time + INTERVAL 1 MONTH
@ -440,9 +444,9 @@ When the values in the column expire, ClickHouse replaces them with the default
The `TTL` clause can't be used for key columns.
Examples:
**Examples**
Creating a table with TTL
Creating a table with `TTL`:
``` sql
CREATE TABLE example_table
@ -475,11 +479,11 @@ ALTER TABLE example_table
### Table TTL {#mergetree-table-ttl}
Table can have an expression for removal of expired rows, and multiple expressions for automatic move of parts between [disks or volumes](#table_engine-mergetree-multiple-volumes). When rows in the table expire, ClickHouse deletes all corresponding rows. For parts moving feature, all rows of a part must satisfy the movement expression criteria.
Table can have an expression for removal of expired rows, and multiple expressions for automatic move of parts between [disks or volumes](#table_engine-mergetree-multiple-volumes). When rows in the table expire, ClickHouse deletes all corresponding rows. For parts moving or recompressing, all rows of a part must satisfy the `TTL` expression criteria.
``` sql
TTL expr
[DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'][, DELETE|TO DISK 'aaa'|TO VOLUME 'bbb'] ...
[DELETE|RECOMPRESS codec_name1|TO DISK 'xxx'|TO VOLUME 'xxx'][, DELETE|RECOMPRESS codec_name2|TO DISK 'aaa'|TO VOLUME 'bbb'] ...
[WHERE conditions]
[GROUP BY key_expr [SET v1 = aggr_func(v1) [, v2 = aggr_func(v2) ...]] ]
```
@ -487,11 +491,12 @@ TTL expr
Type of TTL rule may follow each TTL expression. It affects an action which is to be done once the expression is satisfied (reaches current time):
- `DELETE` - delete expired rows (default action);
- `RECOMPRESS codec_name` - recompress data part with the `codec_name`;
- `TO DISK 'aaa'` - move part to the disk `aaa`;
- `TO VOLUME 'bbb'` - move part to the volume `bbb`;
- `GROUP BY` - aggregate expired rows.
With `WHERE` clause you may specify which of the expired rows to delete or aggregate (it cannot be applied to moves).
With `WHERE` clause you may specify which of the expired rows to delete or aggregate (it cannot be applied to moves or recompression).
`GROUP BY` expression must be a prefix of the table primary key.
@ -499,7 +504,7 @@ If a column is not part of the `GROUP BY` expression and is not set explicitly i
**Examples**
Creating a table with TTL:
Creating a table with `TTL`:
``` sql
CREATE TABLE example_table
@ -515,7 +520,7 @@ TTL d + INTERVAL 1 MONTH [DELETE],
d + INTERVAL 2 WEEK TO DISK 'bbb';
```
Altering TTL of the table:
Altering `TTL` of the table:
``` sql
ALTER TABLE example_table
@ -536,6 +541,21 @@ ORDER BY d
TTL d + INTERVAL 1 MONTH DELETE WHERE toDayOfWeek(d) = 1;
```
Creating a table, where expired rows are recompressed:
```sql
CREATE TABLE table_for_recompression
(
d DateTime,
key UInt64,
value String
) ENGINE MergeTree()
ORDER BY tuple()
PARTITION BY key
TTL d + INTERVAL 1 MONTH RECOMPRESS CODEC(ZSTD(17)), d + INTERVAL 1 YEAR RECOMPRESS CODEC(LZ4HC(10))
SETTINGS min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0;
```
Creating a table, where expired rows are aggregated. In the resulting rows, `x` contains the maximum value across the grouped rows, `y` — the minimum value, and `d` — any occasional value from grouped rows.
``` sql
@ -552,14 +572,19 @@ ORDER BY (k1, k2)
TTL d + INTERVAL 1 MONTH GROUP BY k1, k2 SET x = max(x), y = min(y);
```
**Removing Data**
### Removing Expired Data {#mergetree-removing-expired-data}
Data with an expired TTL is removed when ClickHouse merges data parts.
Data with an expired `TTL` is removed when ClickHouse merges data parts.
When ClickHouse see that data is expired, it performs an off-schedule merge. To control the frequency of such merges, you can set `merge_with_ttl_timeout`. If the value is too low, it will perform many off-schedule merges that may consume a lot of resources.
When ClickHouse detects that data is expired, it performs an off-schedule merge. To control the frequency of such merges, you can set `merge_with_ttl_timeout`. If the value is too low, it will perform many off-schedule merges that may consume a lot of resources.
If you perform the `SELECT` query between merges, you may get expired data. To avoid it, use the [OPTIMIZE](../../../sql-reference/statements/optimize.md) query before `SELECT`.
**See Also**
- [ttl_only_drop_parts](../../../operations/settings/settings.md#ttl_only_drop_parts) setting
## Using Multiple Block Devices for Data Storage {#table_engine-mergetree-multiple-volumes}
### Introduction {#introduction}

View File

@ -24,7 +24,7 @@ The `Format` parameter specifies one of the available file formats. To perform
`INSERT` queries for output. The available formats are listed in the
[Formats](../../../interfaces/formats.md#formats) section.
ClickHouse does not allow specifying filesystem path for`File`. It will use folder defined by [path](../../../operations/server-configuration-parameters/settings.md) setting in server configuration.
ClickHouse does not allow specifying filesystem path for `File`. It will use folder defined by [path](../../../operations/server-configuration-parameters/settings.md) setting in server configuration.
When creating table using `File(Format)` it creates empty subdirectory in that folder. When data is written to that table, it's put into `data.Format` file in that subdirectory.

View File

@ -2041,10 +2041,25 @@ Default value: 0.
## input_format_parallel_parsing {#input-format-parallel-parsing}
- Type: bool
- Default value: True
Enables or disables order-preserving parallel parsing of data formats. Supported only for [TSV](../../interfaces/formats.md#tabseparated), [TSKV](../../interfaces/formats.md#tskv), [CSV](../../interfaces/formats.md#csv) and [JSONEachRow](../../interfaces/formats.md#jsoneachrow) formats.
Enable order-preserving parallel parsing of data formats. Supported only for TSV, TKSV, CSV, and JSONEachRow formats.
Possible values:
- 1 — Enabled.
- 0 — Disabled.
Default value: `0`.
## output_format_parallel_formatting {#output-format-parallel-formatting}
Enables or disables parallel formatting of data formats. Supported only for [TSV](../../interfaces/formats.md#tabseparated), [TSKV](../../interfaces/formats.md#tskv), [CSV](../../interfaces/formats.md#csv) and [JSONEachRow](../../interfaces/formats.md#jsoneachrow) formats.
Possible values:
- 1 — Enabled.
- 0 — Disabled.
Default value: `0`.
## min_chunk_bytes_for_parallel_parsing {#min-chunk-bytes-for-parallel-parsing}

View File

@ -9,9 +9,9 @@ A date. Stored in two bytes as the number of days since 1970-01-01 (unsigned). A
The date value is stored without the time zone.
## Examples {#examples}
**Example**
**1.** Creating a table with a `DateTime`-type column and inserting data into it:
Creating a table with a `Date`-type column and inserting data into it:
``` sql
CREATE TABLE dt
@ -23,10 +23,7 @@ ENGINE = TinyLog;
```
``` sql
INSERT INTO dt Values (1546300800, 1), ('2019-01-01', 2);
```
``` sql
INSERT INTO dt VALUES (1546300800, 1), ('2019-01-01', 2);
SELECT * FROM dt;
```
@ -37,11 +34,8 @@ SELECT * FROM dt;
└────────────┴──────────┘
```
## See Also {#see-also}
**See Also**
- [Functions for working with dates and times](../../sql-reference/functions/date-time-functions.md)
- [Operators for working with dates and times](../../sql-reference/operators/index.md#operators-datetime)
- [`DateTime` data type](../../sql-reference/data-types/datetime.md)
[Original article](https://clickhouse.tech/docs/en/data_types/date/) <!--hide-->

View File

@ -0,0 +1,40 @@
---
toc_priority: 48
toc_title: Date32
---
# Date32 {#data_type-datetime32}
A date. Supports the same date range as [DateTime64](../../sql-reference/data-types/datetime64.md). Stored in four bytes as the number of days since 1925-01-01. Allows storing values till 2283-11-11.
**Examples**
Creating a table with a `Date32`-type column and inserting data into it:
``` sql
CREATE TABLE new
(
`timestamp` Date32,
`event_id` UInt8
)
ENGINE = TinyLog;
```
``` sql
INSERT INTO new VALUES (4102444800, 1), ('2100-01-01', 2);
SELECT * FROM new;
```
``` text
┌──timestamp─┬─event_id─┐
│ 2100-01-01 │ 1 │
│ 2100-01-01 │ 2 │
└────────────┴──────────┘
```
**See Also**
- [toDate32](../../sql-reference/functions/type-conversion-functions.md#todate32)
- [toDate32OrZero](../../sql-reference/functions/type-conversion-functions.md#todate32-or-zero)
- [toDate32OrNull](../../sql-reference/functions/type-conversion-functions.md#todate32-or-null)

View File

@ -17,7 +17,7 @@ DateTime64(precision, [timezone])
Internally, stores data as a number of ticks since epoch start (1970-01-01 00:00:00 UTC) as Int64. The tick resolution is determined by the precision parameter. Additionally, the `DateTime64` type can store time zone that is the same for the entire column, that affects how the values of the `DateTime64` type values are displayed in text format and how the values specified as strings are parsed (2020-01-01 05:00:01.000). The time zone is not stored in the rows of the table (or in resultset), but is stored in the column metadata. See details in [DateTime](../../sql-reference/data-types/datetime.md).
Supported range from January 1, 1925 till December 31, 2283.
Supported range from January 1, 1925 till November 11, 2283.
## Examples {#examples}

View File

@ -152,6 +152,104 @@ Alias: `DATE`.
## toDateTimeOrNull {#todatetimeornull}
## toDate32 {#todate32}
Converts the argument to the [Date32](../../sql-reference/data-types/date32.md) data type. If the value is outside the range, returns the border values supported by `Date32`. If the argument has [Date](../../sql-reference/data-types/date.md) type, borders of `Date` are taken into account.
**Syntax**
``` sql
toDate32(expr)
```
**Arguments**
- `expr` — The value. [String](../../sql-reference/data-types/string.md), [UInt32](../../sql-reference/data-types/int-uint.md) or [Date](../../sql-reference/data-types/date.md).
**Returned value**
- A calendar date.
Type: [Date32](../../sql-reference/data-types/date32.md).
**Example**
1. The value is within the range:
``` sql
SELECT toDate32('1955-01-01') AS value, toTypeName(value);
```
``` text
┌──────value─┬─toTypeName(toDate32('1955-01-01'))─┐
│ 1955-01-01 │ Date32 │
└────────────┴────────────────────────────────────┘
```
2. The value is outside the range:
``` sql
SELECT toDate32('1924-01-01') AS value, toTypeName(value);
```
``` text
┌──────value─┬─toTypeName(toDate32('1924-01-01'))─┐
│ 1925-01-01 │ Date32 │
└────────────┴────────────────────────────────────┘
```
3. With `Date`-type argument:
``` sql
SELECT toDate32(toDate('1924-01-01')) AS value, toTypeName(value);
```
``` text
┌──────value─┬─toTypeName(toDate32(toDate('1924-01-01')))─┐
│ 1970-01-01 │ Date32 │
└────────────┴────────────────────────────────────────────┘
```
## toDate32OrZero {#todate32-or-zero}
The same as [toDate32](#todate32) but returns the min value of [Date32](../../sql-reference/data-types/date32.md) if an invalid argument is received.
**Example**
Query:
``` sql
SELECT toDate32OrZero('1924-01-01'), toDate32OrZero('');
```
Result:
``` text
┌─toDate32OrZero('1924-01-01')─┬─toDate32OrZero('')─┐
│ 1925-01-01 │ 1925-01-01 │
└──────────────────────────────┴────────────────────┘
```
## toDate32OrNull {#todate32-or-null}
The same as [toDate32](#todate32) but returns `NULL` if an invalid argument is received.
**Example**
Query:
``` sql
SELECT toDate32OrNull('1955-01-01'), toDate32OrNull('');
```
Result:
``` text
┌─toDate32OrNull('1955-01-01')─┬─toDate32OrNull('')─┐
│ 1955-01-01 │ ᴺᵁᴸᴸ │
└──────────────────────────────┴────────────────────┘
```
## toDecimal(32\|64\|128\|256) {#todecimal3264128256}
Converts `value` to the [Decimal](../../sql-reference/data-types/decimal.md) data type with precision of `S`. The `value` can be a number or a string. The `S` (scale) parameter specifies the number of decimal places.

View File

@ -6,7 +6,7 @@ toc_title: JOIN
Join produces a new table by combining columns from one or multiple tables by using values common to each. It is a common operation in databases with SQL support, which corresponds to [relational algebra](https://en.wikipedia.org/wiki/Relational_algebra#Joins_and_join-like_operators) join. The special case of one table join is often referred to as “self-join”.
Syntax:
**Syntax**
``` sql
SELECT <expr_list>
@ -38,7 +38,7 @@ Additional join types available in ClickHouse:
## Settings {#join-settings}
The default join type can be overriden using [join_default_strictness](../../../operations/settings/settings.md#settings-join_default_strictness) setting.
The default join type can be overridden using [join_default_strictness](../../../operations/settings/settings.md#settings-join_default_strictness) setting.
The behavior of ClickHouse server for `ANY JOIN` operations depends on the [any_join_distinct_right_table_keys](../../../operations/settings/settings.md#any_join_distinct_right_table_keys) setting.
@ -52,6 +52,61 @@ The behavior of ClickHouse server for `ANY JOIN` operations depends on the [any_
- [join_on_disk_max_files_to_merge](../../../operations/settings/settings.md#join_on_disk_max_files_to_merge)
- [any_join_distinct_right_table_keys](../../../operations/settings/settings.md#any_join_distinct_right_table_keys)
## ON Section Conditions {#on-section-conditions}
An `ON` section can contain several conditions combined using the `AND` operator. Conditions specifying join keys must refer to both left and right tables and must use the equality operator. Other conditions may use other logical operators but they must refer to either the left or the right table of a query.
Rows are joined if the whole complex condition is met. If the conditions are not met, rows may still be included in the result depending on the `JOIN` type. Note that if the same conditions are placed in a `WHERE` section and they are not met, then rows are always filtered out from the result.
!!! note "Note"
The `OR` operator inside an `ON` section is not supported yet.
!!! note "Note"
If a condition refers to columns from different tables, then only the equality operator (`=`) is supported so far.
**Example**
Consider `table_1` and `table_2`:
```
┌─Id─┬─name─┐ ┌─Id─┬─text───────────┬─scores─┐
│ 1 │ A │ │ 1 │ Text A │ 10 │
│ 2 │ B │ │ 1 │ Another text A │ 12 │
│ 3 │ C │ │ 2 │ Text B │ 15 │
└────┴──────┘ └────┴────────────────┴────────┘
```
Query with one join key condition and an additional condition for `table_2`:
``` sql
SELECT name, text FROM table_1 LEFT OUTER JOIN table_2
ON table_1.Id = table_2.Id AND startsWith(table_2.text, 'Text');
```
Note that the result contains the row with the name `C` and the empty text column. It is included in the result because an `OUTER` type of a join is used.
```
┌─name─┬─text───┐
│ A │ Text A │
│ B │ Text B │
│ C │ │
└──────┴────────┘
```
Query with `INNER` type of a join and multiple conditions:
``` sql
SELECT name, text, scores FROM table_1 INNER JOIN table_2
ON table_1.Id = table_2.Id AND table_2.scores > 10 AND startsWith(table_2.text, 'Text');
```
Result:
```
┌─name─┬─text───┬─scores─┐
│ B │ Text B │ 15 │
└──────┴────────┴────────┘
```
## ASOF JOIN Usage {#asof-join-usage}
`ASOF JOIN` is useful when you need to join records that have no exact match.
@ -59,7 +114,7 @@ The behavior of ClickHouse server for `ANY JOIN` operations depends on the [any_
Algorithm requires the special column in tables. This column:
- Must contain an ordered sequence.
- Can be one of the following types: [Int*, UInt*](../../../sql-reference/data-types/int-uint.md), [Float\*](../../../sql-reference/data-types/float.md), [Date](../../../sql-reference/data-types/date.md), [DateTime](../../../sql-reference/data-types/datetime.md), [Decimal\*](../../../sql-reference/data-types/decimal.md).
- Can be one of the following types: [Int, UInt](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md), [Date](../../../sql-reference/data-types/date.md), [DateTime](../../../sql-reference/data-types/datetime.md), [Decimal](../../../sql-reference/data-types/decimal.md).
- Can't be the only column in the `JOIN` clause.
Syntax `ASOF JOIN ... ON`:
@ -84,7 +139,7 @@ ASOF JOIN table_2
USING (equi_column1, ... equi_columnN, asof_column)
```
`ASOF JOIN` uses `equi_columnX` for joining on equality and `asof_column` for joining on the closest match with the `table_1.asof_column >= table_2.asof_column` condition. The `asof_column` column always the last one in the `USING` clause.
`ASOF JOIN` uses `equi_columnX` for joining on equality and `asof_column` for joining on the closest match with the `table_1.asof_column >= table_2.asof_column` condition. The `asof_column` column is always the last one in the `USING` clause.
For example, consider the following tables:

View File

@ -14,7 +14,7 @@ You can use table functions in:
The method for creating a temporary table that is available only in the current query. The table is deleted when the query finishes.
- [CREATE TABLE AS \<table_function()\>](../../sql-reference/statements/create/table.md) query.
- [CREATE TABLE AS table_function()](../../sql-reference/statements/create/table.md) query.
It's one of the methods of creating a table.
@ -34,5 +34,6 @@ You can use table functions in:
| [odbc](../../sql-reference/table-functions/odbc.md) | Creates a [ODBC](../../engines/table-engines/integrations/odbc.md)-engine table. |
| [hdfs](../../sql-reference/table-functions/hdfs.md) | Creates a [HDFS](../../engines/table-engines/integrations/hdfs.md)-engine table. |
| [s3](../../sql-reference/table-functions/s3.md) | Creates a [S3](../../engines/table-engines/integrations/s3.md)-engine table. |
| [sqlite](../../sql-reference/table-functions/sqlite.md) | Creates a [sqlite](../../engines/table-engines/integrations/sqlite.md)-engine table. |
[Original article](https://clickhouse.tech/docs/en/sql-reference/table-functions/) <!--hide-->

View File

@ -0,0 +1,45 @@
---
toc_priority: 55
toc_title: sqlite
---
## sqlite {#sqlite}
Allows performing queries on data stored in an [SQLite](../../engines/database-engines/sqlite.md) database.
**Syntax**
``` sql
sqlite('db_path', 'table_name')
```
**Arguments**
- `db_path` — Path to a file with an SQLite database. [String](../../sql-reference/data-types/string.md).
- `table_name` — Name of a table in the SQLite database. [String](../../sql-reference/data-types/string.md).
**Returned value**
- A table object with the same columns as in the original `SQLite` table.
**Example**
Query:
``` sql
SELECT * FROM sqlite('sqlite.db', 'table1') ORDER BY col2;
```
Result:
``` text
┌─col1──┬─col2─┐
│ line1 │ 1 │
│ line2 │ 2 │
│ line3 │ 3 │
└───────┴──────┘
```
**See Also**
- [SQLite](../../engines/table-engines/integrations/sqlite.md) table engine

View File

@ -36,7 +36,7 @@ ClickHouse - полноценная колоночная СУБД. Данные
`IDataType` и `IColumn` слабо связаны друг с другом. Различные типы данных могут быть представлены в памяти с помощью одной реализации `IColumn`. Например, и `DataTypeUInt32`, и `DataTypeDateTime` в памяти представлены как `ColumnUInt32` или `ColumnConstUInt32`. Вдобавок к этому, один тип данных может быть представлен различными реализациями `IColumn`. Например, `DataTypeUInt8` может быть представлен как `ColumnUInt8` и `ColumnConstUInt8`.
`IDataType` хранит только метаданные. Например, `DataTypeUInt8` не хранить ничего (кроме скрытого указателя `vptr`), а `DataTypeFixedString` хранит только `N` (фиксированный размер строки).
`IDataType` хранит только метаданные. Например, `DataTypeUInt8` не хранит ничего (кроме скрытого указателя `vptr`), а `DataTypeFixedString` хранит только `N` (фиксированный размер строки).
В `IDataType` есть вспомогательные методы для данных различного формата. Среди них методы сериализации значений, допускающих использование кавычек, сериализации значения в JSON или XML. Среди них нет прямого соответствия форматам данных. Например, различные форматы `Pretty` и `TabSeparated` могут использовать один вспомогательный метод `serializeTextEscaped` интерфейса `IDataType`.
@ -62,7 +62,7 @@ ClickHouse - полноценная колоночная СУБД. Данные
> Потоки блоков используют «втягивающий» (pull) подход к управлению потоком выполнения: когда вы вытягиваете блок из первого потока, он, следовательно, вытягивает необходимые блоки из вложенных потоков, так и работает весь конвейер выполнения. Ни «pull» ни «push» не имеют явного преимущества, потому что поток управления неявный, и это ограничивает в реализации различных функций, таких как одновременное выполнение нескольких запросов (слияние нескольких конвейеров вместе). Это ограничение можно преодолеть с помощью сопрограмм (coroutines) или просто запуском дополнительных потоков, которые ждут друг друга. У нас может быть больше возможностей, если мы сделаем поток управления явным: если мы локализуем логику для передачи данных из одной расчетной единицы в другую вне этих расчетных единиц. Читайте эту [статью](http://journal.stuffwithstuff.com/2013/01/13/iteration-inside-and-out/) для углубленного изучения.
Следует отметить, что конвейер выполнения запроса создает временные данные на каждом шаге. Мы стараемся сохранить размер блока достаточно маленьким, чтобы временные данные помещались в кэш процессора. При таком допущении запись и чтение временных данных практически бесплатны по сравнению с другими расчетами. Мы могли бы рассмотреть альтернативу, которая заключается в том, чтобы объединить многие операции в конвеере вместе. Это может сделать конвейер как можно короче и удалить большую часть временных данных, что может быть преимуществом, но у такого подхода также есть недостатки. Например, разделенный конвейер позволяет легко реализовать кэширование промежуточных данных, использование промежуточных данных из аналогичных запросов, выполняемых одновременно, и объединение конвейеров для аналогичных запросов.
Следует отметить, что конвейер выполнения запроса создает временные данные на каждом шаге. Мы стараемся сохранить размер блока достаточно маленьким, чтобы временные данные помещались в кэш процессора. При таком допущении запись и чтение временных данных практически бесплатны по сравнению с другими расчетами. Мы могли бы рассмотреть альтернативу, которая заключается в том, чтобы объединить многие операции в конвейере вместе. Это может сделать конвейер как можно короче и удалить большую часть временных данных, что может быть преимуществом, но у такого подхода также есть недостатки. Например, разделенный конвейер позволяет легко реализовать кэширование промежуточных данных, использование промежуточных данных из аналогичных запросов, выполняемых одновременно, и объединение конвейеров для аналогичных запросов.
## Форматы {#formats}
@ -119,7 +119,7 @@ ClickHouse - полноценная колоночная СУБД. Данные
Существуют обычные функции и агрегатные функции. Агрегатные функции смотрите в следующем разделе.
Обычный функции не изменяют число строк и работают так, как если бы обрабатывали каждую строку независимо. В действительности же, функции вызываются не к отдельным строкам, а блокам данных для реализации векторизованного выполнения запросов.
Обычные функции не изменяют число строк и работают так, как если бы обрабатывали каждую строку независимо. В действительности же, функции вызываются не к отдельным строкам, а блокам данных для реализации векторизованного выполнения запросов.
Некоторые функции, такие как [blockSize](../sql-reference/functions/other-functions.md#function-blocksize), [rowNumberInBlock](../sql-reference/functions/other-functions.md#function-rownumberinblock), и [runningAccumulate](../sql-reference/functions/other-functions.md#runningaccumulate), эксплуатируют блочную обработку и нарушают независимость строк.
@ -162,7 +162,7 @@ ClickHouse имеет сильную типизацию, поэтому нет
Сервера в кластере в основном независимы. Вы можете создать `Распределенную` (`Distributed`) таблицу на одном или всех серверах в кластере. Такая таблица сама по себе не хранит данные - она только предоставляет возможность "просмотра" всех локальных таблиц на нескольких узлах кластера. При выполнении `SELECT` распределенная таблица переписывает запрос, выбирает удаленные узлы в соответствии с настройками балансировки нагрузки и отправляет им запрос. Распределенная таблица просит удаленные сервера обработать запрос до той стадии, когда промежуточные результаты с разных серверов могут быть объединены. Затем он получает промежуточные результаты и объединяет их. Распределенная таблица пытается возложить как можно больше работы на удаленные серверы и сократить объем промежуточных данных, передаваемых по сети.
Ситуация усложняется, при использовании подзапросов в случае `IN` или `JOIN`, когда каждый из них использует таблицу `Distributed`. Есть разные стратегии для выполнения таких запросов.
Ситуация усложняется при использовании подзапросов в случае `IN` или `JOIN`, когда каждый из них использует таблицу `Distributed`. Есть разные стратегии для выполнения таких запросов.
Глобального плана выполнения распределенных запросов не существует. Каждый узел имеет собственный локальный план для своей части работы. У нас есть простое однонаправленное выполнение распределенных запросов: мы отправляем запросы на удаленные узлы и затем объединяем результаты. Но это невозможно для сложных запросов `GROUP BY` высокой кардинальности или запросов с большим числом временных данных в `JOIN`: в таких случаях нам необходимо перераспределить («reshuffle») данные между серверами, что требует дополнительной координации. ClickHouse не поддерживает выполнение запросов такого рода, и нам нужно работать над этим.

View File

@ -0,0 +1,79 @@
---
toc_priority: 32
toc_title: SQLite
---
# SQLite {#sqlite}
Движок баз данных позволяет подключаться к базе [SQLite](https://www.sqlite.org/index.html) и выполнять запросы `INSERT` и `SELECT` для обмена данными между ClickHouse и SQLite.
## Создание базы данных {#creating-a-database}
``` sql
CREATE DATABASE sqlite_database
ENGINE = SQLite('db_path')
```
**Параметры движка**
- `db_path` — путь к файлу с базой данных SQLite.
## Поддерживаемые типы данных {#data_types-support}
| SQLite | ClickHouse |
|---------------|---------------------------------------------------------|
| INTEGER | [Int32](../../sql-reference/data-types/int-uint.md) |
| REAL | [Float32](../../sql-reference/data-types/float.md) |
| TEXT | [String](../../sql-reference/data-types/string.md) |
| BLOB | [String](../../sql-reference/data-types/string.md) |
## Особенности и рекомендации {#specifics-and-recommendations}
SQLite хранит всю базу данных (определения, таблицы, индексы и сами данные) в виде единого кроссплатформенного файла на хост-машине. Во время записи SQLite блокирует весь файл базы данных, поэтому операции записи выполняются последовательно. Операции чтения могут быть многозадачными.
SQLite не требует управления службами (например, сценариями запуска) или контроля доступа на основе `GRANT` и паролей. Контроль доступа осуществляется с помощью разрешений файловой системы, предоставляемых самому файлу базы данных.
## Примеры использования {#usage-example}
Отобразим список таблиц базы данных в ClickHouse, подключенной к SQLite:
``` sql
CREATE DATABASE sqlite_db ENGINE = SQLite('sqlite.db');
SHOW TABLES FROM sqlite_db;
```
``` text
┌──name───┐
│ table1 │
│ table2 │
└─────────┘
```
Отобразим содержимое таблицы:
``` sql
SELECT * FROM sqlite_db.table1;
```
``` text
┌─col1──┬─col2─┐
│ line1 │ 1 │
│ line2 │ 2 │
│ line3 │ 3 │
└───────┴──────┘
```
Вставим данные в таблицу SQLite из таблицы ClickHouse:
``` sql
CREATE TABLE clickhouse_table(`col1` String,`col2` Int16) ENGINE = MergeTree() ORDER BY col2;
INSERT INTO clickhouse_table VALUES ('text',10);
INSERT INTO sqlite_db.table1 SELECT * FROM clickhouse_table;
SELECT * FROM sqlite_db.table1;
```
``` text
┌─col1──┬─col2─┐
│ line1 │ 1 │
│ line2 │ 2 │
│ line3 │ 3 │
│ text │ 10 │
└───────┴──────┘
```

View File

@ -0,0 +1,59 @@
---
toc_priority: 7
toc_title: SQLite
---
# SQLite {#sqlite}
Движок позволяет импортировать и экспортировать данные из SQLite, а также поддерживает отправку запросов к таблицам SQLite напрямую из ClickHouse.
## Создание таблицы {#creating-a-table}
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name
(
name1 [type1],
name2 [type2], ...
) ENGINE = SQLite('db_path', 'table')
```
**Параметры движка**
- `db_path` — путь к файлу с базой данных SQLite.
- `table` — имя таблицы в базе данных SQLite.
## Примеры использования {#usage-example}
Отобразим запрос, с помощью которого была создана таблица SQLite:
```sql
SHOW CREATE TABLE sqlite_db.table2;
```
``` text
CREATE TABLE SQLite.table2
(
`col1` Nullable(Int32),
`col2` Nullable(String)
)
ENGINE = SQLite('sqlite.db','table2');
```
Получим данные из таблицы:
``` sql
SELECT * FROM sqlite_db.table2 ORDER BY col1;
```
```text
┌─col1─┬─col2──┐
│ 1 │ text1 │
│ 2 │ text2 │
│ 3 │ text3 │
└──────┴───────┘
```
**См. также**
- [SQLite](../../../engines/database-engines/sqlite.md) движок баз данных
- [sqlite](../../../sql-reference/table-functions/sqlite.md) табличная функция

View File

@ -86,7 +86,9 @@ ORDER BY expr
- `enable_mixed_granularity_parts` — включает или выключает переход к ограничению размера гранул с помощью настройки `index_granularity_bytes`. Настройка `index_granularity_bytes` улучшает производительность ClickHouse при выборке данных из таблиц с большими (десятки и сотни мегабайтов) строками. Если у вас есть таблицы с большими строками, можно включить эту настройку, чтобы повысить эффективность запросов `SELECT`.
- `use_minimalistic_part_header_in_zookeeper` — Способ хранения заголовков кусков данных в ZooKeeper. Если `use_minimalistic_part_header_in_zookeeper = 1`, то ZooKeeper хранит меньше данных. Подробнее читайте в [описании настройки](../../../operations/server-configuration-parameters/settings.md#server-settings-use_minimalistic_part_header_in_zookeeper) в разделе "Конфигурационные параметры сервера".
- `min_merge_bytes_to_use_direct_io` — минимальный объём данных при слиянии, необходимый для прямого (небуферизованного) чтения/записи (direct I/O) на диск. При слиянии частей данных ClickHouse вычисляет общий объём хранения всех данных, подлежащих слиянию. Если общий объём хранения всех данных для чтения превышает `min_bytes_to_use_direct_io` байт, тогда ClickHouse использует флаг `O_DIRECT` при чтении данных с диска. Если `min_merge_bytes_to_use_direct_io = 0`, тогда прямой ввод-вывод отключен. Значение по умолчанию: `10 * 1024 * 1024 * 1024` байтов.
- <a name="mergetree_setting-merge_with_ttl_timeout"></a>`merge_with_ttl_timeout` — минимальное время в секундах перед повторным слиянием с TTL. По умолчанию — 86400 (1 день).
- `merge_with_ttl_timeout` — минимальное время в секундах перед повторным слиянием для удаления данных с истекшим TTL. По умолчанию: `14400` секунд (4 часа).
- `merge_with_recompression_ttl_timeout` — минимальное время в секундах перед повторным слиянием для повторного сжатия данных с истекшим TTL. По умолчанию: `14400` секунд (4 часа).
- `try_fetch_recompressed_part_timeout` — время ожидания (в секундах) перед началом слияния с повторным сжатием. В течение этого времени ClickHouse пытается извлечь сжатую часть из реплики, которая назначила это слияние. Значение по умолчанию: `7200` секунд (2 часа).
- `write_final_mark` — включает или отключает запись последней засечки индекса в конце куска данных, указывающей за последний байт. По умолчанию — 1. Не отключайте её.
- `merge_max_block_size` — максимальное количество строк в блоке для операций слияния. Значение по умолчанию: 8192.
- `storage_policy` — политика хранения данных. Смотрите [Хранение данных таблицы на нескольких блочных устройствах](#table_engine-mergetree-multiple-volumes).
@ -401,20 +403,22 @@ INDEX b (u64 * length(str), i32 + f64 * 100, date, str) TYPE set(100) GRANULARIT
## TTL для столбцов и таблиц {#table_engine-mergetree-ttl}
Определяет время жизни значений, а также правила перемещения данных на другой диск или том.
Определяет время жизни значений.
Секция `TTL` может быть установлена как для всей таблицы, так и для каждого отдельного столбца. Правила `TTL` для таблицы позволяют указать целевые диски или тома для фонового перемещения на них частей данных.
Секция `TTL` может быть установлена как для всей таблицы, так и для каждого отдельного столбца. Для таблиц можно установить правила `TTL` для фонового перемещения кусков данных на целевые диски или тома, или правила повторного сжатия кусков данных.
Выражения должны возвращать тип [Date](../../../engines/table-engines/mergetree-family/mergetree.md) или [DateTime](../../../engines/table-engines/mergetree-family/mergetree.md).
Для задания времени жизни столбца, например:
**Синтаксис**
Для задания времени жизни столбца:
``` sql
TTL time_column
TTL time_column + interval
```
Чтобы задать `interval`, используйте операторы [интервала времени](../../../engines/table-engines/mergetree-family/mergetree.md#operators-datetime).
Чтобы задать `interval`, используйте операторы [интервала времени](../../../engines/table-engines/mergetree-family/mergetree.md#operators-datetime), например:
``` sql
TTL date_time + INTERVAL 1 MONTH
@ -423,13 +427,13 @@ TTL date_time + INTERVAL 15 HOUR
### TTL столбца {#mergetree-column-ttl}
Когда срок действия значений в столбце истечет, ClickHouse заменит их значениями по умолчанию для типа данных столбца. Если срок действия всех значений столбцов в части данных истек, ClickHouse удаляет столбец из куска данных в файловой системе.
Когда срок действия значений в столбце истечёт, ClickHouse заменит их значениями по умолчанию для типа данных столбца. Если срок действия всех значений столбцов в части данных истек, ClickHouse удаляет столбец из куска данных в файловой системе.
Секцию `TTL` нельзя использовать для ключевых столбцов.
Примеры:
**Примеры**
Создание таблицы с TTL
Создание таблицы с `TTL`:
``` sql
CREATE TABLE example_table
@ -444,7 +448,7 @@ PARTITION BY toYYYYMM(d)
ORDER BY d;
```
Добавление TTL на колонку существующей таблицы
Добавление `TTL` на колонку существующей таблицы:
``` sql
ALTER TABLE example_table
@ -452,7 +456,7 @@ ALTER TABLE example_table
c String TTL d + INTERVAL 1 DAY;
```
Изменение TTL у колонки
Изменение `TTL` у колонки:
``` sql
ALTER TABLE example_table
@ -462,23 +466,24 @@ ALTER TABLE example_table
### TTL таблицы {#mergetree-table-ttl}
Для таблицы можно задать одно выражение для устаревания данных, а также несколько выражений, по срабатывании которых данные переместятся на [некоторый диск или том](#table_engine-mergetree-multiple-volumes). Когда некоторые данные в таблице устаревают, ClickHouse удаляет все соответствующие строки.
Для таблицы можно задать одно выражение для устаревания данных, а также несколько выражений, при срабатывании которых данные будут перемещены на [некоторый диск или том](#table_engine-mergetree-multiple-volumes). Когда некоторые данные в таблице устаревают, ClickHouse удаляет все соответствующие строки. Операции перемещения или повторного сжатия данных выполняются только когда устаревают все данные в куске.
``` sql
TTL expr
[DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'][, DELETE|TO DISK 'aaa'|TO VOLUME 'bbb'] ...
[DELETE|RECOMPRESS codec_name1|TO DISK 'xxx'|TO VOLUME 'xxx'][, DELETE|RECOMPRESS codec_name2|TO DISK 'aaa'|TO VOLUME 'bbb'] ...
[WHERE conditions]
[GROUP BY key_expr [SET v1 = aggr_func(v1) [, v2 = aggr_func(v2) ...]] ]
```
За каждым TTL выражением может следовать тип действия, которое выполняется после достижения времени, соответствующего результату TTL выражения:
За каждым `TTL` выражением может следовать тип действия, которое выполняется после достижения времени, соответствующего результату `TTL` выражения:
- `DELETE` - удалить данные (действие по умолчанию);
- `RECOMPRESS codec_name` - повторно сжать данные с помощью кодека `codec_name`;
- `TO DISK 'aaa'` - переместить данные на диск `aaa`;
- `TO VOLUME 'bbb'` - переместить данные на том `bbb`;
- `GROUP BY` - агрегировать данные.
В секции `WHERE` можно задать условие удаления или агрегирования устаревших строк (для перемещения условие `WHERE` не применимо).
В секции `WHERE` можно задать условие удаления или агрегирования устаревших строк (для перемещения и сжатия условие `WHERE` не применимо).
Колонки, по которым агрегируются данные в `GROUP BY`, должны являться префиксом первичного ключа таблицы.
@ -486,7 +491,7 @@ TTL expr
**Примеры**
Создание таблицы с TTL:
Создание таблицы с `TTL`:
``` sql
CREATE TABLE example_table
@ -502,7 +507,7 @@ TTL d + INTERVAL 1 MONTH [DELETE],
d + INTERVAL 2 WEEK TO DISK 'bbb';
```
Изменение TTL:
Изменение `TTL`:
``` sql
ALTER TABLE example_table
@ -523,6 +528,21 @@ ORDER BY d
TTL d + INTERVAL 1 MONTH DELETE WHERE toDayOfWeek(d) = 1;
```
Создание таблицы, в которой куски с устаревшими данными повторно сжимаются:
```sql
CREATE TABLE table_for_recompression
(
d DateTime,
key UInt64,
value String
) ENGINE MergeTree()
ORDER BY tuple()
PARTITION BY key
TTL d + INTERVAL 1 MONTH RECOMPRESS CODEC(ZSTD(17)), d + INTERVAL 1 YEAR RECOMPRESS CODEC(LZ4HC(10))
SETTINGS min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0;
```
Создание таблицы, где устаревшие строки агрегируются. В результирующих строках колонка `x` содержит максимальное значение по сгруппированным строкам, `y` — минимальное значение, а `d` — случайное значение из одной из сгруппированных строк.
``` sql
@ -539,14 +559,18 @@ ORDER BY (k1, k2)
TTL d + INTERVAL 1 MONTH GROUP BY k1, k2 SET x = max(x), y = min(y);
```
**Удаление данных**
### Удаление устаревших данных {#mergetree-removing-expired-data}
Данные с истекшим TTL удаляются, когда ClickHouse мёржит куски данных.
Данные с истекшим `TTL` удаляются, когда ClickHouse мёржит куски данных.
Когда ClickHouse видит, что некоторые данные устарели, он выполняет внеплановые мёржи. Для управления частотой подобных мёржей можно задать настройку `merge_with_ttl_timeout`. Если её значение слишком низкое, придется выполнять много внеплановых мёржей, которые могут начать потреблять значительную долю ресурсов сервера.
Если вы выполните запрос `SELECT` между слияниями, вы можете получить устаревшие данные. Чтобы избежать этого, используйте запрос [OPTIMIZE](../../../engines/table-engines/mergetree-family/mergetree.md#misc_operations-optimize) перед `SELECT`.
**См. также**
- настройка [ttl_only_drop_parts](../../../operations/settings/settings.md#ttl_only_drop_parts)
## Хранение данных таблицы на нескольких блочных устройствах {#table_engine-mergetree-multiple-volumes}
### Введение {#introduction}

View File

@ -1865,10 +1865,25 @@ ClickHouse генерирует исключение
## input_format_parallel_parsing {#input-format-parallel-parsing}
- Тип: bool
- Значение по умолчанию: True
Включает или отключает режим, при котором входящие данные разбиваются на части, парсинг каждой из которых осуществляется параллельно с сохранением исходного порядка. Поддерживается только для форматов [TSV](../../interfaces/formats.md#tabseparated), [TSKV](../../interfaces/formats.md#tskv), [CSV](../../interfaces/formats.md#csv) и [JSONEachRow](../../interfaces/formats.md#jsoneachrow).
Включает режим, при котором входящие данные парсятся параллельно, но с сохранением исходного порядка следования. Поддерживается только для форматов TSV, TKSV, CSV и JSONEachRow.
Возможные значения:
- 1 — включен режим параллельного разбора.
- 0 — отключен режим параллельного разбора.
Значение по умолчанию: `1`.
## output_format_parallel_formatting {#output-format-parallel-formatting}
Включает или отключает режим, при котором исходящие данные форматируются параллельно с сохранением исходного порядка. Поддерживается только для форматов [TSV](../../interfaces/formats.md#tabseparated), [TSKV](../../interfaces/formats.md#tskv), [CSV](../../interfaces/formats.md#csv) и [JSONEachRow](../../interfaces/formats.md#jsoneachrow).
Возможные значения:
- 1 — включен режим параллельного форматирования.
- 0 — отключен режим параллельного форматирования.
Значение по умолчанию: `0`.
## min_chunk_bytes_for_parallel_parsing {#min-chunk-bytes-for-parallel-parsing}

View File

@ -9,9 +9,9 @@ toc_title: Date
Дата хранится без учёта часового пояса.
## Примеры {#examples}
**Пример**
**1.** Создание таблицы и добавление в неё данных:
Создание таблицы и добавление в неё данных:
``` sql
CREATE TABLE dt
@ -24,9 +24,6 @@ ENGINE = TinyLog;
``` sql
INSERT INTO dt Values (1546300800, 1), ('2019-01-01', 2);
```
``` sql
SELECT * FROM dt;
```
@ -37,7 +34,7 @@ SELECT * FROM dt;
└────────────┴──────────┘
```
## Смотрите также {#see-also}
**См. также**
- [Функции для работы с датой и временем](../../sql-reference/functions/date-time-functions.md)
- [Операторы для работы с датой и временем](../../sql-reference/operators/index.md#operators-datetime)

View File

@ -0,0 +1,40 @@
---
toc_priority: 48
toc_title: Date32
---
# Date32 {#data_type-datetime32}
Дата. Поддерживается такой же диапазон дат, как для типа [DateTime64](../../sql-reference/data-types/datetime64.md). Значение хранится в четырех байтах и соответствует числу дней, прошедших с 1925-01-01. Поддерживаются значения до 2283-11-11.
**Пример**
Создание таблицы со столбцом типа `Date32` и добавление в нее данных:
``` sql
CREATE TABLE new
(
`timestamp` Date32,
`event_id` UInt8
)
ENGINE = TinyLog;
```
``` sql
INSERT INTO new VALUES (4102444800, 1), ('2100-01-01', 2);
SELECT * FROM new;
```
``` text
┌──timestamp─┬─event_id─┐
│ 2100-01-01 │ 1 │
│ 2100-01-01 │ 2 │
└────────────┴──────────┘
```
**См. также**
- [toDate32](../../sql-reference/functions/type-conversion-functions.md#todate32)
- [toDate32OrZero](../../sql-reference/functions/type-conversion-functions.md#todate32-or-zero)
- [toDate32OrNull](../../sql-reference/functions/type-conversion-functions.md#todate32-or-null)

View File

@ -17,7 +17,7 @@ DateTime64(precision, [timezone])
Данные хранятся в виде количества ‘тиков’, прошедших с момента начала эпохи (1970-01-01 00:00:00 UTC), в Int64. Размер тика определяется параметром precision. Дополнительно, тип `DateTime64` позволяет хранить часовой пояс, единый для всей колонки, который влияет на то, как будут отображаться значения типа `DateTime64` в текстовом виде и как будут парситься значения заданные в виде строк (2020-01-01 05:00:01.000). Часовой пояс не хранится в строках таблицы (выборки), а хранится в метаданных колонки. Подробнее см. [DateTime](datetime.md).
Поддерживаются значения от 1 января 1925 г. и до 31 декабря 2283 г.
Поддерживаются значения от 1 января 1925 г. и до 11 ноября 2283 г.
## Примеры {#examples}

View File

@ -152,6 +152,104 @@ Cиноним: `DATE`.
## toDateTimeOrNull {#todatetimeornull}
## toDate32 {#todate32}
Конвертирует аргумент в значение типа [Date32](../../sql-reference/data-types/date32.md). Если значение выходит за границы диапазона, возвращается пограничное значение `Date32`. Если аргумент имеет тип [Date](../../sql-reference/data-types/date.md), учитываются границы типа `Date`.
**Синтаксис**
``` sql
toDate32(value)
```
**Аргументы**
- `value` — Значение даты. [String](../../sql-reference/data-types/string.md), [UInt32](../../sql-reference/data-types/int-uint.md) или [Date](../../sql-reference/data-types/date.md).
**Возвращаемое значение**
- Календарная дата.
Тип: [Date32](../../sql-reference/data-types/date32.md).
**Пример**
1. Значение находится в границах диапазона:
``` sql
SELECT toDate32('1955-01-01') AS value, toTypeName(value);
```
``` text
┌──────value─┬─toTypeName(toDate32('1955-01-01'))─┐
│ 1955-01-01 │ Date32 │
└────────────┴────────────────────────────────────┘
```
2. Значение выходит за границы диапазона:
``` sql
SELECT toDate32('1924-01-01') AS value, toTypeName(value);
```
``` text
┌──────value─┬─toTypeName(toDate32('1924-01-01'))─┐
│ 1925-01-01 │ Date32 │
└────────────┴────────────────────────────────────┘
```
3. С аргументом типа `Date`:
``` sql
SELECT toDate32(toDate('1924-01-01')) AS value, toTypeName(value);
```
``` text
┌──────value─┬─toTypeName(toDate32(toDate('1924-01-01')))─┐
│ 1970-01-01 │ Date32 │
└────────────┴────────────────────────────────────────────┘
```
## toDate32OrZero {#todate32-or-zero}
То же самое, что и [toDate32](#todate32), но возвращает минимальное значение типа [Date32](../../sql-reference/data-types/date32.md), если получен недопустимый аргумент.
**Пример**
Запрос:
``` sql
SELECT toDate32OrZero('1924-01-01'), toDate32OrZero('');
```
Результат:
``` text
┌─toDate32OrZero('1924-01-01')─┬─toDate32OrZero('')─┐
│ 1925-01-01 │ 1925-01-01 │
└──────────────────────────────┴────────────────────┘
```
## toDate32OrNull {#todate32-or-null}
То же самое, что и [toDate32](#todate32), но возвращает `NULL`, если получен недопустимый аргумент.
**Пример**
Запрос:
``` sql
SELECT toDate32OrNull('1955-01-01'), toDate32OrNull('');
```
Результат:
``` text
┌─toDate32OrNull('1955-01-01')─┬─toDate32OrNull('')─┐
│ 1955-01-01 │ ᴺᵁᴸᴸ │
└──────────────────────────────┴────────────────────┘
```
## toDecimal(32\|64\|128\|256) {#todecimal3264128}
Преобразует `value` к типу данных [Decimal](../../sql-reference/functions/type-conversion-functions.md) с точностью `S`. `value` может быть числом или строкой. Параметр `S` (scale) задаёт число десятичных знаков.

View File

@ -6,7 +6,7 @@ toc_title: JOIN
`JOIN` создаёт новую таблицу путем объединения столбцов из одной или нескольких таблиц с использованием общих для каждой из них значений. Это обычная операция в базах данных с поддержкой SQL, которая соответствует join из [реляционной алгебры](https://en.wikipedia.org/wiki/Relational_algebra#Joins_and_join-like_operators). Частный случай соединения одной таблицы часто называют self-join.
Синтаксис:
**Синтаксис**
``` sql
SELECT <expr_list>
@ -19,7 +19,7 @@ FROM <left_table>
## Поддерживаемые типы соединения {#select-join-types}
Все типы из стандартого [SQL JOIN](https://en.wikipedia.org/wiki/Join_(SQL)) поддерживаются:
Все типы из стандартного [SQL JOIN](https://en.wikipedia.org/wiki/Join_(SQL)) поддерживаются:
- `INNER JOIN`, возвращаются только совпадающие строки.
- `LEFT OUTER JOIN`, не совпадающие строки из левой таблицы возвращаются в дополнение к совпадающим строкам.
@ -33,7 +33,7 @@ FROM <left_table>
- `LEFT SEMI JOIN` и `RIGHT SEMI JOIN`, белый список по ключам соединения, не производит декартово произведение.
- `LEFT ANTI JOIN` и `RIGHT ANTI JOIN`, черный список по ключам соединения, не производит декартово произведение.
- `LEFT ANY JOIN`, `RIGHT ANY JOIN` и `INNER ANY JOIN`, Частично (для противоположных сторон `LEFT` и `RIGHT`) или полностью (для `INNER` и `FULL`) отключает декартово произведение для стандартых видов `JOIN`.
- `LEFT ANY JOIN`, `RIGHT ANY JOIN` и `INNER ANY JOIN`, Частично (для противоположных сторон `LEFT` и `RIGHT`) или полностью (для `INNER` и `FULL`) отключает декартово произведение для стандартных видов `JOIN`.
- `ASOF JOIN` и `LEFT ASOF JOIN`, Для соединения последовательностей по нечеткому совпадению. Использование `ASOF JOIN` описано ниже.
## Настройки {#join-settings}
@ -52,6 +52,61 @@ FROM <left_table>
- [join_on_disk_max_files_to_merge](../../../operations/settings/settings.md#join_on_disk_max_files_to_merge)
- [any_join_distinct_right_table_keys](../../../operations/settings/settings.md#any_join_distinct_right_table_keys)
## Условия в секции ON {#on-section-conditions}
Секция `ON` может содержать несколько условий, связанных оператором `AND`. Условия, задающие ключи соединения, должны содержать столбцы левой и правой таблицы и должны использовать оператор равенства. Прочие условия могут использовать другие логические операторы, но в отдельном условии могут использоваться столбцы либо только левой, либо только правой таблицы.
Строки объединяются только тогда, когда всё составное условие выполнено. Если оно не выполнено, то строки могут попасть в результат в зависимости от типа `JOIN`. Обратите внимание, что если то же самое условие поместить в секцию `WHERE`, то строки, для которых оно не выполняется, никогда не попадут в результат.
!!! note "Примечание"
Оператор `OR` внутри секции `ON` пока не поддерживается.
!!! note "Примечание"
Если в условии использованы столбцы из разных таблиц, то пока поддерживается только оператор равенства (`=`).
**Пример**
Рассмотрим `table_1` и `table_2`:
```
┌─Id─┬─name─┐ ┌─Id─┬─text───────────┬─scores─┐
│ 1 │ A │ │ 1 │ Text A │ 10 │
│ 2 │ B │ │ 1 │ Another text A │ 12 │
│ 3 │ C │ │ 2 │ Text B │ 15 │
└────┴──────┘ └────┴────────────────┴────────┘
```
Запрос с одним условием, задающим ключ соединения, и дополнительным условием для `table_2`:
``` sql
SELECT name, text FROM table_1 LEFT OUTER JOIN table_2
ON table_1.Id = table_2.Id AND startsWith(table_2.text, 'Text');
```
Обратите внимание, что результат содержит строку с именем `C` и пустым текстом. Строка включена в результат, потому что использован тип соединения `OUTER`.
```
┌─name─┬─text───┐
│ A │ Text A │
│ B │ Text B │
│ C │ │
└──────┴────────┘
```
Запрос с типом соединения `INNER` и несколькими условиями:
``` sql
SELECT name, text, scores FROM table_1 INNER JOIN table_2
ON table_1.Id = table_2.Id AND table_2.scores > 10 AND startsWith(table_2.text, 'Text');
```
Результат:
```
┌─name─┬─text───┬─scores─┐
│ B │ Text B │ 15 │
└──────┴────────┴────────┘
```
## Использование ASOF JOIN {#asof-join-usage}
`ASOF JOIN` применим в том случае, когда необходимо объединять записи, которые не имеют точного совпадения.
@ -59,7 +114,7 @@ FROM <left_table>
Для работы алгоритма необходим специальный столбец в таблицах. Этот столбец:
- Должен содержать упорядоченную последовательность.
- Может быть одного из следующих типов: [Int*, UInt*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Date](../../data-types/date.md), [DateTime](../../data-types/datetime.md), [Decimal*](../../data-types/decimal.md).
- Может быть одного из следующих типов: [Int, UInt](../../data-types/int-uint.md), [Float](../../data-types/float.md), [Date](../../data-types/date.md), [DateTime](../../data-types/datetime.md), [Decimal](../../data-types/decimal.md).
- Не может быть единственным столбцом в секции `JOIN`.
Синтаксис `ASOF JOIN ... ON`:

View File

@ -0,0 +1,45 @@
---
toc_priority: 55
toc_title: sqlite
---
## sqlite {#sqlite}
Позволяет выполнять запросы к данным, хранящимся в базе данных [SQLite](../../engines/database-engines/sqlite.md).
**Синтаксис**
``` sql
sqlite('db_path', 'table_name')
```
**Аргументы**
- `db_path` — путь к файлу с базой данных SQLite. [String](../../sql-reference/data-types/string.md).
- `table_name` — имя таблицы в базе данных SQLite. [String](../../sql-reference/data-types/string.md).
**Возвращаемое значение**
- Объект таблицы с теми же столбцами, что и в исходной таблице `SQLite`.
**Пример**
Запрос:
``` sql
SELECT * FROM sqlite('sqlite.db', 'table1') ORDER BY col2;
```
Результат:
``` text
┌─col1──┬─col2─┐
│ line1 │ 1 │
│ line2 │ 2 │
│ line3 │ 3 │
└───────┴──────┘
```
**См. также**
- [SQLite](../../engines/table-engines/integrations/sqlite.md) движок таблиц

View File

@ -12,6 +12,7 @@
#include <Interpreters/executeQuery.h>
#include <Interpreters/loadMetadata.h>
#include <Interpreters/DatabaseCatalog.h>
#include <Interpreters/UserDefinedObjectsLoader.h>
#include <Interpreters/Session.h>
#include <Common/Exception.h>
#include <Common/Macros.h>
@ -287,6 +288,12 @@ try
/// Lock path directory before read
status.emplace(path + "status", StatusFile::write_full_info);
fs::create_directories(fs::path(path) / "user_defined/");
LOG_DEBUG(log, "Loading user defined objects from {}", path);
Poco::File(path + "user_defined/").createDirectories();
UserDefinedObjectsLoader::instance().loadObjects(global_context);
LOG_DEBUG(log, "Loaded user defined objects.");
LOG_DEBUG(log, "Loading metadata from {}", path);
fs::create_directories(fs::path(path) / "data/");
fs::create_directories(fs::path(path) / "metadata/");
@ -376,8 +383,8 @@ void LocalServer::processQueries()
throw Exception("Cannot parse and execute the following part of query: " + String(parse_res.first), ErrorCodes::SYNTAX_ERROR);
/// Authenticate and create a context to execute queries.
Session session{global_context, ClientInfo::Interface::TCP};
session.authenticate("default", "", Poco::Net::SocketAddress{});
Session session{global_context, ClientInfo::Interface::LOCAL};
session.authenticate("default", "", {});
/// Use the same context for all queries.
auto context = session.makeQueryContext();

View File

@ -53,6 +53,7 @@
#include <Interpreters/DNSCacheUpdater.h>
#include <Interpreters/ExternalLoaderXMLConfigRepository.h>
#include <Interpreters/InterserverCredentials.h>
#include <Interpreters/UserDefinedObjectsLoader.h>
#include <Interpreters/JIT/CompiledExpressionCache.h>
#include <Access/AccessControlManager.h>
#include <Storages/StorageReplicatedMergeTree.h>
@ -736,6 +737,10 @@ if (ThreadFuzzer::instance().isEffective())
setupTmpPath(log, disk->getPath());
}
/// Storage keeping all the backups.
fs::create_directories(path / "backups");
global_context->setBackupsVolume(config().getString("backups_path", path / "backups"), config().getString("backups_policy", ""));
/** Directory with 'flags': files indicating temporary settings for the server set by system administrator.
* Flags may be cleared automatically after being applied by the server.
* Examples: do repair of local data; clone all replicated tables from replica.
@ -770,6 +775,7 @@ if (ThreadFuzzer::instance().isEffective())
{
fs::create_directories(path / "data/");
fs::create_directories(path / "metadata/");
fs::create_directories(path / "user_defined/");
/// Directory with metadata of tables, which was marked as dropped by Atomic database
fs::create_directories(path / "metadata_dropped/");
@ -1079,6 +1085,9 @@ if (ThreadFuzzer::instance().isEffective())
/// Wait server pool to avoid use-after-free of destroyed context in the handlers
server_pool.joinAll();
// Uses a raw pointer to global context for getting ZooKeeper.
main_config_reloader.reset();
/** Explicitly destroy Context. It is more convenient than in destructor of Server, because logger is still available.
* At this moment, no one could own shared part of Context.
*/
@ -1091,6 +1100,18 @@ if (ThreadFuzzer::instance().isEffective())
/// system logs may copy global context.
global_context->setCurrentDatabaseNameInGlobalContext(default_database);
LOG_INFO(log, "Loading user defined objects from {}", path_str);
try
{
UserDefinedObjectsLoader::instance().loadObjects(global_context);
}
catch (...)
{
tryLogCurrentException(log, "Caught exception while loading user defined objects");
throw;
}
LOG_DEBUG(log, "Loaded user defined objects");
LOG_INFO(log, "Loading metadata from {}", path_str);
try
@ -1510,7 +1531,6 @@ if (ThreadFuzzer::instance().isEffective())
LOG_INFO(log, "Closed connections.");
dns_cache_updater.reset();
main_config_reloader.reset();
if (current_connections)
{

View File

@ -87,6 +87,7 @@ enum class AccessType
M(CREATE_DICTIONARY, "", DICTIONARY, CREATE) /* allows to execute {CREATE|ATTACH} DICTIONARY */\
M(CREATE_TEMPORARY_TABLE, "", GLOBAL, CREATE) /* allows to create and manipulate temporary tables;
implicitly enabled by the grant CREATE_TABLE on any table */ \
M(CREATE_FUNCTION, "", DATABASE, CREATE) /* allows to execute CREATE FUNCTION */ \
M(CREATE, "", GROUP, ALL) /* allows to execute {CREATE|ATTACH} */ \
\
M(DROP_DATABASE, "", DATABASE, DROP) /* allows to execute {DROP|DETACH} DATABASE */\
@ -94,6 +95,7 @@ enum class AccessType
M(DROP_VIEW, "", VIEW, DROP) /* allows to execute {DROP|DETACH} TABLE for views;
implicitly enabled by the grant DROP_TABLE */\
M(DROP_DICTIONARY, "", DICTIONARY, DROP) /* allows to execute {DROP|DETACH} DICTIONARY */\
M(DROP_FUNCTION, "", DATABASE, DROP) /* allows to execute DROP FUNCTION */\
M(DROP, "", GROUP, ALL) /* allows to execute {DROP|DETACH} */\
\
M(TRUNCATE, "TRUNCATE TABLE", TABLE, ALL) \

View File

@ -45,7 +45,7 @@ TEST(AccessRights, Union)
lhs.grant(AccessType::INSERT);
rhs.grant(AccessType::ALL, "db1");
lhs.makeUnion(rhs);
ASSERT_EQ(lhs.toString(), "GRANT INSERT ON *.*, GRANT SHOW, SELECT, ALTER, CREATE DATABASE, CREATE TABLE, CREATE VIEW, CREATE DICTIONARY, DROP, TRUNCATE, OPTIMIZE, SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, SYSTEM MOVES, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, SYSTEM RESTART REPLICA, SYSTEM RESTORE REPLICA, SYSTEM FLUSH DISTRIBUTED, dictGet ON db1.*");
ASSERT_EQ(lhs.toString(), "GRANT INSERT ON *.*, GRANT SHOW, SELECT, ALTER, CREATE DATABASE, CREATE TABLE, CREATE VIEW, CREATE DICTIONARY, CREATE FUNCTION, DROP, TRUNCATE, OPTIMIZE, SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, SYSTEM MOVES, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, SYSTEM RESTART REPLICA, SYSTEM RESTORE REPLICA, SYSTEM FLUSH DISTRIBUTED, dictGet ON db1.*");
}

View File

@ -1,5 +1,5 @@
#include <AggregateFunctions/AggregateFunctionIf.h>
#include <AggregateFunctions/AggregateFunctionCombinatorFactory.h>
#include <AggregateFunctions/AggregateFunctionIf.h>
#include "AggregateFunctionNull.h"
@ -11,6 +11,7 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ILLEGAL_AGGREGATION;
}
class AggregateFunctionCombinatorIf final : public IAggregateFunctionCombinator
@ -37,6 +38,10 @@ public:
const DataTypes & arguments,
const Array & params) const override
{
if (nested_function->getName().find(getName()) != String::npos)
{
throw Exception(ErrorCodes::ILLEGAL_AGGREGATION, "nested function for {0}-combinator must not have {0}-combinator", getName());
}
return std::make_shared<AggregateFunctionIf>(nested_function, arguments, params);
}
};

View File

@ -56,6 +56,8 @@ template <typename Value, bool float_return> using FuncQuantilesTDigestWeighted
template <typename Value, bool float_return> using FuncQuantileBFloat16 = AggregateFunctionQuantile<Value, QuantileBFloat16Histogram<Value>, NameQuantileBFloat16, false, std::conditional_t<float_return, Float64, void>, false>;
template <typename Value, bool float_return> using FuncQuantilesBFloat16 = AggregateFunctionQuantile<Value, QuantileBFloat16Histogram<Value>, NameQuantilesBFloat16, false, std::conditional_t<float_return, Float64, void>, true>;
template <typename Value, bool float_return> using FuncQuantileBFloat16Weighted = AggregateFunctionQuantile<Value, QuantileBFloat16Histogram<Value>, NameQuantileBFloat16Weighted, true, std::conditional_t<float_return, Float64, void>, false>;
template <typename Value, bool float_return> using FuncQuantilesBFloat16Weighted = AggregateFunctionQuantile<Value, QuantileBFloat16Histogram<Value>, NameQuantilesBFloat16Weighted, true, std::conditional_t<float_return, Float64, void>, true>;
template <template <typename, bool> class Function>
static constexpr bool supportDecimal()
@ -167,6 +169,9 @@ void registerAggregateFunctionsQuantile(AggregateFunctionFactory & factory)
factory.registerFunction(NameQuantileBFloat16::name, createAggregateFunctionQuantile<FuncQuantileBFloat16>);
factory.registerFunction(NameQuantilesBFloat16::name, { createAggregateFunctionQuantile<FuncQuantilesBFloat16>, properties });
factory.registerFunction(NameQuantileBFloat16Weighted::name, createAggregateFunctionQuantile<FuncQuantileBFloat16Weighted>);
factory.registerFunction(NameQuantilesBFloat16Weighted::name, createAggregateFunctionQuantile<FuncQuantilesBFloat16Weighted>);
/// 'median' is an alias for 'quantile'
factory.registerAlias("median", NameQuantile::name);
factory.registerAlias("medianDeterministic", NameQuantileDeterministic::name);
@ -179,6 +184,7 @@ void registerAggregateFunctionsQuantile(AggregateFunctionFactory & factory)
factory.registerAlias("medianTDigest", NameQuantileTDigest::name);
factory.registerAlias("medianTDigestWeighted", NameQuantileTDigestWeighted::name);
factory.registerAlias("medianBFloat16", NameQuantileBFloat16::name);
factory.registerAlias("medianBFloat16Weighted", NameQuantileBFloat16Weighted::name);
}
}

View File

@ -237,5 +237,7 @@ struct NameQuantilesTDigestWeighted { static constexpr auto name = "quantilesTDi
struct NameQuantileBFloat16 { static constexpr auto name = "quantileBFloat16"; };
struct NameQuantilesBFloat16 { static constexpr auto name = "quantilesBFloat16"; };
struct NameQuantileBFloat16Weighted { static constexpr auto name = "quantileBFloat16Weighted"; };
struct NameQuantilesBFloat16Weighted { static constexpr auto name = "quantilesBFloat16Weighted"; };
}

View File

@ -0,0 +1,28 @@
#include <Backups/BackupEntryConcat.h>
#include <IO/ConcatReadBuffer.h>
namespace DB
{
/// Takes ownership of the two source entries and stores the optional precomputed checksum.
BackupEntryConcat::BackupEntryConcat(
    BackupEntryPtr first_source_, BackupEntryPtr second_source_, const std::optional<UInt128> & checksum_)
    : first_source(std::move(first_source_)), second_source(std::move(second_source_)), checksum(checksum_)
{
}
/// Returns the combined size of both sources.
/// The sum is computed on the first call and then served from the cached `size`.
UInt64 BackupEntryConcat::getSize() const
{
    if (size.has_value())
        return *size;

    size = first_source->getSize() + second_source->getSize();
    return *size;
}
/// Returns a buffer built from the read buffers of the first and the second source.
std::unique_ptr<ReadBuffer> BackupEntryConcat::getReadBuffer() const
{
    return std::make_unique<ConcatReadBuffer>(
        first_source->getReadBuffer(),
        second_source->getReadBuffer());
}
}

View File

@ -0,0 +1,30 @@
#pragma once
#include <Backups/IBackupEntry.h>
namespace DB
{
/// Concatenates data of two backup entries.
/// The entry's size is the sum of the sizes of both sources, and its read buffer is built from their read buffers.
class BackupEntryConcat : public IBackupEntry
{
public:
/// The constructor is allowed to not set `checksum_`, in that case it will be calculated from the data.
BackupEntryConcat(
BackupEntryPtr first_source_,
BackupEntryPtr second_source_,
const std::optional<UInt128> & checksum_ = {});
/// Returns the size of the first source plus the size of the second source.
UInt64 getSize() const override;
/// Returns the checksum passed to the constructor; empty if none was passed.
std::optional<UInt128> getChecksum() const override { return checksum; }
/// Returns a buffer over the data of the first source and of the second source.
std::unique_ptr<ReadBuffer> getReadBuffer() const override;
private:
BackupEntryPtr first_source;
BackupEntryPtr second_source;
/// Cache for getSize(); `mutable` because it is filled in from that const method.
mutable std::optional<UInt64> size;
std::optional<UInt128> checksum;
};
}

View File

@ -0,0 +1,35 @@
#include <Backups/BackupEntryFromAppendOnlyFile.h>
#include <IO/LimitReadBuffer.h>
namespace DB
{
/// Variant without a disk. All arguments are forwarded to the immutable-file entry;
/// the size it reports at construction time becomes the number of bytes to back up (`limit`).
BackupEntryFromAppendOnlyFile::BackupEntryFromAppendOnlyFile(
    const String & file_path_,
    const std::optional<UInt64> & file_size_,
    const std::optional<UInt128> & checksum_,
    const std::shared_ptr<Poco::TemporaryFile> & temporary_file_)
    : BackupEntryFromImmutableFile(
        file_path_,
        file_size_,
        checksum_,
        temporary_file_)
    , limit(BackupEntryFromImmutableFile::getSize())
{
}
/// Variant for a file located on `disk_`. All arguments are forwarded to the immutable-file entry;
/// the size it reports at construction time becomes the number of bytes to back up (`limit`).
BackupEntryFromAppendOnlyFile::BackupEntryFromAppendOnlyFile(
    const DiskPtr & disk_,
    const String & file_path_,
    const std::optional<UInt64> & file_size_,
    const std::optional<UInt128> & checksum_,
    const std::shared_ptr<TemporaryFileOnDisk> & temporary_file_)
    : BackupEntryFromImmutableFile(
        disk_,
        file_path_,
        file_size_,
        checksum_,
        temporary_file_)
    , limit(BackupEntryFromImmutableFile::getSize())
{
}
/// Wraps the underlying file buffer into a LimitReadBuffer capped at `limit` bytes,
/// i.e. at the size captured when this entry was constructed.
std::unique_ptr<ReadBuffer> BackupEntryFromAppendOnlyFile::getReadBuffer() const
{
    /// NOTE(review): the last argument is presumably LimitReadBuffer's "throw when the limit is hit" flag.
    /// Confirm that raising (rather than silently ending the stream) is intended for a file that has grown.
    return std::make_unique<LimitReadBuffer>(BackupEntryFromImmutableFile::getReadBuffer(), limit, true);
}
}

View File

@ -0,0 +1,35 @@
#pragma once
#include <Backups/BackupEntryFromImmutableFile.h>
namespace DB
{
/// Represents a file prepared to be included in a backup, assuming that until this backup entry is destroyed
/// the file can be appended with new data, but the bytes which are already in the file won't be changed.
/// Only the first `limit` bytes are exposed, where `limit` is the size obtained when the entry is constructed.
class BackupEntryFromAppendOnlyFile : public BackupEntryFromImmutableFile
{
public:
/// The constructor is allowed to not set `file_size_` or `checksum_`, in that case it will be calculated from the data.
BackupEntryFromAppendOnlyFile(
const String & file_path_,
const std::optional<UInt64> & file_size_ = {},
const std::optional<UInt128> & checksum_ = {},
const std::shared_ptr<Poco::TemporaryFile> & temporary_file_ = {});
/// Same as above, for a file located on `disk_`.
BackupEntryFromAppendOnlyFile(
const DiskPtr & disk_,
const String & file_path_,
const std::optional<UInt64> & file_size_ = {},
const std::optional<UInt128> & checksum_ = {},
const std::shared_ptr<TemporaryFileOnDisk> & temporary_file_ = {});
/// Returns the size captured at construction, not the current size of the file.
UInt64 getSize() const override { return limit; }
/// Returns a buffer over the file limited to `limit` bytes.
std::unique_ptr<ReadBuffer> getReadBuffer() const override;
private:
/// Number of bytes of the file that belong to this backup entry.
const UInt64 limit;
};
}

View File

@ -0,0 +1,47 @@
#include <Backups/BackupEntryFromImmutableFile.h>
#include <Disks/IDisk.h>
#include <IO/createReadBufferFromFileBase.h>
#include <Poco/File.h>
namespace DB
{
/// Variant without a disk: `disk` stays empty, so the file is accessed directly by `file_path_`.
/// `temporary_file_` is only stored in the entry.
BackupEntryFromImmutableFile::BackupEntryFromImmutableFile(
    const String & file_path_,
    const std::optional<UInt64> & file_size_,
    const std::optional<UInt128> & checksum_,
    const std::shared_ptr<Poco::TemporaryFile> & temporary_file_)
    : file_path(file_path_)
    , file_size(file_size_)
    , checksum(checksum_)
    , temporary_file(temporary_file_)
{
}
/// Variant for a file located on `disk_`: size and data are later obtained through the disk.
/// `temporary_file_` is only stored in the entry.
BackupEntryFromImmutableFile::BackupEntryFromImmutableFile(
    const DiskPtr & disk_,
    const String & file_path_,
    const std::optional<UInt64> & file_size_,
    const std::optional<UInt128> & checksum_,
    const std::shared_ptr<TemporaryFileOnDisk> & temporary_file_)
    : disk(disk_)
    , file_path(file_path_)
    , file_size(file_size_)
    , checksum(checksum_)
    , temporary_file_on_disk(temporary_file_)
{
}
/// Defaulted destructor, defined out of line in this source file.
BackupEntryFromImmutableFile::~BackupEntryFromImmutableFile() = default;
/// Returns the file size. If it was not passed to the constructor, it is queried once
/// (from the disk when one is set, otherwise via Poco::File) and cached under the mutex.
UInt64 BackupEntryFromImmutableFile::getSize() const
{
    std::lock_guard lock{get_file_size_mutex};

    if (file_size.has_value())
        return *file_size;

    if (disk)
        file_size = disk->getFileSize(file_path);
    else
        file_size = Poco::File(file_path).getSize();

    return *file_size;
}
/// Opens the file for reading: through the disk when one is set, otherwise directly by path.
std::unique_ptr<ReadBuffer> BackupEntryFromImmutableFile::getReadBuffer() const
{
    if (!disk)
        return createReadBufferFromFileBase(file_path, 0, 0, 0, nullptr);

    return disk->readFile(file_path);
}
}

View File

@ -0,0 +1,51 @@
#pragma once
#include <Backups/IBackupEntry.h>
#include <mutex>
namespace Poco { class TemporaryFile; }
namespace DB
{
class TemporaryFileOnDisk;
class IDisk;
using DiskPtr = std::shared_ptr<IDisk>;
/// Represents a file prepared to be included in a backup, assuming that until this backup entry is destroyed the file won't be changed.
/// The file is addressed either directly by path or by a path on a disk, depending on the constructor used.
class BackupEntryFromImmutableFile : public IBackupEntry
{
public:
/// The constructor is allowed to not set `file_size_` or `checksum_`, in that case it will be calculated from the data.
BackupEntryFromImmutableFile(
const String & file_path_,
const std::optional<UInt64> & file_size_ = {},
const std::optional<UInt128> & checksum_ = {},
const std::shared_ptr<Poco::TemporaryFile> & temporary_file_ = {});
/// Same as above, for a file located on `disk_`.
BackupEntryFromImmutableFile(
const DiskPtr & disk_,
const String & file_path_,
const std::optional<UInt64> & file_size_ = {},
const std::optional<UInt128> & checksum_ = {},
const std::shared_ptr<TemporaryFileOnDisk> & temporary_file_ = {});
~BackupEntryFromImmutableFile() override;
/// Returns the size passed to the constructor, or the size of the file queried on the first call.
UInt64 getSize() const override;
/// Returns the checksum passed to the constructor; empty if none was passed.
std::optional<UInt128> getChecksum() const override { return checksum; }
/// Returns a buffer for reading the file.
std::unique_ptr<ReadBuffer> getReadBuffer() const override;
String getFilePath() const { return file_path; }
/// Returns the disk passed to the constructor; empty for the constructor without a disk.
DiskPtr getDisk() const { return disk; }
private:
const DiskPtr disk;
const String file_path;
/// Cached file size; `mutable` because getSize() fills it in lazily.
mutable std::optional<UInt64> file_size;
/// Protects the lazy initialization of `file_size`.
mutable std::mutex get_file_size_mutex;
const std::optional<UInt128> checksum;
/// The two members below are only stored here.
/// NOTE(review): presumably they keep a temporary file alive for the lifetime of the entry — confirm.
const std::shared_ptr<Poco::TemporaryFile> temporary_file;
const std::shared_ptr<TemporaryFileOnDisk> temporary_file_on_disk;
};
}

View File

@ -0,0 +1,23 @@
#include <Backups/BackupEntryFromMemory.h>
#include <IO/ReadBufferFromString.h>
namespace DB
{
/// Copies `size_` bytes starting at `data_` into an owned string and delegates to the String constructor.
BackupEntryFromMemory::BackupEntryFromMemory(const void * data_, size_t size_, const std::optional<UInt128> & checksum_)
    : BackupEntryFromMemory(String(static_cast<const char *>(data_), size_), checksum_)
{
}
/// Takes ownership of the already prepared data and stores the optional checksum.
BackupEntryFromMemory::BackupEntryFromMemory(String data_, const std::optional<UInt128> & checksum_)
    : data(std::move(data_))
    , checksum(checksum_)
{
}
/// Returns a buffer over the string owned by this entry.
/// The buffer is constructed from the `data` member itself, so it must not outlive the entry.
std::unique_ptr<ReadBuffer> BackupEntryFromMemory::getReadBuffer() const
{
    return std::make_unique<ReadBufferFromString>(
        data);
}
}

View File

@ -0,0 +1,27 @@
#pragma once
#include <Backups/IBackupEntry.h>
#include <IO/ReadBufferFromString.h>
namespace DB
{
/// Represents small preloaded data to be included in a backup.
/// The data is owned by the entry as a string.
class BackupEntryFromMemory : public IBackupEntry
{
public:
/// The constructor is allowed to not set `checksum_`, in that case it will be calculated from the data.
/// Copies `size_` bytes starting at `data_`.
BackupEntryFromMemory(const void * data_, size_t size_, const std::optional<UInt128> & checksum_ = {});
/// Takes the data as an already built string.
BackupEntryFromMemory(String data_, const std::optional<UInt128> & checksum_ = {});
/// Returns the number of bytes held by the entry.
UInt64 getSize() const override { return data.size(); }
/// Returns the checksum passed to the constructor; empty if none was passed.
std::optional<UInt128> getChecksum() const override { return checksum; }
/// Returns a buffer reading from the held data.
std::unique_ptr<ReadBuffer> getReadBuffer() const override;
private:
const String data;
const std::optional<UInt128> checksum;
};
}

View File

@ -0,0 +1,39 @@
#include <Backups/BackupEntryFromSmallFile.h>
#include <Disks/IDisk.h>
#include <IO/createReadBufferFromFileBase.h>
#include <IO/ReadHelpers.h>
namespace DB
{
namespace
{
    /// Reads `in` up to the end and returns everything as one string.
    String readAll(ReadBuffer & in)
    {
        String contents;
        readStringUntilEOF(contents, in);
        return contents;
    }

    /// Loads the whole file at `file_path` into memory.
    String readFile(const String & file_path)
    {
        auto in = createReadBufferFromFileBase(file_path, 0, 0, 0, nullptr);
        return readAll(*in);
    }

    /// Loads the whole file at `file_path` on `disk` into memory.
    String readFile(const DiskPtr & disk, const String & file_path)
    {
        auto in = disk->readFile(file_path);
        return readAll(*in);
    }
}
/// Reads the whole file immediately and keeps its contents in memory; `disk` stays empty.
BackupEntryFromSmallFile::BackupEntryFromSmallFile(const String & file_path_, const std::optional<UInt128> & checksum_)
    : BackupEntryFromMemory(readFile(file_path_), checksum_)
    , file_path(file_path_)
{
}
/// Reads the whole file from `disk_` immediately and keeps its contents in memory.
BackupEntryFromSmallFile::BackupEntryFromSmallFile(
    const DiskPtr & disk_,
    const String & file_path_,
    const std::optional<UInt128> & checksum_)
    : BackupEntryFromMemory(readFile(disk_, file_path_), checksum_)
    , disk(disk_)
    , file_path(file_path_)
{
}
}

View File

@ -0,0 +1,34 @@
#pragma once
#include <Backups/BackupEntryFromMemory.h>
namespace DB
{
class IDisk;
using DiskPtr = std::shared_ptr<IDisk>;
/// Represents a file prepared to be included in a backup,
/// assuming that the file is small and can be easily loaded into memory.
/// The file is read entirely when the entry is constructed.
class BackupEntryFromSmallFile : public BackupEntryFromMemory
{
public:
/// The constructor is allowed to not set `checksum_`, in that case it will be calculated from the data.
BackupEntryFromSmallFile(
const String & file_path_,
const std::optional<UInt128> & checksum_ = {});
/// Same as above, for a file located on `disk_`.
BackupEntryFromSmallFile(
const DiskPtr & disk_,
const String & file_path_,
const std::optional<UInt128> & checksum_ = {});
/// Returns the path the data was loaded from.
String getFilePath() const { return file_path; }
/// Returns the disk passed to the constructor; empty for the constructor without a disk.
DiskPtr getDisk() const { return disk; }
private:
const DiskPtr disk;
const String file_path;
};
}

View File

@ -0,0 +1,65 @@
#include <Backups/BackupFactory.h>
#include <Backups/BackupInDirectory.h>
#include <Interpreters/Context.h>
#include <Disks/IVolume.h>
namespace DB
{
namespace ErrorCodes
{
extern const int BACKUP_NOT_FOUND;
extern const int BACKUP_ALREADY_EXISTS;
extern const int NOT_ENOUGH_SPACE;
extern const int LOGICAL_ERROR;
}
/// Returns the single process-wide factory, created on first use.
BackupFactory & BackupFactory::instance()
{
    static BackupFactory factory;
    return factory;
}
/// Remembers the volume on which backups are created and looked up.
/// createBackup() and openBackup() throw LOGICAL_ERROR while no volume is set.
void BackupFactory::setBackupsVolume(VolumePtr backups_volume_)
{
    this->backups_volume = backups_volume_;
}
/// Creates a new backup named `backup_name` and opens it for writing.
/// Throws LOGICAL_ERROR if no backups volume is set, BACKUP_ALREADY_EXISTS if any disk of the volume
/// already contains that name, and NOT_ENOUGH_SPACE if `estimated_backup_size` bytes cannot be reserved.
BackupMutablePtr BackupFactory::createBackup(const String & backup_name, UInt64 estimated_backup_size, const BackupPtr & base_backup) const
{
    if (!backups_volume)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "No backups volume");

    /// The name must be free on every disk of the volume, not only on the one chosen below.
    for (const auto & candidate : backups_volume->getDisks())
        if (candidate->exists(backup_name))
            throw Exception(ErrorCodes::BACKUP_ALREADY_EXISTS, "Backup {} already exists", quoteString(backup_name));

    auto reserved_space = backups_volume->reserve(estimated_backup_size);
    if (reserved_space)
        return std::make_shared<BackupInDirectory>(IBackup::OpenMode::WRITE, reserved_space->getDisk(), backup_name, base_backup);

    throw Exception(
        ErrorCodes::NOT_ENOUGH_SPACE,
        "Couldn't reserve {} bytes of free space for new backup {}",
        estimated_backup_size,
        quoteString(backup_name));
}
/// Opens an existing backup for reading from the first disk of the volume that contains `backup_name`.
/// Throws LOGICAL_ERROR if no backups volume is set and BACKUP_NOT_FOUND if no disk has the backup.
BackupPtr BackupFactory::openBackup(const String & backup_name, const BackupPtr & base_backup) const
{
    if (!backups_volume)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "No backups volume");

    for (const auto & candidate : backups_volume->getDisks())
    {
        if (!candidate->exists(backup_name))
            continue;

        return std::make_shared<BackupInDirectory>(IBackup::OpenMode::READ, candidate, backup_name, base_backup);
    }

    throw Exception(ErrorCodes::BACKUP_NOT_FOUND, "Backup {} not found", quoteString(backup_name));
}
}

View File

@ -0,0 +1,38 @@
#pragma once
#include <Core/Types.h>
#include <boost/noncopyable.hpp>
#include <memory>
namespace DB
{
class IBackup;
using BackupPtr = std::shared_ptr<const IBackup>;
using BackupMutablePtr = std::shared_ptr<IBackup>;
class Context;
using ContextMutablePtr = std::shared_ptr<Context>;
class IVolume;
using VolumePtr = std::shared_ptr<IVolume>;
/// Factory for implementations of the IBackup interface.
/// Accessed through the instance() singleton; backups are created and looked up on the configured volume.
class BackupFactory : boost::noncopyable
{
public:
/// Returns the single instance of the factory.
static BackupFactory & instance();
/// Must be called to initialize the backup factory.
void setBackupsVolume(VolumePtr backups_volume_);
/// Creates a new backup and open it for writing.
/// `estimated_backup_size` is the number of bytes reserved on the volume for the new backup.
BackupMutablePtr createBackup(const String & backup_name, UInt64 estimated_backup_size, const BackupPtr & base_backup = {}) const;
/// Opens an existing backup for reading.
BackupPtr openBackup(const String & backup_name, const BackupPtr & base_backup = {}) const;
private:
/// Volume holding the backups; empty until setBackupsVolume() is called.
VolumePtr backups_volume;
};
}

View File

@ -0,0 +1,454 @@
#include <Backups/BackupInDirectory.h>
#include <Backups/BackupFactory.h>
#include <Backups/BackupEntryConcat.h>
#include <Backups/BackupEntryFromImmutableFile.h>
#include <Backups/BackupEntryFromMemory.h>
#include <Backups/IBackupEntry.h>
#include <Common/Exception.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/typeid_cast.h>
#include <Common/quoteString.h>
#include <Disks/DiskSelector.h>
#include <Disks/IDisk.h>
#include <IO/HashingReadBuffer.h>
#include <IO/ReadBufferFromFileBase.h>
#include <IO/ReadHelpers.h>
#include <IO/SeekableReadBuffer.h>
#include <IO/WriteBufferFromFileBase.h>
#include <IO/WriteHelpers.h>
#include <IO/copyData.h>
#include <boost/range/adaptor/map.hpp>
namespace DB
{
namespace ErrorCodes
{
extern const int BACKUP_NOT_FOUND;
extern const int BACKUP_ALREADY_EXISTS;
extern const int BACKUP_VERSION_NOT_SUPPORTED;
extern const int BACKUP_DAMAGED;
extern const int NO_BASE_BACKUP;
extern const int WRONG_BASE_BACKUP;
extern const int BACKUP_ENTRY_ALREADY_EXISTS;
extern const int BACKUP_ENTRY_NOT_FOUND;
extern const int BAD_ARGUMENTS;
extern const int LOGICAL_ERROR;
}
namespace
{
const UInt64 BACKUP_VERSION = 1;
}
/// Opens the backup stored in the directory `path_` on `disk_`.
/// In WRITE mode a new directory is created; in READ mode the existing contents are loaded (see open()).
/// `base_backup_` is optional and is used for incremental backups.
/// Rethrows any exception thrown by open().
BackupInDirectory::BackupInDirectory(OpenMode open_mode_, const DiskPtr & disk_, const String & path_, const std::shared_ptr<const IBackup> & base_backup_)
    : open_mode(open_mode_), disk(disk_), path(path_), path_with_sep(path_), base_backup(base_backup_)
{
    /// `path_with_sep` is used to build paths of files inside the backup, so it must end with '/'.
    if (!path_with_sep.ends_with('/'))
        path_with_sep += '/';
    trimRight(path, '/');

    try
    {
        open();
    }
    catch (...)
    {
        /// The destructor is not invoked for an object whose constructor has thrown,
        /// so a directory already created by open() has to be cleaned up here.
        try
        {
            close();
        }
        catch (...)
        {
            /// Keep the original error: a failure of the cleanup is only logged.
            tryLogCurrentException(__PRETTY_FUNCTION__);
        }
        throw;
    }
}
/// Closes the backup. An unfinished backup opened for writing gets its directory removed (see close()).
BackupInDirectory::~BackupInDirectory()
{
    try
    {
        close();
    }
    catch (...)
    {
        /// close() may throw while removing files; an exception must not leave a destructor.
        tryLogCurrentException(__PRETTY_FUNCTION__);
    }
}
/// Prepares the backup according to `open_mode`.
/// READ: the directory must exist; the list of entries and the path to the base backup are loaded.
/// WRITE: the directory must not exist yet; it is created and the path to the base backup is stored.
void BackupInDirectory::open()
{
    if (open_mode == OpenMode::READ)
    {
        const bool backup_found = disk->isDirectory(path);
        if (!backup_found)
            throw Exception(ErrorCodes::BACKUP_NOT_FOUND, "Backup {} not found", quoteString(path));

        readContents();
        readPathToBaseBackup();
    }
    else if (open_mode == OpenMode::WRITE)
    {
        const bool already_exists = disk->exists(path);
        if (already_exists)
            throw Exception(ErrorCodes::BACKUP_ALREADY_EXISTS, "Backup {} already exists", quoteString(path));

        disk->createDirectories(path);
        /// From now on close() is responsible for removing the directory if writing isn't finalized.
        directory_was_created = true;
        writePathToBaseBackup();
    }
}
/// Removes the backup's directory if the backup was opened for writing, the directory was created
/// by this object, and finalizeWriting() was never reached. Does nothing otherwise.
void BackupInDirectory::close()
{
    const bool writing_not_finished = (open_mode == OpenMode::WRITE) && directory_was_created && !finalized;
    if (!writing_not_finished)
        return;

    /// An incomplete backup cannot be used, so its files are dropped.
    disk->removeRecursive(path);
}
void BackupInDirectory::writePathToBaseBackup()
{
String file_path = path_with_sep + ".base_backup";
if (!base_backup)
{
disk->removeFileIfExists(file_path);
return;
}
auto out = disk->writeFile(file_path);
writeString(base_backup->getPath(), *out);
}
void BackupInDirectory::readPathToBaseBackup()
{
if (base_backup)
return;
String file_path = path_with_sep + ".base_backup";
if (!disk->exists(file_path))
return;
auto in = disk->readFile(file_path);
String base_backup_path;
readStringUntilEOF(base_backup_path, *in);
if (base_backup_path.empty())
return;
base_backup = BackupFactory::instance().openBackup(base_backup_path);
}
/// Serializes the list of entries to the file ".contents".
/// Layout: version, number of entries, then for each entry: name, size and, only for non-empty entries,
/// checksum, base size and (if the entry is only partly taken from the base backup) the base checksum.
void BackupInDirectory::writeContents()
{
    auto out = disk->writeFile(path_with_sep + ".contents");
    auto & buf = *out;

    writeVarUInt(BACKUP_VERSION, buf);
    writeVarUInt(infos.size(), buf);

    for (const auto & name_and_info : infos)
    {
        const auto & info = name_and_info.second;

        writeBinary(name_and_info.first, buf);
        writeVarUInt(info.size, buf);
        if (!info.size)
            continue; /// Nothing else is stored for empty entries.

        writeBinary(info.checksum, buf);
        writeVarUInt(info.base_size, buf);

        /// If the whole entry comes from the base backup the base checksum equals the checksum
        /// and therefore isn't stored separately.
        const bool partly_from_base = info.base_size && (info.base_size != info.size);
        if (partly_from_base)
            writeBinary(info.base_checksum, buf);
    }
}
/// Loads the list of entries from the file ".contents" (the counterpart of writeContents()).
/// Throws BACKUP_VERSION_NOT_SUPPORTED if the stored version differs from BACKUP_VERSION.
void BackupInDirectory::readContents()
{
    auto in = disk->readFile(path_with_sep + ".contents");
    auto & buf = *in;

    UInt64 version;
    readVarUInt(version, buf);
    if (version != BACKUP_VERSION)
        throw Exception(ErrorCodes::BACKUP_VERSION_NOT_SUPPORTED, "Backup {}: Version {} is not supported", quoteString(path), version);

    size_t num_infos;
    readVarUInt(num_infos, buf);

    infos.clear();
    for (size_t remaining = num_infos; remaining != 0; --remaining)
    {
        String path_in_backup;
        readBinary(path_in_backup, buf);

        EntryInfo info;
        readVarUInt(info.size, buf);
        if (info.size)
        {
            readBinary(info.checksum, buf);
            readVarUInt(info.base_size, buf);
            if (info.base_size)
            {
                if (info.base_size != info.size)
                    readBinary(info.base_checksum, buf);
                else
                    info.base_checksum = info.checksum; /// Entirely from the base backup: the checksums coincide.
            }
        }

        infos.emplace(path_in_backup, info);
    }
}
/// Returns the mode passed to the constructor.
IBackup::OpenMode BackupInDirectory::getOpenMode() const
{
return open_mode;
}
/// Returns the path of the backup's directory (the constructor applies trimRight(path, '/') to it).
String BackupInDirectory::getPath() const
{
return path;
}
/// Returns the names of entries starting with `prefix`, with the prefix removed.
/// If `terminator` isn't empty each name is cut at the first occurrence of `terminator` after the prefix;
/// consecutive equal results are collapsed into one.
/// Throws BAD_ARGUMENTS if `prefix` is non-empty and doesn't end with '/'.
Strings BackupInDirectory::list(const String & prefix, const String & terminator) const
{
    if (!prefix.empty() && !prefix.ends_with('/'))
        throw Exception("prefix should end with '/'", ErrorCodes::BAD_ARGUMENTS);

    std::lock_guard lock{mutex};

    Strings elements;
    const size_t prefix_length = prefix.length();

    /// `infos` is ordered, so all names with the same prefix form a contiguous range starting at lower_bound().
    for (auto it = infos.lower_bound(prefix); (it != infos.end()) && it->first.starts_with(prefix); ++it)
    {
        std::string_view element{it->first};
        element.remove_prefix(prefix_length);

        if (!terminator.empty())
            element = element.substr(0, element.find(terminator));

        if (elements.empty() || !(elements.back() == element))
            elements.push_back(String{element});
    }
    return elements;
}
/// Tells whether the backup has an entry named `name`.
bool BackupInDirectory::exists(const String & name) const
{
    std::lock_guard lock{mutex};
    const auto found = infos.find(name);
    return found != infos.end();
}
/// Returns the size of the entry `name`; throws BACKUP_ENTRY_NOT_FOUND if there is no such entry.
size_t BackupInDirectory::getSize(const String & name) const
{
    std::lock_guard lock{mutex};

    if (auto found = infos.find(name); found != infos.end())
        return found->second.size;

    throw Exception(
        ErrorCodes::BACKUP_ENTRY_NOT_FOUND, "Backup {}: Entry {} not found in the backup", quoteString(path), quoteString(name));
}
/// Returns the checksum of the entry `name`; throws BACKUP_ENTRY_NOT_FOUND if there is no such entry.
UInt128 BackupInDirectory::getChecksum(const String & name) const
{
    std::lock_guard lock{mutex};

    if (auto found = infos.find(name); found != infos.end())
        return found->second.checksum;

    throw Exception(
        ErrorCodes::BACKUP_ENTRY_NOT_FOUND, "Backup {}: Entry {} not found in the backup", quoteString(path), quoteString(name));
}
/// Returns the entry `name` for reading.
/// Depending on what was recorded for the entry, its data is
///  - empty,
///  - read completely from a file of this backup,
///  - taken completely from the base backup, or
///  - a concatenation of the base backup's entry and a file of this backup.
/// Throws BACKUP_ENTRY_NOT_FOUND if there is no such entry, NO_BASE_BACKUP if the entry needs a base backup
/// but none is set, BACKUP_DAMAGED if the recorded sizes are inconsistent, and WRONG_BASE_BACKUP if
/// the base backup doesn't contain the expected data.
BackupEntryPtr BackupInDirectory::read(const String & name) const
{
    std::lock_guard lock{mutex};
    auto it = infos.find(name);
    if (it == infos.end())
        throw Exception(
            ErrorCodes::BACKUP_ENTRY_NOT_FOUND, "Backup {}: Entry {} not found in the backup", quoteString(path), quoteString(name));

    const auto & info = it->second;
    if (!info.size)
    {
        /// Entry's data is empty.
        return std::make_unique<BackupEntryFromMemory>(nullptr, 0, UInt128{0, 0});
    }

    if (!info.base_size)
    {
        /// Data goes completely from this backup, the base backup isn't used.
        return std::make_unique<BackupEntryFromImmutableFile>(disk, path_with_sep + name, info.size, info.checksum);
    }

    /// From here on the base backup is required. This check must come before any use of `base_backup`
    /// (including the error messages below which print its path).
    if (!base_backup)
    {
        throw Exception(
            ErrorCodes::NO_BASE_BACKUP,
            "Backup {}: Entry {} is marked to be read from a base backup, but there is no base backup specified",
            quoteString(path), quoteString(name));
    }

    if (info.size < info.base_size)
    {
        throw Exception(
            ErrorCodes::BACKUP_DAMAGED,
            "Backup {}: Entry {} has its data size less than in the base backup {}: {} < {}",
            quoteString(path), quoteString(name), quoteString(base_backup->getPath()), info.size, info.base_size);
    }

    if (!base_backup->exists(name))
    {
        throw Exception(
            ErrorCodes::WRONG_BASE_BACKUP,
            "Backup {}: Entry {} is marked to be read from a base backup, but doesn't exist there",
            quoteString(path), quoteString(name));
    }

    auto base_entry = base_backup->read(name);
    auto base_size = base_entry->getSize();
    if (base_size != info.base_size)
    {
        throw Exception(
            ErrorCodes::WRONG_BASE_BACKUP,
            "Backup {}: Entry {} has unexpected size in the base backup {}: {} (expected size: {})",
            quoteString(path), quoteString(name), quoteString(base_backup->getPath()), base_size, info.base_size);
    }

    /// The checksum is verified only if the base entry can provide one.
    auto base_checksum = base_entry->getChecksum();
    if (base_checksum && (*base_checksum != info.base_checksum))
    {
        throw Exception(
            ErrorCodes::WRONG_BASE_BACKUP,
            "Backup {}: Entry {} has unexpected checksum in the base backup {}",
            quoteString(path), quoteString(name), quoteString(base_backup->getPath()));
    }

    if (info.size == info.base_size)
    {
        /// Data goes completely from the base backup (nothing goes from this backup).
        return base_entry;
    }

    /// The beginning of the data goes from the base backup,
    /// and the ending goes from this backup.
    return std::make_unique<BackupEntryConcat>(
        std::move(base_entry),
        std::make_unique<BackupEntryFromImmutableFile>(disk, path_with_sep + name, info.size - info.base_size),
        info.checksum);
}
/// Adds the entry `name` to a backup opened for writing.
/// If a base backup is set and it has an entry with the same name whose data is equal to, or a prefix of,
/// the new data, then only the difference (possibly nothing) is copied to this backup;
/// otherwise the whole data is copied to the file `path_with_sep + name`.
/// The sizes and checksums are remembered in `infos` (they are saved to disk by finalizeWriting()).
/// Throws LOGICAL_ERROR if the backup isn't opened for writing and
/// BACKUP_ENTRY_ALREADY_EXISTS if an entry with that name was already written.
void BackupInDirectory::write(const String & name, BackupEntryPtr entry)
{
std::lock_guard lock{mutex};
if (open_mode != OpenMode::WRITE)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Illegal operation: Cannot write to a backup opened for reading");
if (infos.contains(name))
throw Exception(
ErrorCodes::BACKUP_ENTRY_ALREADY_EXISTS, "Backup {}: Entry {} already exists", quoteString(path), quoteString(name));
UInt64 size = entry->getSize();
/// The entry may not know its checksum; in that case it is calculated below while reading the data.
std::optional<UInt128> checksum = entry->getChecksum();
/// Check if the entry's data is empty.
if (!size)
{
/// No file is created for an empty entry, only a default (all-zero) record is kept.
infos.emplace(name, EntryInfo{});
return;
}
/// Check if a entry with such name exists in the base backup.
bool base_exists = (base_backup && base_backup->exists(name));
UInt64 base_size = 0;
UInt128 base_checksum{0, 0};
if (base_exists)
{
base_size = base_backup->getSize(name);
base_checksum = base_backup->getChecksum(name);
}
std::unique_ptr<ReadBuffer> read_buffer; /// We'll set that later.
UInt64 read_pos = 0; /// Current position in read_buffer.
/// Determine whether it's possible to receive this entry's data from the base backup completely or partly.
bool use_base = false;
if (base_exists && base_size)
{
if (size == base_size)
{
/// The size is the same, we need to compare checksums to find out
/// if the entry's data has not been changed since the base backup.
if (!checksum)
{
read_buffer = entry->getReadBuffer();
HashingReadBuffer hashing_read_buffer{*read_buffer};
hashing_read_buffer.ignore(size);
read_pos = size;
checksum = hashing_read_buffer.getHash();
}
if (checksum == base_checksum)
use_base = true; /// The data has not been changed.
}
else if (size > base_size)
{
/// The size has been increased, we need to calculate a partial checksum to find out
/// if the entry's data has been only appended since the base backup.
read_buffer = entry->getReadBuffer();
HashingReadBuffer hashing_read_buffer{*read_buffer};
hashing_read_buffer.ignore(base_size);
UInt128 partial_checksum = hashing_read_buffer.getHash();
read_pos = base_size;
if (!checksum)
{
/// Continue hashing to the end to get the checksum of the whole data.
hashing_read_buffer.ignore(size - base_size);
checksum = hashing_read_buffer.getHash();
read_pos = size;
}
if (partial_checksum == base_checksum)
use_base = true; /// The data has been appended.
}
/// If the new data is shorter than in the base backup, the base backup isn't used for this entry.
}
if (use_base && (size == base_size))
{
/// The entry's data has not been changed since the base backup.
EntryInfo info;
info.size = base_size;
info.checksum = base_checksum;
info.base_size = base_size;
info.base_checksum = base_checksum;
infos.emplace(name, info);
return;
}
{
/// Either the entry wasn't exist in the base backup
/// or the entry has data appended to the end of the data from the base backup.
/// In both those cases we have to copy data to this backup.
/// Find out where the start position to copy data is.
auto copy_pos = use_base ? base_size : 0;
/// Move the current read position to the start position to copy data.
/// If `read_buffer` is seekable it's easier, otherwise we can use ignore().
/// NOTE(review): if typeid_cast only matches the exact dynamic type (to be confirmed in Common/typeid_cast.h),
/// it never succeeds for a buffer of a class derived from SeekableReadBuffer; the code then always takes
/// the "reopen and ignore()" path, which is still correct but reads the data again.
if ((read_pos > copy_pos) && !typeid_cast<SeekableReadBuffer *>(read_buffer.get()))
{
/// We've read too far and cannot go back: start again from the beginning.
read_buffer.reset();
read_pos = 0;
}
if (!read_buffer)
read_buffer = entry->getReadBuffer();
if (read_pos != copy_pos)
{
if (auto * seekable_buffer = typeid_cast<SeekableReadBuffer *>(read_buffer.get()))
seekable_buffer->seek(copy_pos, SEEK_SET);
else if (copy_pos)
read_buffer->ignore(copy_pos - read_pos);
}
/// If we haven't received or calculated a checksum yet, calculate it now.
/// (At this point that can happen only when `copy_pos` is 0 because both branches above
/// which set `use_base` have already set `checksum`.)
ReadBuffer * maybe_hashing_read_buffer = read_buffer.get();
std::optional<HashingReadBuffer> hashing_read_buffer;
if (!checksum)
maybe_hashing_read_buffer = &hashing_read_buffer.emplace(*read_buffer);
/// Copy the entry's data after `copy_pos`.
String out_file_path = path_with_sep + name;
disk->createDirectories(directoryPath(out_file_path));
auto out = disk->writeFile(out_file_path);
copyData(*maybe_hashing_read_buffer, *out, size - copy_pos);
if (hashing_read_buffer)
checksum = hashing_read_buffer->getHash();
/// Done!
EntryInfo info;
info.size = size;
info.checksum = *checksum;
if (use_base)
{
info.base_size = base_size;
info.base_checksum = base_checksum;
}
infos.emplace(name, info);
}
}
/// Finishes writing of the backup: saves the list of entries to the file ".contents"
/// and marks the backup as finalized so that close() doesn't remove its directory.
/// Throws LOGICAL_ERROR if the backup isn't opened for writing.
/// NOTE(review): unlike write(), this function doesn't lock `mutex` while `infos` is read by writeContents() —
/// presumably callers don't run it concurrently with write(); confirm.
void BackupInDirectory::finalizeWriting()
{
if (open_mode != OpenMode::WRITE)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Illegal operation: Cannot write to a backup opened for reading");
writeContents();
finalized = true;
}
}

View File

@ -0,0 +1,66 @@
#pragma once
#include <Backups/IBackup.h>
#include <map>
#include <mutex>
namespace DB
{
class IDisk;
using DiskPtr = std::shared_ptr<IDisk>;
/// Represents a backup stored on a disk.
/// A backup is stored as a directory; the data of an entry which has to be kept in this backup
/// is stored as a file in that directory.
/// Also two system files are stored in the directory:
/// 1) ".base_backup" is a text file containing the path to the base backup
/// (it's written only if a base backup is used).
/// 2) ".contents" is a binary file containing the backup version and a list of all entries along with their sizes
/// and checksums and information whether the base backup should be used for each entry.
/// If a backup opened for writing is destroyed before finalizeWriting() was called its directory is removed.
class BackupInDirectory : public IBackup
{
public:
/// Opens (READ) or creates (WRITE) the backup located in the directory `path_` on `disk_`.
BackupInDirectory(OpenMode open_mode_, const DiskPtr & disk_, const String & path_, const std::shared_ptr<const IBackup> & base_backup_ = {});
~BackupInDirectory() override;
OpenMode getOpenMode() const override;
String getPath() const override;
Strings list(const String & prefix, const String & terminator) const override;
bool exists(const String & name) const override;
size_t getSize(const String & name) const override;
UInt128 getChecksum(const String & name) const override;
BackupEntryPtr read(const String & name) const override;
void write(const String & name, BackupEntryPtr entry) override;
void finalizeWriting() override;
private:
/// Called from the constructor / the destructor.
void open();
void close();
/// Write/read the file ".base_backup".
void writePathToBaseBackup();
void readPathToBaseBackup();
/// Write/read the file ".contents".
void writeContents();
void readContents();
/// What is known about one entry of the backup.
struct EntryInfo
{
/// Total size and checksum of the entry's data.
UInt64 size = 0;
UInt128 checksum{0, 0};
/// for incremental backups
/// Size and checksum of the leading part of the data which is taken from the base backup
/// (`base_size` is 0 if the base backup isn't used for this entry).
UInt64 base_size = 0;
UInt128 base_checksum{0, 0};
};
const OpenMode open_mode;
const DiskPtr disk;
/// Path to the backup's directory and the same path guaranteed to end with '/'.
String path;
String path_with_sep;
std::shared_ptr<const IBackup> base_backup;
/// Entries by their names; ordered to allow listing by prefix.
std::map<String, EntryInfo> infos;
/// Set in WRITE mode after the directory has been created.
bool directory_was_created = false;
/// Set by finalizeWriting().
bool finalized = false;
mutable std::mutex mutex;
};
}

View File

@ -0,0 +1,89 @@
#include <Backups/BackupRenamingConfig.h>
#include <Parsers/ASTBackupQuery.h>
namespace DB
{
using Kind = ASTBackupQuery::Kind;
using ElementType = ASTBackupQuery::ElementType;
/// Registers (or replaces) the new name for the table `old_table_name`.
void BackupRenamingConfig::setNewTableName(const DatabaseAndTableName & old_table_name, const DatabaseAndTableName & new_table_name)
{
    old_to_new_table_names.insert_or_assign(old_table_name, new_table_name);
}
/// Registers (or replaces) the new name for the database `old_database_name`.
void BackupRenamingConfig::setNewDatabaseName(const String & old_database_name, const String & new_database_name)
{
    old_to_new_database_names.insert_or_assign(old_database_name, new_database_name);
}
/// Registers (or replaces) the new name for the temporary table `old_temporary_table_name`.
void BackupRenamingConfig::setNewTemporaryTableName(const String & old_temporary_table_name, const String & new_temporary_table_name)
{
    old_to_new_temporary_table_names.insert_or_assign(old_temporary_table_name, new_temporary_table_name);
}
/// Registers the renamings described by the elements of `backup_query`
/// (a shortcut for setFromBackupQueryElements()).
void BackupRenamingConfig::setFromBackupQuery(const ASTBackupQuery & backup_query)
{
setFromBackupQueryElements(backup_query.elements);
}
/// Registers a renaming for every element which names a table, a dictionary, a database or a temporary table.
/// If an element has no new name, the element is registered as renamed to itself.
/// Elements of the types ALL_DATABASES, ALL_TEMPORARY_TABLES and EVERYTHING are ignored.
void BackupRenamingConfig::setFromBackupQueryElements(const ASTBackupQuery::Elements & backup_query_elements)
{
    for (const auto & element : backup_query_elements)
    {
        const auto & old_name = element.name;
        const auto & requested_name = element.new_name;

        switch (element.type)
        {
            case ElementType::TABLE:
            case ElementType::DICTIONARY:
                /// Dictionaries share the renaming map with tables.
                setNewTableName(old_name, requested_name.second.empty() ? old_name : requested_name);
                break;

            case ElementType::DATABASE:
                setNewDatabaseName(old_name.first, requested_name.first.empty() ? old_name.first : requested_name.first);
                break;

            case ElementType::TEMPORARY_TABLE:
                setNewTemporaryTableName(old_name.second, requested_name.second.empty() ? old_name.second : requested_name.second);
                break;

            case ElementType::ALL_DATABASES:
            case ElementType::ALL_TEMPORARY_TABLES:
            case ElementType::EVERYTHING:
                break;
        }
    }
}
/// Returns the new name of a table.
/// A renaming registered for the table itself takes precedence;
/// otherwise only the database part is translated with getNewDatabaseName().
DatabaseAndTableName BackupRenamingConfig::getNewTableName(const DatabaseAndTableName & old_table_name) const
{
    if (auto found = old_to_new_table_names.find(old_table_name); found != old_to_new_table_names.end())
        return found->second;

    const auto & [old_database, table] = old_table_name;
    return {getNewDatabaseName(old_database), table};
}
/// Returns the new name of a database, or `old_database_name` itself if no renaming is registered.
/// In the latter case the returned reference refers to the argument.
const String & BackupRenamingConfig::getNewDatabaseName(const String & old_database_name) const
{
    if (auto found = old_to_new_database_names.find(old_database_name); found != old_to_new_database_names.end())
        return found->second;

    return old_database_name;
}
/// Returns the new name of a temporary table, or `old_temporary_table_name` itself if no renaming is registered.
/// In the latter case the returned reference refers to the argument.
const String & BackupRenamingConfig::getNewTemporaryTableName(const String & old_temporary_table_name) const
{
    if (auto found = old_to_new_temporary_table_names.find(old_temporary_table_name); found != old_to_new_temporary_table_names.end())
        return found->second;

    return old_temporary_table_name;
}
}

View File

@ -0,0 +1,39 @@
#pragma once
#include <Parsers/ASTBackupQuery.h>
#include <Core/Types.h>
#include <map>
#include <unordered_map>
namespace DB
{
using DatabaseAndTableName = std::pair<String, String>;
/// Keeps information about renamings of databases or tables being processed
/// while we're making a backup or while we're restoring from a backup.
/// Three independent maps are kept: for tables, for databases and for temporary tables.
class BackupRenamingConfig
{
public:
BackupRenamingConfig() = default;
/// Register a renaming; a later call for the same old name replaces the earlier one.
void setNewTableName(const DatabaseAndTableName & old_table_name, const DatabaseAndTableName & new_table_name);
void setNewDatabaseName(const String & old_database_name, const String & new_database_name);
void setNewTemporaryTableName(const String & old_temporary_table_name, const String & new_temporary_table_name);
/// Register the renamings described by the elements of a BACKUP/RESTORE query.
void setFromBackupQuery(const ASTBackupQuery & backup_query);
void setFromBackupQueryElements(const ASTBackupQuery::Elements & backup_query_elements);
/// Changes names according to the renaming.
/// For a table without its own renaming only the database part is translated.
/// The functions returning a reference return their argument if no renaming is registered,
/// so the result must not outlive the argument.
DatabaseAndTableName getNewTableName(const DatabaseAndTableName & old_table_name) const;
const String & getNewDatabaseName(const String & old_database_name) const;
const String & getNewTemporaryTableName(const String & old_temporary_table_name) const;
private:
std::map<DatabaseAndTableName, DatabaseAndTableName> old_to_new_table_names;
std::unordered_map<String, String> old_to_new_database_names;
std::unordered_map<String, String> old_to_new_temporary_table_names;
};
using BackupRenamingConfigPtr = std::shared_ptr<const BackupRenamingConfig>;
}

View File

@ -0,0 +1,6 @@
#include <Backups/BackupSettings.h>
namespace DB
{
/// Provides the definitions for BackupSettingsTraits from the list LIST_OF_BACKUP_SETTINGS
/// (both names are introduced in Backups/BackupSettings.h).
IMPLEMENT_SETTINGS_TRAITS(BackupSettingsTraits, LIST_OF_BACKUP_SETTINGS)
}

View File

@ -0,0 +1,16 @@
#pragma once
#include <Core/BaseSettings.h>
namespace DB
{
#define LIST_OF_BACKUP_SETTINGS(M) \
M(String, base_backup, "", "Name of the base backup. Only differences made after the base backup will be included in a newly created backup, so this option allows to make an incremental backup.", 0) \
DECLARE_SETTINGS_TRAITS_ALLOW_CUSTOM_SETTINGS(BackupSettingsTraits, LIST_OF_BACKUP_SETTINGS)
struct BackupSettings : public BaseSettings<BackupSettingsTraits> {};
}

830
src/Backups/BackupUtils.cpp Normal file
View File

@ -0,0 +1,830 @@
#include <Backups/BackupUtils.h>
#include <Backups/BackupEntryFromMemory.h>
#include <Backups/BackupRenamingConfig.h>
#include <Backups/IBackup.h>
#include <Backups/hasCompatibleDataToRestoreTable.h>
#include <Backups/renameInCreateQuery.h>
#include <Common/escapeForFileName.h>
#include <Databases/IDatabase.h>
#include <IO/ReadHelpers.h>
#include <Interpreters/Context.h>
#include <Interpreters/InterpreterCreateQuery.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/ParserCreateQuery.h>
#include <Parsers/parseQuery.h>
#include <Parsers/formatAST.h>
#include <Storages/IStorage.h>
#include <common/insertAtEnd.h>
#include <boost/range/adaptor/reversed.hpp>
#include <filesystem>
namespace DB
{
namespace ErrorCodes
{
extern const int BACKUP_ELEMENT_DUPLICATE;
extern const int BACKUP_IS_EMPTY;
extern const int LOGICAL_ERROR;
extern const int TABLE_ALREADY_EXISTS;
extern const int CANNOT_RESTORE_TABLE;
}
namespace
{
using Kind = ASTBackupQuery::Kind;
using Element = ASTBackupQuery::Element;
using Elements = ASTBackupQuery::Elements;
using ElementType = ASTBackupQuery::ElementType;
/// Gets rid of the element types DICTIONARY and EVERYTHING:
/// DICTIONARY becomes TABLE, and EVERYTHING becomes ALL_DATABASES plus an appended ALL_TEMPORARY_TABLES element.
void replaceElementTypesWithBaseElementTypes(Elements & elements)
{
    /// The size is re-evaluated on each iteration because elements can be appended inside the loop.
    /// Elements are accessed by index only, so appending never leaves a stale reference behind.
    for (size_t pos = 0; pos != elements.size(); ++pos)
    {
        const auto type = elements[pos].type;
        if (type == ElementType::DICTIONARY)
        {
            elements[pos].type = ElementType::TABLE;
        }
        else if (type == ElementType::EVERYTHING)
        {
            elements[pos].type = ElementType::ALL_DATABASES;
            elements.emplace_back().type = ElementType::ALL_TEMPORARY_TABLES;
        }
    }
}
/// For elements of type TABLE: if a name (old or new) has a table part but no database part,
/// `current_database` is used as the database part.
void replaceEmptyDatabaseWithCurrentDatabase(Elements & elements, const String & current_database)
{
    auto fill_database_if_missing = [&current_database](auto & name)
    {
        const bool only_table_is_set = name.first.empty() && !name.second.empty();
        if (only_table_is_set)
            name.first = current_database;
    };

    for (auto & element : elements)
    {
        if (element.type != ElementType::TABLE)
            continue;

        fill_database_if_missing(element.name);
        fill_database_if_missing(element.new_name);
    }
}
/// Gets rid of the element types TEMPORARY_TABLE and ALL_TEMPORARY_TABLES:
/// a temporary table becomes a TABLE in the database DatabaseCatalog::TEMPORARY_DATABASE,
/// and ALL_TEMPORARY_TABLES becomes that DATABASE itself.
void replaceTemporaryTablesWithTemporaryDatabase(Elements & elements)
{
    for (auto & element : elements)
    {
        if (element.type == ElementType::TEMPORARY_TABLE)
        {
            element.type = ElementType::TABLE;
            element.name.first = DatabaseCatalog::TEMPORARY_DATABASE;

            /// The new name gets the database part only if a new table name was specified at all.
            const bool new_table_without_database = element.new_name.first.empty() && !element.new_name.second.empty();
            if (new_table_without_database)
                element.new_name.first = DatabaseCatalog::TEMPORARY_DATABASE;
        }
        else if (element.type == ElementType::ALL_TEMPORARY_TABLES)
        {
            element.type = ElementType::DATABASE;
            element.name.first = DatabaseCatalog::TEMPORARY_DATABASE;
        }
    }
}
/// Makes `new_name` equal to `name` for the TABLE elements without a new table name
/// and for the DATABASE elements without a new database name.
void setNewNamesIfNotSet(Elements & elements)
{
    for (auto & element : elements)
    {
        bool new_name_is_missing = false;
        if (element.type == ElementType::TABLE)
            new_name_is_missing = element.new_name.second.empty();
        else if (element.type == ElementType::DATABASE)
            new_name_is_missing = element.new_name.first.empty();

        if (new_name_is_missing)
            element.new_name = element.name;
    }
}
/// Removes duplications in the elements of a backup query by removing some excessive elements and by updating except_lists.
/// This function helps deduplicate elements in queries like "BACKUP ALL DATABASES, DATABASE xxx USING NAME yyy"
/// (we need a deduplication for that query because `ALL DATABASES` includes `xxx` however we don't want
/// to backup/restore the same database twice while executing the same query).
/// Also this function slightly reorders elements: it puts databases before tables and dictionaries they contain.
/// Only elements of the types TABLE, DATABASE and ALL_DATABASES are expected; LOGICAL_ERROR is thrown for other types.
/// BACKUP_ELEMENT_DUPLICATE is thrown if the same object is specified twice in incompatible ways.
void deduplicateAndReorderElements(Elements & elements)
{
    /// Means "no such element".
    static constexpr size_t no_index = static_cast<size_t>(-1);

    std::set<size_t> skip_indices; /// Indices of elements which should be removed in the end of this function.
    size_t index_all_databases = no_index; /// Index of the first element of type ALL_DATABASES or `no_index` if not found.

    /// The maps below own their keys (String, not std::string_view) on purpose:
    /// `elements` is appended to and its items are swapped further down, which can relocate the names
    /// stored in the elements and would leave views into them dangling.
    struct DatabaseInfo
    {
        size_t index = no_index; /// Index of the first element of type DATABASE for this database.
        std::unordered_map<String, size_t> tables; /// Table name -> index of the first element for that table.
    };
    std::unordered_map<String, DatabaseInfo> databases; /// Found databases and tables.

    for (size_t i = 0; i != elements.size(); ++i)
    {
        auto & element = elements[i];
        switch (element.type)
        {
            case ElementType::TABLE:
            {
                auto & tables = databases.emplace(element.name.first, DatabaseInfo{}).first->second.tables;
                auto it = tables.find(element.name.second);
                if (it == tables.end())
                {
                    tables.emplace(element.name.second, i);
                }
                else
                {
                    /// The same table again: it's allowed only to add more partitions of it under the same new name.
                    size_t prev_index = it->second;
                    if ((elements[i].new_name == elements[prev_index].new_name)
                        && (elements[i].partitions.empty() == elements[prev_index].partitions.empty()))
                    {
                        insertAtEnd(elements[prev_index].partitions, elements[i].partitions);
                        skip_indices.emplace(i);
                    }
                    else
                    {
                        throw Exception(
                            "Table " + backQuote(element.name.first) + "." + backQuote(element.name.second) + " was specified twice",
                            ErrorCodes::BACKUP_ELEMENT_DUPLICATE);
                    }
                }
                break;
            }

            case ElementType::DATABASE:
            {
                auto it = databases.find(element.name.first);
                if (it == databases.end())
                {
                    DatabaseInfo new_db_info;
                    new_db_info.index = i;
                    databases.emplace(element.name.first, new_db_info);
                }
                else if (it->second.index == no_index)
                {
                    /// The database is known only because one of its tables was met before.
                    it->second.index = i;
                }
                else
                {
                    size_t prev_index = it->second.index;
                    if ((elements[i].new_name == elements[prev_index].new_name)
                        && (elements[i].except_list == elements[prev_index].except_list))
                    {
                        skip_indices.emplace(i);
                    }
                    else
                    {
                        throw Exception("Database " + backQuote(element.name.first) + " was specified twice", ErrorCodes::BACKUP_ELEMENT_DUPLICATE);
                    }
                }
                break;
            }

            case ElementType::ALL_DATABASES:
            {
                if (index_all_databases == no_index)
                {
                    index_all_databases = i;
                }
                else
                {
                    size_t prev_index = index_all_databases;
                    if (elements[i].except_list == elements[prev_index].except_list)
                        skip_indices.emplace(i);
                    else
                        throw Exception("The tag ALL DATABASES was specified twice", ErrorCodes::BACKUP_ELEMENT_DUPLICATE);
                }
                break;
            }

            default:
                /// replaceElementTypesWithBaseElementTypes() and replaceTemporaryTablesWithTemporaryDatabase() should have removed all other element types.
                throw Exception("Unexpected element type: " + std::to_string(static_cast<int>(element.type)), ErrorCodes::LOGICAL_ERROR);
        }
    }

    if (index_all_databases != no_index)
    {
        /// Every database mentioned explicitly is excluded from ALL_DATABASES and, if it was mentioned
        /// only through its tables, gets its own element of type DATABASE.
        for (auto & [database_name, database] : databases)
        {
            elements[index_all_databases].except_list.emplace(database_name);
            if (database.index == no_index)
            {
                auto & new_element = elements.emplace_back();
                new_element.type = ElementType::DATABASE;
                new_element.name.first = database_name;
                new_element.new_name = new_element.name;
                database.index = elements.size() - 1;
            }
        }
    }

    /// Tables mentioned explicitly are excluded from the elements of their databases.
    for (auto & [database_name, database] : databases)
    {
        if (database.index == no_index)
            continue;
        for (const auto & [table_name, table_index] : database.tables)
            elements[database.index].except_list.emplace(table_name);
    }

    /// Reorder the elements: databases should be before tables and dictionaries they contain.
    for (auto & [database_name, database] : databases)
    {
        if (database.index == no_index)
            continue;
        size_t min_index = std::numeric_limits<size_t>::max();
        auto min_index_it = database.tables.end();
        for (auto it = database.tables.begin(); it != database.tables.end(); ++it)
        {
            if (min_index > it->second)
            {
                min_index = it->second;
                min_index_it = it;
            }
        }
        if (database.index > min_index)
        {
            std::swap(elements[database.index], elements[min_index]);
            std::swap(database.index, min_index_it->second);
        }
    }

    /// Erase from the end so that the remaining indices stay valid.
    for (auto skip_index : skip_indices | boost::adaptors::reversed)
        elements.erase(elements.begin() + skip_index);
}
/// Returns a normalized copy of `elements`: only the types TABLE, DATABASE and ALL_DATABASES remain,
/// database names and new names are filled in, duplicates are removed.
/// The order of the steps matters: each step relies on the previous ones.
Elements adjustElements(const Elements & elements, const String & current_database)
{
    Elements adjusted = elements;

    replaceElementTypesWithBaseElementTypes(adjusted);
    replaceEmptyDatabaseWithCurrentDatabase(adjusted, current_database);
    replaceTemporaryTablesWithTemporaryDatabase(adjusted);
    setNewNamesIfNotSet(adjusted);
    deduplicateAndReorderElements(adjusted);

    return adjusted;
}
/// Returns the directory inside a backup where the data of the table is stored:
/// "data/<escaped database name>/<escaped table name>/".
/// Throws LOGICAL_ERROR if the database name or the table name is empty.
String getDataPathInBackup(const DatabaseAndTableName & table_name)
{
    if (table_name.first.empty() || table_name.second.empty())
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Database name and table name must not be empty");
    return String{"data/"} + escapeForFileName(table_name.first) + "/" + escapeForFileName(table_name.second) + "/";
}
/// Returns the directory inside a backup where the data of the table created by `create_query` is stored.
/// Returns an empty string if the query has no table name (there is no data for databases).
/// For temporary tables DatabaseCatalog::TEMPORARY_DATABASE is used as the database name.
String getDataPathInBackup(const IAST & create_query)
{
    const auto & create = create_query.as<const ASTCreateQuery &>();
    if (create.table.empty())
        return {};

    String database = create.temporary ? String{DatabaseCatalog::TEMPORARY_DATABASE} : create.database;
    return getDataPathInBackup({database, create.table});
}
/// Returns the path inside a backup where the create query of the table is stored:
/// "metadata/<escaped database name>/<escaped table name>.sql".
/// Throws LOGICAL_ERROR if the database name or the table name is empty.
String getMetadataPathInBackup(const DatabaseAndTableName & table_name)
{
    const auto & [database, table] = table_name;
    if (database.empty() || table.empty())
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Database name and table name must not be empty");

    String metadata_path = "metadata/";
    metadata_path += escapeForFileName(database);
    metadata_path += "/";
    metadata_path += escapeForFileName(table);
    metadata_path += ".sql";
    return metadata_path;
}
/// Returns the path inside a backup where the create query of the database is stored:
/// "metadata/<escaped database name>.sql".
/// Throws LOGICAL_ERROR if the database name is empty.
String getMetadataPathInBackup(const String & database_name)
{
    if (database_name.empty())
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Database name must not be empty");

    String metadata_path = "metadata/";
    metadata_path += escapeForFileName(database_name);
    metadata_path += ".sql";
    return metadata_path;
}
/// Returns the path inside a backup where `create_query` is stored.
/// A query without a table name is treated as a query creating a database.
/// For temporary tables DatabaseCatalog::TEMPORARY_DATABASE is used as the database name.
String getMetadataPathInBackup(const IAST & create_query)
{
    const auto & create = create_query.as<const ASTCreateQuery &>();

    const bool is_database_query = create.table.empty();
    if (is_database_query)
        return getMetadataPathInBackup(create.database);

    String database = create.temporary ? String{DatabaseCatalog::TEMPORARY_DATABASE} : create.database;
    return getMetadataPathInBackup({database, create.table});
}
void backupCreateQuery(const IAST & create_query, BackupEntries & backup_entries)
{
auto metadata_entry = std::make_unique<BackupEntryFromMemory>(serializeAST(create_query));
String metadata_path = getMetadataPathInBackup(create_query);
backup_entries.emplace_back(metadata_path, std::move(metadata_entry));
}
void backupTable(
const DatabaseAndTable & database_and_table,
const String & table_name,
const ASTs & partitions,
const ContextPtr & context,
const BackupRenamingConfigPtr & renaming_config,
BackupEntries & backup_entries)
{
const auto & database = database_and_table.first;
const auto & storage = database_and_table.second;
context->checkAccess(AccessType::SELECT, database->getDatabaseName(), table_name);
auto create_query = database->getCreateTableQuery(table_name, context);
ASTPtr new_create_query = renameInCreateQuery(create_query, renaming_config, context);
backupCreateQuery(*new_create_query, backup_entries);
auto data_backup = storage->backup(partitions, context);
if (!data_backup.empty())
{
String data_path = getDataPathInBackup(*new_create_query);
for (auto & [path_in_backup, backup_entry] : data_backup)
backup_entries.emplace_back(data_path + path_in_backup, std::move(backup_entry));
}
}
void backupDatabase(
const DatabasePtr & database,
const std::set<String> & except_list,
const ContextPtr & context,
const BackupRenamingConfigPtr & renaming_config,
BackupEntries & backup_entries)
{
context->checkAccess(AccessType::SHOW_TABLES, database->getDatabaseName());
auto create_query = database->getCreateDatabaseQuery();
ASTPtr new_create_query = renameInCreateQuery(create_query, renaming_config, context);
backupCreateQuery(*new_create_query, backup_entries);
for (auto it = database->getTablesIteratorForBackup(context); it->isValid(); it->next())
{
if (except_list.contains(it->name()))
continue;
backupTable({database, it->table()}, it->name(), {}, context, renaming_config, backup_entries);
}
}
/// Appends to `backup_entries` all databases known to DatabaseCatalog except the ones listed in `except_list`
/// and except the system database and the database of temporary tables.
void backupAllDatabases(
    const std::set<String> & except_list,
    const ContextPtr & context,
    const BackupRenamingConfigPtr & renaming_config,
    BackupEntries & backup_entries)
{
    for (const auto & [database_name, database] : DatabaseCatalog::instance().getDatabases())
    {
        const bool is_internal
            = (database_name == DatabaseCatalog::SYSTEM_DATABASE) || (database_name == DatabaseCatalog::TEMPORARY_DATABASE);
        if (is_internal || except_list.contains(database_name))
            continue;

        backupDatabase(database, {}, context, renaming_config, backup_entries);
    }
}
void makeDatabaseIfNotExists(const String & database_name, ContextMutablePtr context)
{
if (DatabaseCatalog::instance().isDatabaseExist(database_name))
return;
/// We create and execute `create` query for the database name.
auto create_query = std::make_shared<ASTCreateQuery>();
create_query->database = database_name;
create_query->if_not_exists = true;
InterpreterCreateQuery create_interpreter{create_query, context};
create_interpreter.execute();
}
/// Reads the create query of the table `table_name` from `backup` and parses it.
ASTPtr readCreateQueryFromBackup(const DatabaseAndTableName & table_name, const BackupPtr & backup)
{
    const String path_in_backup = getMetadataPathInBackup(table_name);

    String query_text;
    {
        /// The buffer is needed only while the text is being read.
        auto in = backup->read(path_in_backup)->getReadBuffer();
        readStringUntilEOF(query_text, *in);
    }

    ParserCreateQuery parser;
    return parseQuery(parser, query_text, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
}
/// Reads the create query of the database `database_name` from `backup` and parses it.
ASTPtr readCreateQueryFromBackup(const String & database_name, const BackupPtr & backup)
{
    const String path_in_backup = getMetadataPathInBackup(database_name);

    String query_text;
    {
        /// The buffer is needed only while the text is being read.
        auto in = backup->read(path_in_backup)->getReadBuffer();
        readStringUntilEOF(query_text, *in);
    }

    ParserCreateQuery parser;
    return parseQuery(parser, query_text, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
}
void restoreTable(
const DatabaseAndTableName & table_name,
const ASTs & partitions,
ContextMutablePtr context,
const BackupPtr & backup,
const BackupRenamingConfigPtr & renaming_config,
RestoreObjectsTasks & restore_tasks)
{
ASTPtr create_query = readCreateQueryFromBackup(table_name, backup);
auto new_create_query = typeid_cast<std::shared_ptr<ASTCreateQuery>>(renameInCreateQuery(create_query, renaming_config, context));
restore_tasks.emplace_back([table_name, new_create_query, partitions, context, backup]() -> RestoreDataTasks
{
DatabaseAndTableName new_table_name{new_create_query->database, new_create_query->table};
if (new_create_query->temporary)
new_table_name.first = DatabaseCatalog::TEMPORARY_DATABASE;
context->checkAccess(AccessType::INSERT, new_table_name.first, new_table_name.second);
StoragePtr storage;
for (size_t try_index = 0; try_index != 10; ++try_index)
{
if (DatabaseCatalog::instance().isTableExist({new_table_name.first, new_table_name.second}, context))
{
DatabasePtr existing_database;
StoragePtr existing_storage;
std::tie(existing_database, existing_storage) = DatabaseCatalog::instance().tryGetDatabaseAndTable({new_table_name.first, new_table_name.second}, context);
if (existing_storage)
{
if (auto existing_table_create_query = existing_database->tryGetCreateTableQuery(new_table_name.second, context))
{
if (hasCompatibleDataToRestoreTable(*new_create_query, existing_table_create_query->as<ASTCreateQuery &>()))
{
storage = existing_storage;
break;
}
else
{
String error_message = (new_table_name.first == DatabaseCatalog::TEMPORARY_DATABASE)
? ("Temporary table " + backQuoteIfNeed(new_table_name.second) + " already exists")
: ("Table " + backQuoteIfNeed(new_table_name.first) + "." + backQuoteIfNeed(new_table_name.second)
+ " already exists");
throw Exception(error_message, ErrorCodes::CANNOT_RESTORE_TABLE);
}
}
}
}
makeDatabaseIfNotExists(new_table_name.first, context);
try
{
InterpreterCreateQuery create_interpreter{new_create_query, context};
create_interpreter.execute();
}
catch (Exception & e)
{
if (e.code() != ErrorCodes::TABLE_ALREADY_EXISTS)
throw;
}
}
if (!storage)
{
String error_message = (new_table_name.first == DatabaseCatalog::TEMPORARY_DATABASE)
? ("Could not create temporary table " + backQuoteIfNeed(new_table_name.second) + " for restoring")
: ("Could not create table " + backQuoteIfNeed(new_table_name.first) + "." + backQuoteIfNeed(new_table_name.second)
+ " for restoring");
throw Exception(error_message, ErrorCodes::CANNOT_RESTORE_TABLE);
}
String data_path_in_backup = getDataPathInBackup(table_name);
RestoreDataTasks restore_data_tasks = storage->restoreFromBackup(backup, data_path_in_backup, partitions, context);
/// Keep `storage` alive while we're executing `restore_data_tasks`.
for (auto & restore_data_task : restore_data_tasks)
restore_data_task = [restore_data_task, storage]() { restore_data_task(); };
return restore_data_tasks;
});
}
void restoreDatabase(const String & database_name, const std::set<String> & except_list, ContextMutablePtr context, const BackupPtr & backup, const BackupRenamingConfigPtr & renaming_config, RestoreObjectsTasks & restore_tasks)
{
ASTPtr create_query = readCreateQueryFromBackup(database_name, backup);
auto new_create_query = typeid_cast<std::shared_ptr<ASTCreateQuery>>(renameInCreateQuery(create_query, renaming_config, context));
restore_tasks.emplace_back([database_name, new_create_query, except_list, context, backup, renaming_config]() -> RestoreDataTasks
{
const String & new_database_name = new_create_query->database;
context->checkAccess(AccessType::SHOW_TABLES, new_database_name);
if (!DatabaseCatalog::instance().isDatabaseExist(new_database_name))
{
/// We create and execute `create` query for the database name.
new_create_query->if_not_exists = true;
InterpreterCreateQuery create_interpreter{new_create_query, context};
create_interpreter.execute();
}
RestoreObjectsTasks restore_objects_tasks;
Strings table_names = backup->list("metadata/" + escapeForFileName(database_name) + "/", "/");
for (const String & table_name : table_names)
{
if (except_list.contains(table_name))
continue;
restoreTable({database_name, table_name}, {}, context, backup, renaming_config, restore_objects_tasks);
}
RestoreDataTasks restore_data_tasks;
for (auto & restore_object_task : restore_objects_tasks)
insertAtEnd(restore_data_tasks, std::move(restore_object_task)());
return restore_data_tasks;
});
}
/// Schedules one task which restores every database found in the backup, except those named in `except_list`.
/// The list of databases is read from the backup only when the task is executed.
void restoreAllDatabases(const std::set<String> & except_list, ContextMutablePtr context, const BackupPtr & backup, const BackupRenamingConfigPtr & renaming_config, RestoreObjectsTasks & restore_tasks)
{
    auto restore_everything = [except_list, context, backup, renaming_config]() -> RestoreDataTasks
    {
        RestoreObjectsTasks per_database_tasks;
        for (const String & name : backup->list("metadata/", "/"))
        {
            /// No per-table exceptions apply here, hence the empty list.
            if (!except_list.contains(name))
                restoreDatabase(name, {}, context, backup, renaming_config, per_database_tasks);
        }

        /// Run the per-database tasks right away and gather the data tasks they produce.
        RestoreDataTasks collected;
        for (auto & task : per_database_tasks)
            insertAtEnd(collected, std::move(task)());
        return collected;
    };

    restore_tasks.emplace_back(std::move(restore_everything));
}
}
/// Prepares the entries which a BACKUP query has to write.
/// For every element of the query (a table, a database, or all databases) the corresponding
/// entries are collected; then the result is checked to be non-empty and free of name clashes.
/// Throws BACKUP_IS_EMPTY if nothing was collected, BACKUP_ELEMENT_DUPLICATE if two entries share
/// a name, and LOGICAL_ERROR for an element type that is not handled here.
/// The returned entries are sorted by name.
BackupEntries makeBackupEntries(const Elements & elements, const ContextPtr & context)
{
    BackupEntries backup_entries;

    auto elements2 = adjustElements(elements, context->getCurrentDatabase());
    auto renaming_config = std::make_shared<BackupRenamingConfig>();
    renaming_config->setFromBackupQueryElements(elements2);

    for (const auto & element : elements2)
    {
        switch (element.type)
        {
            case ElementType::TABLE:
            {
                const String & database_name = element.name.first;
                const String & table_name = element.name.second;
                auto [database, storage] = DatabaseCatalog::instance().getDatabaseAndTable({database_name, table_name}, context);
                backupTable({database, storage}, table_name, element.partitions, context, renaming_config, backup_entries);
                break;
            }

            case ElementType::DATABASE:
            {
                const String & database_name = element.name.first;
                auto database = DatabaseCatalog::instance().getDatabase(database_name, context);
                backupDatabase(database, element.except_list, context, renaming_config, backup_entries);
                break;
            }

            case ElementType::ALL_DATABASES:
            {
                backupAllDatabases(element.except_list, context, renaming_config, backup_entries);
                break;
            }

            default:
                throw Exception("Unexpected element type", ErrorCodes::LOGICAL_ERROR); /// other element types have been removed in deduplicateElements()
        }
    }

    /// A backup cannot be empty.
    if (backup_entries.empty())
        throw Exception("Backup must not be empty", ErrorCodes::BACKUP_IS_EMPTY);

    /// Check that all backup entries are unique.
    /// Sorting by name puts entries with equal names next to each other.
    std::sort(
        backup_entries.begin(),
        backup_entries.end(),
        [](const std::pair<String, std::unique_ptr<IBackupEntry>> & lhs, const std::pair<String, std::unique_ptr<IBackupEntry>> & rhs)
        {
            return lhs.first < rhs.first;
        });

    /// Only the names must be compared here. Comparing whole pairs would compare the `unique_ptr`s too,
    /// and those never compare equal for two distinct entries, so a duplicate would go unnoticed.
    auto adjacent = std::adjacent_find(
        backup_entries.begin(),
        backup_entries.end(),
        [](const std::pair<String, std::unique_ptr<IBackupEntry>> & lhs, const std::pair<String, std::unique_ptr<IBackupEntry>> & rhs)
        {
            return lhs.first == rhs.first;
        });

    if (adjacent != backup_entries.end())
        throw Exception("Cannot write multiple entries with the same name " + quoteString(adjacent->first), ErrorCodes::BACKUP_ELEMENT_DUPLICATE);

    return backup_entries;
}
/// Sums up the sizes of the entries which would have to be stored in a new backup.
/// If `base_backup` is set, an entry is not counted when the base backup already has an entry
/// with the same name, the same size and the same (precalculated) checksum.
UInt64 estimateBackupSize(const BackupEntries & backup_entries, const BackupPtr & base_backup)
{
    /// Whether the base backup already stores exactly this entry.
    const auto stored_in_base = [&base_backup](const String & entry_name, const IBackupEntry & entry, UInt64 entry_size) -> bool
    {
        if (!base_backup)
            return false;
        if (!base_backup->exists(entry_name) || (entry_size != base_backup->getSize(entry_name)))
            return false;

        /// Without a precalculated checksum the entry is counted as new.
        const auto checksum = entry.getChecksum();
        return checksum && (*checksum == base_backup->getChecksum(entry_name));
    };

    UInt64 result = 0;
    for (const auto & name_and_entry : backup_entries)
    {
        const UInt64 size = name_and_entry.second->getSize();
        if (!stored_in_base(name_and_entry.first, *name_and_entry.second, size))
            result += size;
    }
    return result;
}
/// Writes all the entries to `backup`, one thread per entry, with at most `num_threads`
/// threads running at the same time (`num_threads == 0` is treated as 1).
/// The first exception thrown by a writing thread is remembered; after that no new threads are started,
/// and the exception is rethrown once all the started threads have been joined.
/// `backup->finalizeWriting()` is called only if every entry was written without an exception.
/// `backup_entries` is always left empty.
void writeBackupEntries(BackupMutablePtr backup, BackupEntries && backup_entries, size_t num_threads)
{
if (!num_threads)
num_threads = 1;
std::vector<ThreadFromGlobalPool> threads;
/// The variables below are shared with the writing threads and protected by `mutex`.
size_t num_active_threads = 0;
std::mutex mutex;
std::condition_variable cond;
std::exception_ptr exception;
for (auto & name_and_entry : backup_entries)
{
auto & name = name_and_entry.first;
auto & entry = name_and_entry.second;
{
std::unique_lock lock{mutex};
if (exception)
break;
/// Wait until one of the running threads finishes if the limit is reached.
cond.wait(lock, [&] { return num_active_threads < num_threads; });
/// An error could have been stored while we were waiting.
if (exception)
break;
++num_active_threads;
}
/// `name` and `entry` refer to elements of `backup_entries`, which isn't modified until all threads are joined.
threads.emplace_back([backup, &name, &entry, &mutex, &cond, &num_active_threads, &exception]()
{
try
{
backup->write(name, std::move(entry));
}
catch (...)
{
/// Only the first exception is kept.
std::lock_guard lock{mutex};
if (!exception)
exception = std::current_exception();
}
{
/// Let the scheduling loop know that a slot has been freed.
std::lock_guard lock{mutex};
--num_active_threads;
cond.notify_all();
}
});
}
for (auto & thread : threads)
thread.join();
backup_entries.clear();
if (exception)
{
/// We don't call finalizeWriting() if an error occurs.
/// And IBackup's implementation should remove the backup in its destructor if finalizeWriting() hasn't been called before.
std::rethrow_exception(exception);
}
backup->finalizeWriting();
}
/// Prepares the tasks for a RESTORE query: one task per element of the query
/// (a table, a database, or all databases).
/// The elements are first adjusted relative to the current database, and the renaming rules
/// are built from the adjusted elements.
/// Throws LOGICAL_ERROR for an element type that is not handled here.
RestoreObjectsTasks makeRestoreTasks(const Elements & elements, ContextMutablePtr context, const BackupPtr & backup)
{
    RestoreObjectsTasks restore_tasks;

    auto elements2 = adjustElements(elements, context->getCurrentDatabase());
    auto renaming_config = std::make_shared<BackupRenamingConfig>();
    renaming_config->setFromBackupQueryElements(elements2);

    for (const auto & element : elements2)
    {
        switch (element.type)
        {
            case ElementType::TABLE:
            {
                const String & database_name = element.name.first;
                const String & table_name = element.name.second;
                restoreTable({database_name, table_name}, element.partitions, context, backup, renaming_config, restore_tasks);
                break;
            }

            case ElementType::DATABASE:
            {
                /// The database must not be looked up in the catalog here: `database_name` is its name
                /// inside the backup, and the database may not exist on this server yet -
                /// the task made by restoreDatabase() creates it when needed.
                const String & database_name = element.name.first;
                restoreDatabase(database_name, element.except_list, context, backup, renaming_config, restore_tasks);
                break;
            }

            case ElementType::ALL_DATABASES:
            {
                restoreAllDatabases(element.except_list, context, backup, renaming_config, restore_tasks);
                break;
            }

            default:
                throw Exception("Unexpected element type", ErrorCodes::LOGICAL_ERROR); /// other element types have been removed in deduplicateElements()
        }
    }

    return restore_tasks;
}
/// Executes restore tasks in two stages.
/// First every task from `restore_tasks` is run one after another in the calling thread;
/// each of them returns data-restoring tasks. Then the collected data-restoring tasks are run,
/// one thread per task, with at most `num_threads` threads running at the same time
/// (`num_threads == 0` is treated as 1).
/// The first exception thrown by a thread is remembered; after that no new threads are started,
/// and the exception is rethrown once all the started threads have been joined.
void executeRestoreTasks(RestoreObjectsTasks && restore_tasks, size_t num_threads)
{
if (!num_threads)
num_threads = 1;
RestoreDataTasks restore_data_tasks;
for (auto & restore_object_task : restore_tasks)
insertAtEnd(restore_data_tasks, std::move(restore_object_task)());
restore_tasks.clear();
std::vector<ThreadFromGlobalPool> threads;
/// The variables below are shared with the worker threads and protected by `mutex`.
size_t num_active_threads = 0;
std::mutex mutex;
std::condition_variable cond;
std::exception_ptr exception;
for (auto & restore_data_task : restore_data_tasks)
{
{
std::unique_lock lock{mutex};
if (exception)
break;
/// Wait until one of the running threads finishes if the limit is reached.
cond.wait(lock, [&] { return num_active_threads < num_threads; });
/// An error could have been stored while we were waiting.
if (exception)
break;
++num_active_threads;
}
/// `restore_data_task` refers to an element of `restore_data_tasks`, which isn't modified until all threads are joined.
threads.emplace_back([&restore_data_task, &mutex, &cond, &num_active_threads, &exception]() mutable
{
try
{
restore_data_task();
/// Drop the task (and whatever it captured) as soon as it's done.
restore_data_task = {};
}
catch (...)
{
/// Only the first exception is kept.
std::lock_guard lock{mutex};
if (!exception)
exception = std::current_exception();
}
{
/// Let the scheduling loop know that a slot has been freed.
std::lock_guard lock{mutex};
--num_active_threads;
cond.notify_all();
}
});
}
for (auto & thread : threads)
thread.join();
restore_data_tasks.clear();
if (exception)
std::rethrow_exception(exception);
}
}

39
src/Backups/BackupUtils.h Normal file
View File

@ -0,0 +1,39 @@
#pragma once
#include <Parsers/ASTBackupQuery.h>
namespace DB
{
/// Forward declarations and aliases of the types used in the signatures below.
class IBackup;
using BackupPtr = std::shared_ptr<const IBackup>;
using BackupMutablePtr = std::shared_ptr<IBackup>;
class IBackupEntry;
using BackupEntryPtr = std::unique_ptr<IBackupEntry>;
/// Pairs of (name of an entry inside the backup, the entry itself).
using BackupEntries = std::vector<std::pair<String, BackupEntryPtr>>;
/// A task restoring some data.
using RestoreDataTask = std::function<void()>;
using RestoreDataTasks = std::vector<RestoreDataTask>;
/// A task restoring an object; when executed it returns the tasks restoring the object's data.
using RestoreObjectTask = std::function<RestoreDataTasks()>;
using RestoreObjectsTasks = std::vector<RestoreObjectTask>;
class Context;
using ContextPtr = std::shared_ptr<const Context>;
using ContextMutablePtr = std::shared_ptr<Context>;
/// Prepares backup entries for the specified elements of a BACKUP query.
/// Throws an exception if there is nothing to back up.
BackupEntries makeBackupEntries(const ASTBackupQuery::Elements & elements, const ContextPtr & context);
/// Estimate total size of the backup which would be written from the specified entries.
/// Entries which are already stored unchanged in `base_backup` (if it's set) are not counted.
UInt64 estimateBackupSize(const BackupEntries & backup_entries, const BackupPtr & base_backup);
/// Write backup entries to an opened backup using up to `num_threads` threads,
/// and finalize the backup if all the entries have been written successfully.
void writeBackupEntries(BackupMutablePtr backup, BackupEntries && backup_entries, size_t num_threads);
/// Prepare restore tasks for the specified elements of a RESTORE query.
RestoreObjectsTasks makeRestoreTasks(const ASTBackupQuery::Elements & elements, ContextMutablePtr context, const BackupPtr & backup);
/// Execute restore tasks; the data-restoring tasks they produce are run using up to `num_threads` threads.
void executeRestoreTasks(RestoreObjectsTasks && restore_tasks, size_t num_threads);
}

View File

65
src/Backups/IBackup.h Normal file
View File

@ -0,0 +1,65 @@
#pragma once
#include <Core/Types.h>
#include <memory>
namespace DB
{
class IBackupEntry;
using BackupEntryPtr = std::unique_ptr<IBackupEntry>;
/// Represents a backup, i.e. a storage of BackupEntries which can be accessed by their names.
/// A backup can be either incremental or non-incremental. An incremental backup doesn't store
/// the data of the entries which are not changed compared to its base backup.
class IBackup
{
public:
virtual ~IBackup() = default;
enum class OpenMode
{
READ,
WRITE,
};
/// A backup can be open either in READ or WRITE mode.
virtual OpenMode getOpenMode() const = 0;
/// Returns the path to the backup.
virtual String getPath() const = 0;
/// Returns names of entries stored in the backup.
/// If `prefix` isn't empty the function will return only the names starting with
/// the prefix (but without the prefix itself).
/// If the `terminator` isn't empty the function will return only the parts of the names
/// before the terminator. For example, list("", "") returns names of all the entries
/// in the backup; and list("data/", "/") returns a kind of a list of folders and
/// files stored in the "data/" directory inside the backup.
virtual Strings list(const String & prefix = "", const String & terminator = "/") const = 0;
/// Checks if an entry with a specified name exists.
virtual bool exists(const String & name) const = 0;
/// Returns the size of the entry's data.
/// This function does the same as `read(name)->getSize()` but faster.
virtual size_t getSize(const String & name) const = 0;
/// Returns the checksum of the entry's data.
/// This function does the same as `read(name)->getChecksum()` but faster.
virtual UInt128 getChecksum(const String & name) const = 0;
/// Reads an entry from the backup.
virtual BackupEntryPtr read(const String & name) const = 0;
/// Puts a new entry to the backup. The backup takes ownership of the entry.
virtual void write(const String & name, BackupEntryPtr entry) = 0;
/// Finalizes writing the backup, should be called after all entries have been successfully written.
virtual void finalizeWriting() = 0;
};
using BackupPtr = std::shared_ptr<const IBackup>;
using BackupMutablePtr = std::shared_ptr<IBackup>;
}

View File

@ -0,0 +1,32 @@
#pragma once
#include <Core/Types.h>
#include <memory>
#include <optional>
#include <vector>
namespace DB
{
class ReadBuffer;
/// A backup entry represents some data which should be written to the backup or has been read from the backup.
class IBackupEntry
{
public:
virtual ~IBackupEntry() = default;
/// Returns the size of the data.
virtual UInt64 getSize() const = 0;
/// Returns the checksum of the data if it's precalculated.
/// Can return nullopt which means the checksum should be calculated from the read buffer.
/// The default implementation always returns nullopt.
virtual std::optional<UInt128> getChecksum() const { return {}; }
/// Returns a read buffer for reading the data. The caller owns the returned buffer.
virtual std::unique_ptr<ReadBuffer> getReadBuffer() const = 0;
};
using BackupEntryPtr = std::unique_ptr<IBackupEntry>;
using BackupEntries = std::vector<std::pair<String, BackupEntryPtr>>;
}

View File

@ -0,0 +1,22 @@
#include <Backups/hasCompatibleDataToRestoreTable.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/formatAST.h>
namespace DB
{
bool hasCompatibleDataToRestoreTable(const ASTCreateQuery & query1, const ASTCreateQuery & query2)
{
/// TODO: Write more subtle condition here.
auto q1 = typeid_cast<std::shared_ptr<ASTCreateQuery>>(query1.clone());
auto q2 = typeid_cast<std::shared_ptr<ASTCreateQuery>>(query2.clone());
/// Remove UUIDs.
q1->uuid = UUIDHelpers::Nil;
q2->uuid = UUIDHelpers::Nil;
return serializeAST(*q1) == serializeAST(*q2);
}
}

View File

@ -0,0 +1,11 @@
#pragma once
namespace DB
{
class ASTCreateQuery;
/// Whether the data of the first table can be inserted to the second table.
/// `query1` and `query2` are the CREATE queries of the first and the second table respectively.
bool hasCompatibleDataToRestoreTable(const ASTCreateQuery & query1, const ASTCreateQuery & query2);
}

View File

@ -0,0 +1,276 @@
#include <Backups/renameInCreateQuery.h>
#include <Backups/BackupRenamingConfig.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <TableFunctions/TableFunctionFactory.h>
#include <Interpreters/InDepthNodeVisitor.h>
#include <Interpreters/evaluateConstantExpression.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
namespace
{
/// A matcher for InDepthNodeVisitor which replaces the names of databases and tables mentioned
/// in a CREATE query with the new names given by a BackupRenamingConfig.
/// It handles the CREATE query itself, table expressions like `db`.`table`, some table functions
/// and table engines which take such names as arguments, and the source of a dictionary.
class RenameInCreateQueryTransformMatcher
{
public:
    struct Data
    {
        BackupRenamingConfigPtr renaming_config;
        ContextPtr context;
    };

    /// Every node of the tree is visited.
    static bool needChildVisit(ASTPtr &, const ASTPtr &) { return true; }

    /// Dispatches to the proper handler by the type of the node; other nodes are left untouched.
    static void visit(ASTPtr & ast, const Data & data)
    {
        if (auto * create = ast->as<ASTCreateQuery>())
            visitCreateQuery(*create, data);
        else if (auto * expr = ast->as<ASTTableExpression>())
            visitTableExpression(*expr, data);
        else if (auto * function = ast->as<ASTFunction>())
            visitFunction(*function, data);
        else if (auto * dictionary = ast->as<ASTDictionary>())
            visitDictionary(*dictionary, data);
    }

private:
    /// Replaces names of tables and databases used in a CREATE query, which can be either CREATE TABLE or
    /// CREATE DICTIONARY or CREATE VIEW or CREATE TEMPORARY TABLE or CREATE DATABASE query.
    /// The UUID is always reset, so it's not carried over from the original query.
    static void visitCreateQuery(ASTCreateQuery & create, const Data & data)
    {
        if (create.temporary)
        {
            if (create.table.empty())
                throw Exception(ErrorCodes::LOGICAL_ERROR, "Table name specified in the CREATE TEMPORARY TABLE query must not be empty");
            create.table = data.renaming_config->getNewTemporaryTableName(create.table);
        }
        else if (create.table.empty())
        {
            if (create.database.empty())
                throw Exception(ErrorCodes::LOGICAL_ERROR, "Database name specified in the CREATE DATABASE query must not be empty");
            create.database = data.renaming_config->getNewDatabaseName(create.database);
        }
        else
        {
            if (create.database.empty())
                throw Exception(ErrorCodes::LOGICAL_ERROR, "Database name specified in the CREATE TABLE query must not be empty");
            std::tie(create.database, create.table) = data.renaming_config->getNewTableName({create.database, create.table});
        }

        create.uuid = UUIDHelpers::Nil;

        /// The table named in the AS clause is renamed only if it's fully qualified.
        if (!create.as_table.empty() && !create.as_database.empty())
            std::tie(create.as_database, create.as_table) = data.renaming_config->getNewTableName({create.as_database, create.as_table});

        /// The same goes for the table named in the TO clause.
        if (!create.to_table_id.table_name.empty() && !create.to_table_id.database_name.empty())
        {
            auto to_table = data.renaming_config->getNewTableName({create.to_table_id.database_name, create.to_table_id.table_name});
            create.to_table_id = StorageID{to_table.first, to_table.second};
        }
    }

    /// Replaces names of a database and a table in a expression like `db`.`table`
    static void visitTableExpression(ASTTableExpression & expr, const Data & data)
    {
        if (!expr.database_and_table_name)
            return;

        ASTIdentifier * id = expr.database_and_table_name->as<ASTIdentifier>();
        if (!id)
            return;

        auto table_id = id->createTable();
        if (!table_id)
            return;

        const String & db_name = table_id->getDatabaseName();
        const String & table_name = table_id->shortName();
        if (db_name.empty() || table_name.empty())
            return;

        String new_db_name, new_table_name;
        std::tie(new_db_name, new_table_name) = data.renaming_config->getNewTableName({db_name, table_name});
        if ((new_db_name == db_name) && (new_table_name == table_name))
            return;

        /// The new identifier takes the place of the old one in `children`.
        /// Merely appending it would leave the stale identifier in the tree.
        ASTPtr new_identifier = std::make_shared<ASTIdentifier>(Strings{new_db_name, new_table_name});
        bool replaced = false;
        for (auto & child : expr.children)
        {
            if (child == expr.database_and_table_name)
            {
                child = new_identifier;
                replaced = true;
            }
        }
        if (!replaced)
            expr.children.push_back(new_identifier);
        expr.database_and_table_name = new_identifier;
    }

    /// Replaces names of tables and databases used in arguments of a table function or a table engine.
    static void visitFunction(ASTFunction & function, const Data & data)
    {
        if ((function.name == "merge") || (function.name == "Merge"))
        {
            visitFunctionMerge(function, data);
        }
        else if ((function.name == "remote") || (function.name == "remoteSecure") || (function.name == "cluster") ||
                 (function.name == "clusterAllReplicas") || (function.name == "Distributed"))
        {
            visitFunctionRemote(function, data);
        }
    }

    /// Replaces a database's name passed via an argument of the function merge() or the table engine Merge.
    static void visitFunctionMerge(ASTFunction & function, const Data & data)
    {
        if (!function.arguments)
            return;

        /// The first argument is a database's name and we can rename it.
        /// The second argument is a regular expression and we can do nothing about it.
        auto & args = function.arguments->as<ASTExpressionList &>().children;
        size_t db_name_arg_index = 0;
        if (args.size() <= db_name_arg_index)
            return;

        String db_name = evaluateConstantExpressionForDatabaseName(args[db_name_arg_index], data.context)->as<ASTLiteral &>().value.safeGet<String>();
        if (db_name.empty())
            return;

        String new_db_name = data.renaming_config->getNewDatabaseName(db_name);
        if (new_db_name == db_name)
            return;
        args[db_name_arg_index] = std::make_shared<ASTLiteral>(new_db_name);
    }

    /// Replaces names of a table and a database passed via arguments of the function remote() or cluster() or the table engine Distributed.
    static void visitFunctionRemote(ASTFunction & function, const Data & data)
    {
        if (!function.arguments)
            return;

        /// The first argument is an address or cluster's name, so we skip it.
        /// The second argument can be either 'db.name' or just 'db' followed by the third argument 'table'.
        auto & args = function.arguments->as<ASTExpressionList &>().children;

        /// The number of arguments must be checked before the second argument is accessed.
        size_t db_name_index = 1;
        if (args.size() <= db_name_index)
            return;

        /// Nothing to rename if the second argument is a table function.
        const auto * second_arg_as_function = args[db_name_index]->as<ASTFunction>();
        if (second_arg_as_function && TableFunctionFactory::instance().isTableFunctionName(second_arg_as_function->name))
            return;

        String db_name = evaluateConstantExpressionForDatabaseName(args[db_name_index], data.context)->as<ASTLiteral &>().value.safeGet<String>();

        String table_name;
        /// `static_cast<size_t>(-1)` means that the table's name is a part of the second argument ('db.name').
        size_t table_name_index = static_cast<size_t>(-1);

        /// The form 'db.name' is not looked for in arguments of the table engine Distributed.
        size_t dot = String::npos;
        if (function.name != "Distributed")
            dot = db_name.find('.');

        if (dot != String::npos)
        {
            table_name = db_name.substr(dot + 1);
            db_name.resize(dot);
        }
        else
        {
            table_name_index = 2;
            if (args.size() <= table_name_index)
                return;
            table_name = evaluateConstantExpressionForDatabaseName(args[table_name_index], data.context)->as<ASTLiteral &>().value.safeGet<String>();
        }

        if (db_name.empty() || table_name.empty())
            return;

        String new_db_name, new_table_name;
        std::tie(new_db_name, new_table_name) = data.renaming_config->getNewTableName({db_name, table_name});
        if ((new_db_name == db_name) && (new_table_name == table_name))
            return;

        if (table_name_index != static_cast<size_t>(-1))
        {
            if (new_db_name != db_name)
                args[db_name_index] = std::make_shared<ASTLiteral>(new_db_name);
            if (new_table_name != table_name)
                args[table_name_index] = std::make_shared<ASTLiteral>(new_table_name);
        }
        else
        {
            /// The single argument 'db.name' is replaced with two arguments: 'db' and 'name'.
            args[db_name_index] = std::make_shared<ASTLiteral>(new_db_name);
            args.insert(args.begin() + db_name_index + 1, std::make_shared<ASTLiteral>(new_table_name));
        }
    }

    /// Replaces names of a table and a database used in source parameters of a dictionary.
    static void visitDictionary(ASTDictionary & dictionary, const Data & data)
    {
        if (!dictionary.source || dictionary.source->name != "clickhouse" || !dictionary.source->elements)
            return;

        auto & elements = dictionary.source->elements->as<ASTExpressionList &>().children;
        String db_name, table_name;
        size_t db_name_index = static_cast<size_t>(-1);
        size_t table_name_index = static_cast<size_t>(-1);

        for (size_t i = 0; i != elements.size(); ++i)
        {
            auto & pair = elements[i]->as<ASTPair &>();
            if (pair.first == "db")
            {
                /// Nothing is renamed if the parameter is specified more than once.
                if (db_name_index != static_cast<size_t>(-1))
                    return;
                db_name = pair.second->as<ASTLiteral &>().value.safeGet<String>();
                db_name_index = i;
            }
            else if (pair.first == "table")
            {
                /// Nothing is renamed if the parameter is specified more than once.
                if (table_name_index != static_cast<size_t>(-1))
                    return;
                table_name = pair.second->as<ASTLiteral &>().value.safeGet<String>();
                table_name_index = i;
            }
        }

        /// Both names are required; non-empty names also guarantee that both indices are valid below.
        if (db_name.empty() || table_name.empty())
            return;

        String new_db_name, new_table_name;
        std::tie(new_db_name, new_table_name) = data.renaming_config->getNewTableName({db_name, table_name});
        if ((new_db_name == db_name) && (new_table_name == table_name))
            return;

        if (new_db_name != db_name)
        {
            auto & pair = elements[db_name_index]->as<ASTPair &>();
            pair.replace(pair.second, std::make_shared<ASTLiteral>(new_db_name));
        }
        if (new_table_name != table_name)
        {
            auto & pair = elements[table_name_index]->as<ASTPair &>();
            pair.replace(pair.second, std::make_shared<ASTLiteral>(new_table_name));
        }
    }
};
using RenameInCreateQueryTransformVisitor = InDepthNodeVisitor<RenameInCreateQueryTransformMatcher, false>;
}
/// Returns a copy of `ast` in which databases and tables are renamed according to `renaming_config`.
/// The passed AST is never modified. If the renaming fails with an exception, the exception is logged
/// and the original `ast` is returned as is.
ASTPtr renameInCreateQuery(const ASTPtr & ast, const BackupRenamingConfigPtr & renaming_config, const ContextPtr & context)
{
    ASTPtr renamed_ast = ast->clone();

    try
    {
        RenameInCreateQueryTransformVisitor::Data visitor_data{renaming_config, context};
        RenameInCreateQueryTransformVisitor visitor{visitor_data};
        visitor.visit(renamed_ast);
    }
    catch (...)
    {
        tryLogCurrentException("Backup", "Error while renaming in AST");
        return ast;
    }

    return renamed_ast;
}
}

View File

@ -0,0 +1,16 @@
#pragma once
#include <memory>
namespace DB
{
/// Forward declarations and aliases of the types used in the signature below.
class IAST;
using ASTPtr = std::shared_ptr<IAST>;
class Context;
using ContextPtr = std::shared_ptr<const Context>;
class BackupRenamingConfig;
using BackupRenamingConfigPtr = std::shared_ptr<const BackupRenamingConfig>;
/// Changes names in AST according to the renaming settings.
/// The passed `ast` is not modified, the function returns a changed copy.
ASTPtr renameInCreateQuery(const ASTPtr & ast, const BackupRenamingConfigPtr & renaming_config, const ContextPtr & context);
}

View File

@ -45,6 +45,7 @@ if (COMPILER_GCC)
endif ()
add_subdirectory (Access)
add_subdirectory (Backups)
add_subdirectory (Columns)
add_subdirectory (Common)
add_subdirectory (Core)
@ -180,6 +181,7 @@ macro(add_object_library name common_path)
endmacro()
add_object_library(clickhouse_access Access)
add_object_library(clickhouse_backups Backups)
add_object_library(clickhouse_core Core)
add_object_library(clickhouse_core_mysql Core/MySQL)
add_object_library(clickhouse_compression Compression)

View File

@ -566,6 +566,23 @@
M(595, BZIP2_STREAM_ENCODER_FAILED) \
M(596, INTERSECT_OR_EXCEPT_RESULT_STRUCTURES_MISMATCH) \
M(597, NO_SUCH_ERROR_CODE) \
M(598, BACKUP_ALREADY_EXISTS) \
M(599, BACKUP_NOT_FOUND) \
M(600, BACKUP_VERSION_NOT_SUPPORTED) \
M(601, BACKUP_DAMAGED) \
M(602, NO_BASE_BACKUP) \
M(603, WRONG_BASE_BACKUP) \
M(604, BACKUP_ENTRY_ALREADY_EXISTS) \
M(605, BACKUP_ENTRY_NOT_FOUND) \
M(606, BACKUP_IS_EMPTY) \
M(607, BACKUP_ELEMENT_DUPLICATE) \
M(608, CANNOT_RESTORE_TABLE) \
\
M(609, FUNCTION_ALREADY_EXISTS) \
M(610, CANNOT_DROP_SYSTEM_FUNCTION) \
M(611, CANNOT_CREATE_RECURSIVE_FUNCTION) \
M(612, OBJECT_ALREADY_STORED_ON_DISK) \
M(613, OBJECT_WAS_NOT_STORED_ON_DISK) \
\
M(998, POSTGRESQL_CONNECTION_FAILURE) \
M(999, KEEPER_EXCEPTION) \

View File

@ -276,14 +276,30 @@ inline void trimLeft(std::string_view & str, char c = ' ')
str.remove_prefix(1);
}
/// Removes all leading characters equal to `c` from the string, in place.
inline void trimLeft(std::string & str, char c = ' ')
{
    /// `npos` (nothing but `c` in the string, or an empty string) makes erase() remove everything.
    const auto first_kept = str.find_first_not_of(c);
    str.erase(0, first_kept);
}
/// Shrinks the view so that it no longer ends with characters equal to `c`.
inline void trimRight(std::string_view & str, char c = ' ')
{
    /// Number of characters to keep; `npos + 1` wraps around to 0, i.e. nothing is kept.
    const auto kept = str.find_last_not_of(c) + 1;
    str.remove_suffix(str.size() - kept);
}
/// Removes all trailing characters equal to `c` from the string, in place.
inline void trimRight(std::string & str, char c = ' ')
{
    /// New length of the string; `npos + 1` wraps around to 0, i.e. the string becomes empty.
    const auto kept = str.find_last_not_of(c) + 1;
    str.resize(kept);
}
/// Shrinks the view so that it neither starts nor ends with characters equal to `c`.
inline void trim(std::string_view & str, char c = ' ')
{
trimLeft(str, c);
trimRight(str, c);
}
/// Removes all leading and trailing characters equal to `c` from the string, in place.
inline void trim(std::string & str, char c = ' ')
{
/// The right side is trimmed first.
trimRight(str, c);
trimLeft(str, c);
}

131
src/Common/isValidUTF8.cpp Normal file
View File

@ -0,0 +1,131 @@
#include <Common/isValidUTF8.h>
#include <cstring>
/// inspired by https://github.com/cyb70289/utf8/
/*
MIT License
Copyright (c) 2019 Yibo Cai
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/
/*
* http://www.unicode.org/versions/Unicode6.0.0/ch03.pdf - page 94
*
* Table 3-7. Well-Formed UTF-8 Byte Sequences
*
* +--------------------+------------+-------------+------------+-------------+
* | Code Points | First Byte | Second Byte | Third Byte | Fourth Byte |
* +--------------------+------------+-------------+------------+-------------+
* | U+0000..U+007F | 00..7F | | | |
* +--------------------+------------+-------------+------------+-------------+
* | U+0080..U+07FF | C2..DF | 80..BF | | |
* +--------------------+------------+-------------+------------+-------------+
* | U+0800..U+0FFF | E0 | A0..BF | 80..BF | |
* +--------------------+------------+-------------+------------+-------------+
* | U+1000..U+CFFF | E1..EC | 80..BF | 80..BF | |
* +--------------------+------------+-------------+------------+-------------+
* | U+D000..U+D7FF | ED | 80..9F | 80..BF | |
* +--------------------+------------+-------------+------------+-------------+
* | U+E000..U+FFFF | EE..EF | 80..BF | 80..BF | |
* +--------------------+------------+-------------+------------+-------------+
* | U+10000..U+3FFFF | F0 | 90..BF | 80..BF | 80..BF |
* +--------------------+------------+-------------+------------+-------------+
* | U+40000..U+FFFFF | F1..F3 | 80..BF | 80..BF | 80..BF |
* +--------------------+------------+-------------+------------+-------------+
* | U+100000..U+10FFFF | F4 | 80..8F | 80..BF | 80..BF |
* +--------------------+------------+-------------+------------+-------------+
*/
namespace DB
{
namespace UTF8
{
/// Checks whether the `len` bytes starting at `data` are a sequence of well-formed UTF-8
/// byte sequences as listed in the table above.
/// Returns a nonzero value (true) if so and zero (false) otherwise; an empty input is valid.
/// A sequence which is cut off by the end of the input makes the input invalid.
UInt8 isValidUTF8(const UInt8 * data, UInt64 len)
{
while (len)
{
/// Length of the sequence starting at `data`.
int bytes;
const UInt8 byte1 = data[0];
/* 00..7F */
if (byte1 <= 0x7F)
{
bytes = 1;
}
/* C2..DF, 80..BF */
/// The comparison of values cast to Int8 is true exactly for the bytes 80..BF
/// (assuming Int8 is a signed 8-bit type, so that these bytes become the most negative values).
else if (len >= 2 && byte1 >= 0xC2 && byte1 <= 0xDF && static_cast<Int8>(data[1]) <= static_cast<Int8>(0xBF))
{
bytes = 2;
}
else if (len >= 3)
{
const UInt8 byte2 = data[1];
bool byte2_ok = static_cast<Int8>(byte2) <= static_cast<Int8>(0xBF);
bool byte3_ok = static_cast<Int8>(data[2]) <= static_cast<Int8>(0xBF);
if (byte2_ok && byte3_ok &&
/* E0, A0..BF, 80..BF */
((byte1 == 0xE0 && byte2 >= 0xA0) ||
/* E1..EC, 80..BF, 80..BF */
(byte1 >= 0xE1 && byte1 <= 0xEC) ||
/* ED, 80..9F, 80..BF */
(byte1 == 0xED && byte2 <= 0x9F) ||
/* EE..EF, 80..BF, 80..BF */
(byte1 >= 0xEE && byte1 <= 0xEF)))
{
bytes = 3;
}
else if (len >= 4)
{
bool byte4_ok = static_cast<Int8>(data[3]) <= static_cast<Int8>(0xBF);
if (byte2_ok && byte3_ok && byte4_ok &&
/* F0, 90..BF, 80..BF, 80..BF */
((byte1 == 0xF0 && byte2 >= 0x90) ||
/* F1..F3, 80..BF, 80..BF, 80..BF */
(byte1 >= 0xF1 && byte1 <= 0xF3) ||
/* F4, 80..8F, 80..BF, 80..BF */
(byte1 == 0xF4 && byte2 <= 0x8F)))
{
bytes = 4;
}
else
{
return false;
}
}
else
{
return false;
}
}
else
{
return false;
}
/// Proceed to the next sequence.
len -= bytes;
data += bytes;
}
return true;
}
}
}

10
src/Common/isValidUTF8.h Normal file
View File

@ -0,0 +1,10 @@
#pragma once
#include <common/types.h>
namespace DB::UTF8
{
/// Checks whether the `len` bytes starting at `data` form well-formed UTF-8.
/// Returns a non-zero value when the whole buffer is valid and 0 otherwise;
/// a zero-length buffer is considered valid.
UInt8 isValidUTF8(const UInt8 * data, UInt64 len);
}

View File

@ -116,6 +116,7 @@ SRCS(
hasLinuxCapability.cpp
hex.cpp
isLocalAddress.cpp
isValidUTF8.cpp
malloc.cpp
memory.cpp
new_delete.cpp

View File

@ -28,12 +28,6 @@ namespace ErrorCodes
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
/// Type trait whose specializations expose a nested `Type` alias.
/// Only declared here; the second parameter exists so that partial specializations
/// can be constrained with std::enable_if_t (as the enum specialization in this file does).
template <typename T, typename SFINAE = void>
struct NearestFieldTypeImpl;
/// Shorthand for NearestFieldTypeImpl<T>::Type.
template <typename T>
using NearestFieldType = typename NearestFieldTypeImpl<T>::Type;
class Field;
using FieldVector = std::vector<Field, AllocatorWithMemoryTracking<Field>>;
@ -168,6 +162,12 @@ template <> constexpr inline bool is_decimal_field<DecimalField<Decimal64>> = tr
template <> constexpr inline bool is_decimal_field<DecimalField<Decimal128>> = true;
template <> constexpr inline bool is_decimal_field<DecimalField<Decimal256>> = true;
/// Type trait whose specializations expose a nested `Type` alias.
/// Only declared here; the second parameter exists so that partial specializations
/// can be constrained with std::enable_if_t (as the enum specialization in this file does).
template <typename T, typename SFINAE = void>
struct NearestFieldTypeImpl;
/// Shorthand for NearestFieldTypeImpl<T>::Type.
template <typename T>
using NearestFieldType = typename NearestFieldTypeImpl<T>::Type;
/// char may be signed or unsigned, and behave identically to signed char or unsigned char,
/// but they are always three different types.
/// signedness of char is different in Linux on x86 and Linux on ARM.
@ -230,6 +230,16 @@ struct NearestFieldTypeImpl<T, std::enable_if_t<std::is_enum_v<T>>>
using Type = NearestFieldType<std::underlying_type_t<T>>;
};
template <typename T>
decltype(auto) castToNearestFieldType(T && x)
{
using U = NearestFieldType<std::decay_t<T>>;
if constexpr (std::is_same_v<std::decay_t<T>, U>)
return std::forward<T>(x);
else
return U(x);
}
/** 32 is enough. Round number is used for alignment and for better arithmetic inside std::vector.
* NOTE: Actually, sizeof(std::string) is 32 when using libc++, so Field is 40 bytes.
*/
@ -322,9 +332,10 @@ public:
/// Templates to avoid ambiguity.
template <typename T, typename Z = void *>
using enable_if_not_field_or_stringlike_t = std::enable_if_t<
!std::is_same_v<std::decay_t<T>, Field>
&& !std::is_same_v<NearestFieldType<std::decay_t<T>>, String>, Z>;
using enable_if_not_field_or_bool_or_stringlike_t = std::enable_if_t<
!std::is_same_v<std::decay_t<T>, Field> &&
!std::is_same_v<std::decay_t<T>, bool> &&
!std::is_same_v<NearestFieldType<std::decay_t<T>>, String>, Z>;
Field() //-V730
: which(Types::Null)
@ -345,7 +356,9 @@ public:
}
template <typename T>
Field(T && rhs, enable_if_not_field_or_stringlike_t<T> = nullptr);
Field(T && rhs, enable_if_not_field_or_bool_or_stringlike_t<T> = nullptr);
/// The forwarding-template constructor is disabled for bool, so bool values take this
/// overload, which delegates with the value converted by castToNearestFieldType.
Field(bool rhs) : Field(castToNearestFieldType(rhs)) {}
/// Create a string inplace.
Field(const std::string_view & str) { create(str.data(), str.size()); }
@ -395,9 +408,11 @@ public:
/// 1. float <--> int needs explicit cast
/// 2. customized types needs explicit cast
template <typename T>
enable_if_not_field_or_stringlike_t<T, Field> &
enable_if_not_field_or_bool_or_stringlike_t<T, Field> &
operator=(T && rhs);
/// The templated operator= is disabled for bool, so bool values take this overload,
/// which assigns the value converted by castToNearestFieldType.
Field & operator= (bool rhs) { return *this = castToNearestFieldType(rhs); }
/// Assignment from a string view; declared here, defined inline outside the class.
Field & operator= (const std::string_view & str);
/// Assignment from a String lvalue; delegates to the string_view overload.
Field & operator= (const String & str) { return *this = std::string_view{str}; }
/// Assignment from a String rvalue; only declared here.
Field & operator= (String && str);
@ -876,24 +891,14 @@ template <> inline constexpr const char * TypeName<AggregateFunctionStateData> =
template <typename T>
decltype(auto) castToNearestFieldType(T && x)
{
using U = NearestFieldType<std::decay_t<T>>;
if constexpr (std::is_same_v<std::decay_t<T>, U>)
return std::forward<T>(x);
else
return U(x);
}
template <typename T>
Field::Field(T && rhs, enable_if_not_field_or_stringlike_t<T>) //-V730
Field::Field(T && rhs, enable_if_not_field_or_bool_or_stringlike_t<T>) //-V730
{
auto && val = castToNearestFieldType(std::forward<T>(rhs));
createConcrete(std::forward<decltype(val)>(val));
}
template <typename T>
Field::enable_if_not_field_or_stringlike_t<T, Field> &
Field::enable_if_not_field_or_bool_or_stringlike_t<T, Field> &
Field::operator=(T && rhs)
{
auto && val = castToNearestFieldType(std::forward<T>(rhs));
@ -908,7 +913,6 @@ Field::operator=(T && rhs)
return *this;
}
inline Field & Field::operator=(const std::string_view & str)
{
if (which != Types::String)

View File

@ -120,7 +120,7 @@ class IColumn;
M(UInt64, parallel_replicas_count, 0, "", 0) \
M(UInt64, parallel_replica_offset, 0, "", 0) \
\
M(Bool, skip_unavailable_shards, false, "If 1, ClickHouse silently skips unavailable shards and nodes unresolvable through DNS. Shard is marked as unavailable when none of the replicas can be reached.", 0) \
M(Bool, skip_unavailable_shards, false, "If true, ClickHouse silently skips unavailable shards and nodes unresolvable through DNS. Shard is marked as unavailable when none of the replicas can be reached.", 0) \
\
M(UInt64, parallel_distributed_insert_select, 0, "Process distributed INSERT SELECT query in the same cluster on local tables on every shard, if 1 SELECT is executed on each shard, if 2 SELECT and INSERT is executed on each shard", 0) \
M(UInt64, distributed_group_by_no_merge, 0, "If 1, Do not merge aggregation states from different servers for distributed queries (shards will process query up to the Complete stage, initiator just proxies the data from the shards). If 2 the initiator will apply ORDER BY and LIMIT stages (it is not in case when shard process query up to the Complete stage)", 0) \
@ -157,8 +157,8 @@ class IColumn;
M(UInt64, min_bytes_to_use_mmap_io, 0, "The minimum number of bytes for reading the data with mmap option during SELECT queries execution. 0 - disabled.", 0) \
M(Bool, checksum_on_read, true, "Validate checksums on reading. It is enabled by default and should be always enabled in production. Please do not expect any benefits in disabling this setting. It may only be used for experiments and benchmarks. The setting only applicable for tables of MergeTree family. Checksums are always validated for other table engines and when receiving data over network.", 0) \
\
M(Bool, force_index_by_date, 0, "Throw an exception if there is a partition key in a table, and it is not used.", 0) \
M(Bool, force_primary_key, 0, "Throw an exception if there is primary key in a table, and it is not used.", 0) \
M(Bool, force_index_by_date, false, "Throw an exception if there is a partition key in a table, and it is not used.", 0) \
M(Bool, force_primary_key, false, "Throw an exception if there is primary key in a table, and it is not used.", 0) \
M(String, force_data_skipping_indices, "", "Comma separated list of strings or literals with the name of the data skipping indices that should be used during query execution, otherwise an exception will be thrown.", 0) \
\
M(Float, max_streams_to_max_threads_ratio, 1, "Allows you to use more sources than the number of threads - to more evenly distribute work across threads. It is assumed that this is a temporary solution, since it will be possible in the future to make the number of sources equal to the number of threads, but for each source to dynamically select available work for itself.", 0) \
@ -171,8 +171,8 @@ class IColumn;
M(UInt64, priority, 0, "Priority of the query. 1 - the highest, higher value - lower priority; 0 - do not use priorities.", 0) \
M(Int64, os_thread_priority, 0, "If non zero - set corresponding 'nice' value for query processing threads. Can be used to adjust query priority for OS scheduler.", 0) \
\
M(Bool, log_queries, 1, "Log requests and write the log to the system table.", 0) \
M(Bool, log_formatted_queries, 0, "Log formatted queries and write the log to the system table.", 0) \
M(Bool, log_queries, true, "Log requests and write the log to the system table.", 0) \
M(Bool, log_formatted_queries, false, "Log formatted queries and write the log to the system table.", 0) \
M(LogQueriesType, log_queries_min_type, QueryLogElementType::QUERY_START, "Minimal type in query_log to log, possible values (from low to high): QUERY_START, QUERY_FINISH, EXCEPTION_BEFORE_START, EXCEPTION_WHILE_PROCESSING.", 0) \
M(Milliseconds, log_queries_min_query_duration_ms, 0, "Minimal time for the query to run, to get to the query_log/query_thread_log/query_views_log.", 0) \
M(UInt64, log_queries_cut_to_length, 100000, "If query length is greater than specified threshold (in bytes), then cut query when writing to query log. Also limit length of printed query in ordinary text log.", 0) \
@ -198,10 +198,10 @@ class IColumn;
\
M(Float, memory_tracker_fault_probability, 0., "For testing of `exception safety` - throw an exception every time you allocate memory with the specified probability.", 0) \
\
M(Bool, enable_http_compression, 0, "Compress the result if the client over HTTP said that it understands data compressed by gzip or deflate.", 0) \
M(Bool, enable_http_compression, false, "Compress the result if the client over HTTP said that it understands data compressed by gzip or deflate.", 0) \
M(Int64, http_zlib_compression_level, 3, "Compression level - used if the client on HTTP said that it understands data compressed by gzip or deflate.", 0) \
\
M(Bool, http_native_compression_disable_checksumming_on_decompress, 0, "If you uncompress the POST data from the client compressed by the native format, do not check the checksum.", 0) \
M(Bool, http_native_compression_disable_checksumming_on_decompress, false, "If you uncompress the POST data from the client compressed by the native format, do not check the checksum.", 0) \
\
M(String, count_distinct_implementation, "uniqExact", "What aggregate function to use for implementation of count(DISTINCT ...)", 0) \
\
@ -215,9 +215,9 @@ class IColumn;
\
M(UInt64, http_headers_progress_interval_ms, 100, "Do not send HTTP headers X-ClickHouse-Progress more frequently than at each specified interval.", 0) \
\
M(Bool, fsync_metadata, 1, "Do fsync after changing metadata for tables and databases (.sql files). Could be disabled in case of poor latency on server with high load of DDL queries and high load of disk subsystem.", 0) \
M(Bool, fsync_metadata, true, "Do fsync after changing metadata for tables and databases (.sql files). Could be disabled in case of poor latency on server with high load of DDL queries and high load of disk subsystem.", 0) \
\
M(Bool, join_use_nulls, 0, "Use NULLs for non-joined rows of outer JOINs for types that can be inside Nullable. If false, use default value of corresponding columns data type.", IMPORTANT) \
M(Bool, join_use_nulls, false, "Use NULLs for non-joined rows of outer JOINs for types that can be inside Nullable. If false, use default value of corresponding columns data type.", IMPORTANT) \
\
M(JoinStrictness, join_default_strictness, JoinStrictness::ALL, "Set default strictness in JOIN query. Possible values: empty string, 'ANY', 'ALL'. If empty, query without strictness will throw exception.", 0) \
M(Bool, any_join_distinct_right_table_keys, false, "Enable old ANY JOIN logic with many-to-one left-to-right table keys mapping for all ANY JOINs. It leads to confusing not equal results for 't1 ANY LEFT JOIN t2' and 't2 ANY RIGHT JOIN t1'. ANY RIGHT JOIN needs one-to-many keys mapping to be consistent with LEFT one.", IMPORTANT) \
@ -225,7 +225,7 @@ class IColumn;
M(UInt64, preferred_block_size_bytes, 1000000, "", 0) \
\
M(UInt64, max_replica_delay_for_distributed_queries, 300, "If set, distributed queries of Replicated tables will choose servers with replication delay in seconds less than the specified value (not inclusive). Zero means do not take delay into account.", 0) \
M(Bool, fallback_to_stale_replicas_for_distributed_queries, 1, "Suppose max_replica_delay_for_distributed_queries is set and all replicas for the queried table are stale. If this setting is enabled, the query will be performed anyway, otherwise the error will be reported.", 0) \
M(Bool, fallback_to_stale_replicas_for_distributed_queries, true, "Suppose max_replica_delay_for_distributed_queries is set and all replicas for the queried table are stale. If this setting is enabled, the query will be performed anyway, otherwise the error will be reported.", 0) \
M(UInt64, preferred_max_column_in_block_size_bytes, 0, "Limit on max column size in block while reading. Helps to decrease cache misses count. Should be close to L2 cache size.", 0) \
\
M(Bool, insert_distributed_sync, false, "If setting is enabled, insert query into distributed waits until data will be sent to all nodes in cluster.", 0) \
@ -241,7 +241,7 @@ class IColumn;
/** Settings for testing connection collector */ \
M(Milliseconds, sleep_in_receive_cancel_ms, 0, "Time to sleep in receiving cancel in TCPHandler", 0) \
\
M(Bool, insert_allow_materialized_columns, 0, "If setting is enabled, Allow materialized columns in INSERT.", 0) \
M(Bool, insert_allow_materialized_columns, false, "If setting is enabled, Allow materialized columns in INSERT.", 0) \
M(Seconds, http_connection_timeout, DEFAULT_HTTP_READ_BUFFER_CONNECTION_TIMEOUT, "HTTP connection timeout.", 0) \
M(Seconds, http_send_timeout, DEFAULT_HTTP_READ_BUFFER_TIMEOUT, "HTTP send timeout", 0) \
M(Seconds, http_receive_timeout, DEFAULT_HTTP_READ_BUFFER_TIMEOUT, "HTTP receive timeout", 0) \
@ -354,24 +354,26 @@ class IColumn;
M(UInt64, max_network_bandwidth_for_user, 0, "The maximum speed of data exchange over the network in bytes per second for all concurrently running user queries. Zero means unlimited.", 0)\
M(UInt64, max_network_bandwidth_for_all_users, 0, "The maximum speed of data exchange over the network in bytes per second for all concurrently running queries. Zero means unlimited.", 0) \
\
M(UInt64, max_backup_threads, 0, "The maximum number of threads to execute a BACKUP or RESTORE request. By default, it is determined automatically.", 0) \
\
M(Bool, log_profile_events, true, "Log query performance statistics into the query_log, query_thread_log and query_views_log.", 0) \
M(Bool, log_query_settings, true, "Log query settings into the query_log.", 0) \
M(Bool, log_query_threads, true, "Log query threads into system.query_thread_log table. This setting have effect only when 'log_queries' is true.", 0) \
M(Bool, log_query_views, true, "Log query dependent views into system.query_views_log table. This setting have effect only when 'log_queries' is true.", 0) \
M(String, log_comment, "", "Log comment into system.query_log table and server log. It can be set to arbitrary string no longer than max_query_size.", 0) \
M(LogsLevel, send_logs_level, LogsLevel::fatal, "Send server text logs with specified minimum level to client. Valid values: 'trace', 'debug', 'information', 'warning', 'error', 'fatal', 'none'", 0) \
M(Bool, enable_optimize_predicate_expression, 1, "If it is set to true, optimize predicates to subqueries.", 0) \
M(Bool, enable_optimize_predicate_expression_to_final_subquery, 1, "Allow push predicate to final subquery.", 0) \
M(Bool, allow_push_predicate_when_subquery_contains_with, 1, "Allows push predicate when subquery contains WITH clause", 0) \
M(Bool, enable_optimize_predicate_expression, true, "If it is set to true, optimize predicates to subqueries.", 0) \
M(Bool, enable_optimize_predicate_expression_to_final_subquery, true, "Allow push predicate to final subquery.", 0) \
M(Bool, allow_push_predicate_when_subquery_contains_with, true, "Allows push predicate when subquery contains WITH clause", 0) \
\
M(UInt64, low_cardinality_max_dictionary_size, 8192, "Maximum size (in rows) of shared global dictionary for LowCardinality type.", 0) \
M(Bool, low_cardinality_use_single_dictionary_for_part, false, "LowCardinality type serialization setting. If is true, than will use additional keys when global dictionary overflows. Otherwise, will create several shared dictionaries.", 0) \
M(Bool, decimal_check_overflow, true, "Check overflow of decimal arithmetic/comparison operations", 0) \
\
M(Bool, prefer_localhost_replica, 1, "1 - always send query to local replica, if it exists. 0 - choose replica to send query between local and remote ones according to load_balancing", 0) \
M(Bool, prefer_localhost_replica, true, "If it's true then queries will be always sent to local replica (if it exists). If it's false then replica to send a query will be chosen between local and remote ones according to load_balancing", 0) \
M(UInt64, max_fetch_partition_retries_count, 5, "Amount of retries while fetching partition from another host.", 0) \
M(UInt64, http_max_multipart_form_data_size, 1024 * 1024 * 1024, "Limit on size of multipart/form-data content. This setting cannot be parsed from URL parameters and should be set in user profile. Note that content is parsed and external tables are created in memory before start of query execution. And this is the only limit that has effect on that stage (limits on max memory usage and max execution time have no effect while reading HTTP form data).", 0) \
M(Bool, calculate_text_stack_trace, 1, "Calculate text stack trace in case of exceptions during query execution. This is the default. It requires symbol lookups that may slow down fuzzing tests when huge amount of wrong queries are executed. In normal cases you should not disable this option.", 0) \
M(Bool, calculate_text_stack_trace, true, "Calculate text stack trace in case of exceptions during query execution. This is the default. It requires symbol lookups that may slow down fuzzing tests when huge amount of wrong queries are executed. In normal cases you should not disable this option.", 0) \
M(Bool, allow_ddl, true, "If it is set to true, then a user is allowed to executed DDL queries.", 0) \
M(Bool, parallel_view_processing, false, "Enables pushing to attached views concurrently instead of sequentially.", 0) \
M(Bool, enable_unaligned_array_join, false, "Allow ARRAY JOIN with multiple arrays that have different sizes. When this settings is enabled, arrays will be resized to the longest one.", 0) \
@ -518,8 +520,8 @@ class IColumn;
#define FORMAT_FACTORY_SETTINGS(M) \
M(Char, format_csv_delimiter, ',', "The character to be considered as a delimiter in CSV data. If setting with a string, a string has to have a length of 1.", 0) \
M(Bool, format_csv_allow_single_quotes, 1, "If it is set to true, allow strings in single quotes.", 0) \
M(Bool, format_csv_allow_double_quotes, 1, "If it is set to true, allow strings in double quotes.", 0) \
M(Bool, format_csv_allow_single_quotes, true, "If it is set to true, allow strings in single quotes.", 0) \
M(Bool, format_csv_allow_double_quotes, true, "If it is set to true, allow strings in double quotes.", 0) \
M(Bool, output_format_csv_crlf_end_of_line, false, "If it is set true, end of line in CSV format will be \\r\\n instead of \\n.", 0) \
M(Bool, input_format_csv_unquoted_null_literal_as_null, false, "Consider unquoted NULL literal as \\N", 0) \
M(Bool, input_format_csv_enum_as_number, false, "Treat inserted enum values in CSV formats as enum indices \\N", 0) \

View File

@ -3,6 +3,40 @@
using namespace DB;
/// A Field built from a bool, by construction or by assignment, must hold a UInt64
/// equal to 0 or 1 that can also be read back as the original bool.
GTEST_TEST(Field, FromBool)
{
    const bool inputs[] = {false, true};

    /// Direct construction from bool.
    for (const bool flag : inputs)
    {
        Field constructed{flag};
        ASSERT_EQ(constructed.getType(), Field::Types::UInt64);
        ASSERT_EQ(constructed.get<UInt64>(), flag ? 1 : 0);
        ASSERT_EQ(constructed.get<bool>(), flag);
    }

    /// Assignment of bool to a default-constructed Field.
    for (const bool flag : inputs)
    {
        Field assigned;
        assigned = flag;
        ASSERT_EQ(assigned.getType(), Field::Types::UInt64);
        ASSERT_EQ(assigned.get<UInt64>(), flag ? 1 : 0);
        ASSERT_EQ(assigned.get<bool>(), flag);
    }
}
GTEST_TEST(Field, Move)
{
Field f;

View File

@ -70,7 +70,7 @@ PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream(
// Do not deduplicate insertions into MV if the main insertion is Ok
if (disable_deduplication_for_children)
insert_context->setSetting("insert_deduplicate", Field{false});
insert_context->setSetting("insert_deduplicate", false);
// Separate min_insert_block_size_rows/min_insert_block_size_bytes for children
if (insert_settings.min_insert_block_size_rows_for_materialized_views)

View File

@ -403,7 +403,7 @@ void DatabaseAtomic::assertCanBeDetached(bool cleanup)
}
DatabaseTablesIteratorPtr
DatabaseAtomic::getTablesIterator(ContextPtr local_context, const IDatabase::FilterByNameFunction & filter_by_table_name)
DatabaseAtomic::getTablesIterator(ContextPtr local_context, const IDatabase::FilterByNameFunction & filter_by_table_name) const
{
auto base_iter = DatabaseWithOwnTablesBase::getTablesIterator(local_context, filter_by_table_name);
return std::make_unique<AtomicDatabaseTablesSnapshotIterator>(std::move(typeid_cast<DatabaseTablesSnapshotIterator &>(*base_iter)));

View File

@ -45,7 +45,7 @@ public:
void drop(ContextPtr /*context*/) override;
DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name) override;
DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name) const override;
void loadStoredObjects(ContextMutablePtr context, bool has_force_restore_data_flag, bool force_attach) override;

View File

@ -52,7 +52,7 @@ DatabaseDictionary::DatabaseDictionary(const String & name_, ContextPtr context_
{
}
Tables DatabaseDictionary::listTables(const FilterByNameFunction & filter_by_name)
Tables DatabaseDictionary::listTables(const FilterByNameFunction & filter_by_name) const
{
Tables tables;
auto load_results = getContext()->getExternalDictionariesLoader().getLoadResults(filter_by_name);
@ -77,7 +77,7 @@ StoragePtr DatabaseDictionary::tryGetTable(const String & table_name, ContextPtr
return createStorageDictionary(getDatabaseName(), load_result, getContext());
}
DatabaseTablesIteratorPtr DatabaseDictionary::getTablesIterator(ContextPtr, const FilterByNameFunction & filter_by_table_name)
DatabaseTablesIteratorPtr DatabaseDictionary::getTablesIterator(ContextPtr, const FilterByNameFunction & filter_by_table_name) const
{
return std::make_unique<DatabaseTablesSnapshotIterator>(listTables(filter_by_table_name), getDatabaseName());
}

View File

@ -34,7 +34,7 @@ public:
StoragePtr tryGetTable(const String & table_name, ContextPtr context) const override;
DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name) override;
DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name) const override;
bool empty() const override;
@ -50,7 +50,7 @@ protected:
private:
Poco::Logger * log;
Tables listTables(const FilterByNameFunction & filter_by_name);
Tables listTables(const FilterByNameFunction & filter_by_name) const;
};
}

Some files were not shown because too many files have changed in this diff Show More