Merge branch 'master' into merge_selecting_backoff

Author: Alexander Tokmakov
Date: 2021-11-11 13:43:10 +03:00
Commit: 627dde7aba
171 changed files with 5248 additions and 1182 deletions

.github/workflows/backport.yml (new file)

@ -0,0 +1,32 @@
name: CherryPick
concurrency:
group: cherry-pick
on: # yamllint disable-line rule:truthy
schedule:
- cron: '0 */3 * * *'
jobs:
CherryPick:
runs-on: [self-hosted, style-checker]
steps:
- name: Check out repository code
uses: actions/checkout@v2
with:
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
fetch-depth: 0
- name: Cherry pick
env:
TEMP_PATH: ${{runner.temp}}/cherry_pick
ROBOT_CLICKHOUSE_SSH_KEY: ${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}}
REPO_OWNER: "ClickHouse"
REPO_NAME: "ClickHouse"
REPO_TEAM: "core"
run: |
sudo pip install GitPython
cd $GITHUB_WORKSPACE/tests/ci
python3 cherry_pick.py
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH


@ -121,8 +121,86 @@ jobs:
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
BuilderDebAsan:
needs: DockerHubPush
if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
runs-on: [self-hosted, builder]
steps:
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ runner.temp }}/images_path
- name: Check out repository code
uses: actions/checkout@v2
with:
submodules: 'recursive'
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
env:
TEMP_PATH: ${{runner.temp}}/build_check
IMAGES_PATH: ${{runner.temp}}/images_path
REPO_COPY: ${{runner.temp}}/build_check/ClickHouse
CACHES_PATH: ${{runner.temp}}/../ccaches
CHECK_NAME: 'ClickHouse build check (actions)'
BUILD_NUMBER: 3
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NUMBER
- name: Upload build URLs to artifacts
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
BuilderDebRelease:
needs: DockerHubPush
if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
runs-on: [self-hosted, builder]
steps:
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ runner.temp }}/images_path
- name: Check out repository code
uses: actions/checkout@v2
with:
submodules: 'recursive'
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
env:
TEMP_PATH: ${{runner.temp}}/build_check
IMAGES_PATH: ${{runner.temp}}/images_path
REPO_COPY: ${{runner.temp}}/build_check/ClickHouse
CACHES_PATH: ${{runner.temp}}/../ccaches
CHECK_NAME: 'ClickHouse build check (actions)'
BUILD_NUMBER: 0
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NUMBER
- name: Upload build URLs to artifacts
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
BuilderReport:
needs: [BuilderDebDebug]
needs: [BuilderDebDebug, BuilderDebAsan, BuilderDebRelease]
runs-on: [self-hosted, style-checker]
steps:
- name: Download json reports
@ -207,6 +285,36 @@ jobs:
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
FunctionalStatelessTestFlakyCheck:
needs: [BuilderDebAsan]
runs-on: [self-hosted, func-tester]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{runner.temp}}/reports_dir
- name: Check out repository code
uses: actions/checkout@v2
- name: Functional test
env:
TEMP_PATH: ${{runner.temp}}/stateless_flaky_asan
REPORTS_PATH: ${{runner.temp}}/reports_dir
CHECK_NAME: 'Stateless tests flaky check (address, actions)'
REPO_COPY: ${{runner.temp}}/stateless_flaky_asan/ClickHouse
REQUIRED_BUILD_NUMBER: 3
KILL_TIMEOUT: 3600
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci
python3 functional_test_check.py "$CHECK_NAME" $REQUIRED_BUILD_NUMBER $KILL_TIMEOUT
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
StressTestDebug:
needs: [BuilderDebDebug]
runs-on: [self-hosted, stress-tester]
@ -265,6 +373,64 @@ jobs:
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
IntegrationTestsAsan:
needs: [BuilderDebAsan]
runs-on: [self-hosted, stress-tester]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{runner.temp}}/reports_dir
- name: Check out repository code
uses: actions/checkout@v2
- name: Integration test
env:
TEMP_PATH: ${{runner.temp}}/integration_tests_asan
REPORTS_PATH: ${{runner.temp}}/reports_dir
CHECK_NAME: 'Integration tests (asan, actions)'
REPO_COPY: ${{runner.temp}}/integration_tests_asan/ClickHouse
REQUIRED_BUILD_NUMBER: 3
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci
python3 integration_test_check.py "$CHECK_NAME" $REQUIRED_BUILD_NUMBER
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
UnitTestsAsan:
needs: [BuilderDebAsan]
runs-on: [self-hosted, func-tester]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{runner.temp}}/reports_dir
- name: Check out repository code
uses: actions/checkout@v2
- name: Unit test
env:
TEMP_PATH: ${{runner.temp}}/unit_tests_asan
REPORTS_PATH: ${{runner.temp}}/reports_dir
CHECK_NAME: 'Unit tests (asan, actions)'
REPO_COPY: ${{runner.temp}}/unit_tests_asan/ClickHouse
REQUIRED_BUILD_NUMBER: 3
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci
python3 unit_tests_check.py "$CHECK_NAME" $REQUIRED_BUILD_NUMBER
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
FastTest:
needs: DockerHubPush
if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
@ -288,8 +454,32 @@ jobs:
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
PVSCheck:
needs: DockerHubPush
if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
runs-on: [self-hosted, func-tester]
steps:
- name: Check out repository code
uses: actions/checkout@v2
with:
submodules: 'recursive'
- name: PVS Check
env:
TEMP_PATH: ${{runner.temp}}/pvs_check
REPO_COPY: ${{runner.temp}}/pvs_check/ClickHouse
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 pvs_check.py
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
FinishCheck:
needs: [StyleCheck, DockerHubPush, CheckLabels, BuilderReport, FastTest, FunctionalStatelessTestDebug, FunctionalStatefulTestDebug, DocsCheck, StressTestDebug, ASTFuzzerTestDebug]
needs: [StyleCheck, DockerHubPush, CheckLabels, BuilderReport, FastTest, FunctionalStatelessTestDebug, FunctionalStatefulTestDebug, DocsCheck, StressTestDebug, ASTFuzzerTestDebug, IntegrationTestsAsan, PVSCheck, UnitTestsAsan]
runs-on: [self-hosted, style-checker]
steps:
- name: Check out repository code


@ -1,4 +1,4 @@
### ClickHouse release v21.11, 2021-11-07
### ClickHouse release v21.11, 2021-11-09
#### Backward Incompatible Change


@ -108,6 +108,11 @@ public:
LocalDate toDate() const { return LocalDate(m_year, m_month, m_day); }
LocalDateTime toStartOfDate() const { return LocalDateTime(m_year, m_month, m_day, 0, 0, 0); }
time_t to_time_t(const DateLUTImpl & time_zone = DateLUT::instance()) const
{
return time_zone.makeDateTime(m_year, m_month, m_day, m_hour, m_minute, m_second);
}
std::string toString() const
{
std::string s{"0000-00-00 00:00:00"};
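
The new `to_time_t` helper converts the stored year/month/day/hour/minute/second fields into a Unix timestamp through the supplied time zone's `DateLUTImpl`. For intuition only, a standalone sketch of the same kind of conversion using just the C++ standard library (which uses the process-local time zone rather than an explicit `DateLUTImpl`, so the result can differ) might look like:

```cpp
#include <ctime>
#include <iostream>

int main()
{
    // Broken-down local date-time, analogous to LocalDateTime's fields.
    std::tm tm{};
    tm.tm_year = 2021 - 1900; // years since 1900
    tm.tm_mon  = 11 - 1;      // months since January
    tm.tm_mday = 11;
    tm.tm_hour = 13;
    tm.tm_min  = 43;
    tm.tm_sec  = 10;
    tm.tm_isdst = -1;         // let the C library decide about DST

    // mktime() interprets the struct in the process-local time zone.
    std::time_t t = std::mktime(&tm);
    std::cout << t << '\n';
}
```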


@ -16,17 +16,6 @@
*/
uint64_t getMemoryAmountOrZero()
{
#if defined(OS_LINUX)
// Try to lookup at the Cgroup limit
std::ifstream cgroup_limit("/sys/fs/cgroup/memory/memory.limit_in_bytes");
if (cgroup_limit.is_open())
{
uint64_t amount = 0; // in case of read error
cgroup_limit >> amount;
return amount;
}
#endif
int64_t num_pages = sysconf(_SC_PHYS_PAGES);
if (num_pages <= 0)
return 0;
@ -35,7 +24,22 @@ uint64_t getMemoryAmountOrZero()
if (page_size <= 0)
return 0;
return num_pages * page_size;
uint64_t memory_amount = num_pages * page_size;
#if defined(OS_LINUX)
// Try to lookup at the Cgroup limit
std::ifstream cgroup_limit("/sys/fs/cgroup/memory/memory.limit_in_bytes");
if (cgroup_limit.is_open())
{
uint64_t memory_limit = 0; // in case of read error
cgroup_limit >> memory_limit;
if (memory_limit > 0 && memory_limit < memory_amount)
memory_amount = memory_limit;
}
#endif
return memory_amount;
}
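
This reordering changes behavior, not just style: previously the cgroup value was returned directly whenever `/sys/fs/cgroup/memory/memory.limit_in_bytes` was readable (on hosts without a configured limit that file typically holds a very large placeholder value), whereas now the cgroup value only ever lowers the amount reported by `sysconf`. A standalone sketch of the new logic, runnable outside ClickHouse, might look like:

```cpp
#include <cstdint>
#include <fstream>
#include <iostream>
#include <unistd.h>

// Physical memory from sysconf(), optionally capped by the cgroup v1 limit.
uint64_t getMemoryAmountOrZeroSketch()
{
    int64_t num_pages = sysconf(_SC_PHYS_PAGES);
    if (num_pages <= 0)
        return 0;

    int64_t page_size = sysconf(_SC_PAGESIZE);
    if (page_size <= 0)
        return 0;

    uint64_t memory_amount = static_cast<uint64_t>(num_pages) * static_cast<uint64_t>(page_size);

    std::ifstream cgroup_limit("/sys/fs/cgroup/memory/memory.limit_in_bytes");
    if (cgroup_limit.is_open())
    {
        uint64_t memory_limit = 0; // stays 0 on read error
        cgroup_limit >> memory_limit;
        if (memory_limit > 0 && memory_limit < memory_amount)
            memory_amount = memory_limit; // the limit only ever lowers the result
    }

    return memory_amount;
}

int main()
{
    std::cout << getMemoryAmountOrZeroSketch() << '\n';
}
```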

contrib/libhdfs3 (submodule)

@ -1 +1 @@
Subproject commit 082e55f17d1c58bf124290fb044fea40e985ec11
Subproject commit a8c37ee001af1ae88e5dfa637ae5b31b087c96d3


@ -81,11 +81,11 @@ then
fi
# Also build fuzzers if any sanitizer specified
if [ -n "$SANITIZER" ]
then
# Currently we are in build/build_docker directory
../docker/packager/other/fuzzer.sh
fi
# if [ -n "$SANITIZER" ]
# then
# # Currently we are in build/build_docker directory
# ../docker/packager/other/fuzzer.sh
# fi
ccache --show-config ||:
ccache --show-stats ||:


@ -31,15 +31,15 @@ then
fi
# Also build fuzzers if any sanitizer specified
if [ -n "$SANITIZER" ]
then
# Script is supposed that we are in build directory.
mkdir -p build/build_docker
cd build/build_docker
# Launching build script
../docker/packager/other/fuzzer.sh
cd
fi
# if [ -n "$SANITIZER" ]
# then
# # Script is supposed that we are in build directory.
# mkdir -p build/build_docker
# cd build/build_docker
# # Launching build script
# ../docker/packager/other/fuzzer.sh
# cd
# fi
ccache --show-config ||:
ccache --show-stats ||:


@ -2,7 +2,7 @@ version: '2.3'
services:
postgres1:
image: postgres
command: ["postgres", "-c", "logging_collector=on", "-c", "log_directory=/postgres/logs", "-c", "log_filename=postgresql.log", "-c", "log_statement=all", "-c", "max_connections=200"]
command: ["postgres", "-c", "wal_level=logical", "-c", "max_replication_slots=2", "-c", "logging_collector=on", "-c", "log_directory=/postgres/logs", "-c", "log_filename=postgresql.log", "-c", "log_statement=all", "-c", "max_connections=200"]
restart: always
expose:
- ${POSTGRES_PORT}
@ -11,7 +11,6 @@ services:
interval: 10s
timeout: 5s
retries: 5
command: [ "postgres", "-c", "wal_level=logical", "-c", "max_replication_slots=2"]
networks:
default:
aliases:


@ -34,7 +34,7 @@ then
if [ "${ARCH}" = "x86_64" ] if [ "${ARCH}" = "x86_64" ]
then then
DIR="macos" DIR="macos"
elif [ "${ARCH}" = "aarch64" ] elif [ "${ARCH}" = "aarch64" -o "${ARCH}" = "arm64" ]
then then
DIR="macos-aarch64" DIR="macos-aarch64"
fi fi


@ -138,6 +138,9 @@ empty input fields are replaced with default values. For complex default express
Each element of [Nested](../sql-reference/data-types/nested-data-structures/nested.md) structures is represented as array.
In input data, ENUM values can be represented as names or as IDs. First, we try to match the input value to the ENUM name. If that fails and the input value is a number, we try to match this number to the ENUM ID.
If the input data contains only ENUM IDs, it's recommended to enable the setting [input_format_tsv_enum_as_number](../operations/settings/settings.md#settings-input_format_tsv_enum_as_number) to optimize ENUM parsing.
For example:
``` sql
@ -408,6 +411,9 @@ empty unquoted input values are replaced with default values. For complex defaul
`NULL` is formatted according to setting [format_csv_null_representation](../operations/settings/settings.md#settings-format_csv_null_representation) (default value is `\N`).
In input data, ENUM values can be represented as names or as IDs. First, we try to match the input value to the ENUM name. If that fails and the input value is a number, we try to match this number to the ENUM ID.
If the input data contains only ENUM IDs, it's recommended to enable the setting [input_format_csv_enum_as_number](../operations/settings/settings.md#settings-input_format_csv_enum_as_number) to optimize ENUM parsing.
The CSV format supports the output of totals and extremes the same way as `TabSeparated`.
## CSVWithNames {#csvwithnames}


@ -172,5 +172,6 @@ toc_title: Adopters
| <a href="https://shop.okraina.ru/" class="favicon">ООО «МПЗ Богородский»</a> | Agriculture | — | — | — | [Article in Russian, November 2020](https://cloud.yandex.ru/cases/okraina) | | <a href="https://shop.okraina.ru/" class="favicon">ООО «МПЗ Богородский»</a> | Agriculture | — | — | — | [Article in Russian, November 2020](https://cloud.yandex.ru/cases/okraina) |
| <a href="https://domclick.ru/" class="favicon">ДомКлик</a> | Real Estate | — | — | — | [Article in Russian, October 2021](https://habr.com/ru/company/domclick/blog/585936/) | | <a href="https://domclick.ru/" class="favicon">ДомКлик</a> | Real Estate | — | — | — | [Article in Russian, October 2021](https://habr.com/ru/company/domclick/blog/585936/) |
| <a href="https://www.deepl.com/" class="favicon">Deepl</a> | Machine Learning | — | — | — | [Video, October 2021](https://www.youtube.com/watch?v=WIYJiPwxXdM&t=1182s) | | <a href="https://www.deepl.com/" class="favicon">Deepl</a> | Machine Learning | — | — | — | [Video, October 2021](https://www.youtube.com/watch?v=WIYJiPwxXdM&t=1182s) |
| <a href="https://vercel.com/" class="favicon">Vercel</a> | Traffic and Performance Analytics | — | — | — | Direct reference, October 2021 |
[Original article](https://clickhouse.com/docs/en/introduction/adopters/) <!--hide-->


@ -425,12 +425,12 @@ Enabled by default.
## input_format_tsv_enum_as_number {#settings-input_format_tsv_enum_as_number}
Enables or disables parsing enum values as enum ids for TSV input format.
When enabled, enum values are always treated as enum IDs for the TSV input format. It's recommended to enable this setting if the data contains only enum IDs, to optimize enum parsing.
Possible values:
- 0 — Enum values are parsed as values.
- 0 — Enum values are parsed as values or as enum IDs.
- 1 — Enum values are parsed as enum IDs.
- 1 — Enum values are parsed only as enum IDs.
Default value: 0.
@ -444,10 +444,39 @@ CREATE TABLE table_with_enum_column_for_tsv_insert (Id Int32,Value Enum('first'
When the `input_format_tsv_enum_as_number` setting is enabled:
Query:
```sql
SET input_format_tsv_enum_as_number = 1;
INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 102 2;
INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 103 1;
SELECT * FROM table_with_enum_column_for_tsv_insert;
```
Result:
```text
┌──Id─┬─Value──┐
│ 102 │ second │
└─────┴────────┘
```
Query:
```sql
SET input_format_tsv_enum_as_number = 1;
INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 103 'first';
```
throws an exception.
When the `input_format_tsv_enum_as_number` setting is disabled:
Query:
```sql
SET input_format_tsv_enum_as_number = 0;
INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 102 2;
INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 103 'first';
SELECT * FROM table_with_enum_column_for_tsv_insert;
```
@ -462,15 +491,6 @@ Result:
└─────┴────────┘
```
When the `input_format_tsv_enum_as_number` setting is disabled, the `INSERT` query:
```sql
SET input_format_tsv_enum_as_number = 0;
INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 102 2;
```
throws an exception.
## input_format_null_as_default {#settings-input-format-null-as-default}
Enables or disables the initialization of [NULL](../../sql-reference/syntax.md#null-literal) fields with [default values](../../sql-reference/statements/create/table.md#create-default-values), if data type of these fields is not [nullable](../../sql-reference/data-types/nullable.md#data_type-nullable).
@ -1216,6 +1236,9 @@ Default value: `0`.
Could be used for throttling speed when replicating the data to add or replace new nodes.
!!! note "Note"
60000000 bytes/s approximately corresponds to 457 Mbps (60000000 / 1024 / 1024 * 8).
## max_replicated_sends_network_bandwidth_for_server {#max_replicated_sends_network_bandwidth_for_server}
Limits the maximum speed of data exchange over the network in bytes per second for [replicated](../../engines/table-engines/mergetree-family/replication.md) sends for the server. Only has meaning at server startup. You can also limit the speed for a particular table with [max_replicated_sends_network_bandwidth](../../operations/settings/merge-tree-settings.md#max_replicated_sends_network_bandwidth) setting.
@ -1233,6 +1256,9 @@ Default value: `0`.
Could be used for throttling speed when replicating the data to add or replace new nodes.
!!! note "Note"
60000000 bytes/s approximately corresponds to 457 Mbps (60000000 / 1024 / 1024 * 8).
## connect_timeout_with_failover_ms {#connect-timeout-with-failover-ms}
The timeout in milliseconds for connecting to a remote server for a Distributed table engine, if the shard and replica sections are used in the cluster definition.
@ -1588,12 +1614,12 @@ The character is interpreted as a delimiter in the CSV data. By default, the del
## input_format_csv_enum_as_number {#settings-input_format_csv_enum_as_number}
Enables or disables parsing enum values as enum ids for CSV input format.
When enabled, enum values are always treated as enum IDs for the CSV input format. It's recommended to enable this setting if the data contains only enum IDs, to optimize enum parsing.
Possible values:
- 0 — Enum values are parsed as values.
- 0 — Enum values are parsed as values or as enum IDs.
- 1 — Enum values are parsed as enum IDs.
- 1 — Enum values are parsed only as enum IDs.
Default value: 0.
@ -1607,29 +1633,52 @@ CREATE TABLE table_with_enum_column_for_csv_insert (Id Int32,Value Enum('first'
When the `input_format_csv_enum_as_number` setting is enabled:
Query:
```sql
SET input_format_csv_enum_as_number = 1;
INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 102,2;
INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 102,2
```
Result:
```text
┌──Id─┬─Value──┐
│ 102 │ second │
└─────┴────────┘
```
Query:
```sql
SET input_format_csv_enum_as_number = 1;
INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 103,'first'
```
throws an exception.
When the `input_format_csv_enum_as_number` setting is disabled:
Query:
```sql
SET input_format_csv_enum_as_number = 0;
INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 102,2
INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 103,'first'
SELECT * FROM table_with_enum_column_for_csv_insert;
```
Result:
```text
┌──Id─┬─Value─────┐
┌──Id─┬─Value──┐
│ 102 │ second │
└─────┴───────────┘
└─────┴────────┘
┌──Id─┬─Value─┐
│ 103 │ first │
└─────┴───────┘
```
When the `input_format_csv_enum_as_number` setting is disabled, the `INSERT` query:
```sql
SET input_format_csv_enum_as_number = 0;
INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 102,2;
```
throws an exception.
## output_format_csv_crlf_end_of_line {#settings-output-format-csv-crlf-end-of-line}
Use DOS/Windows-style line separator (CRLF) in CSV instead of Unix style (LF).


@ -0,0 +1,77 @@
# system.session_log {#system_tables-session_log}
Contains information about all successful and failed login and logout events.
Columns:
- `type` ([Enum8](../../sql-reference/data-types/enum.md)) — Login/logout result. Possible values:
- `LoginFailure` — Login error.
- `LoginSuccess` — Successful login.
- `Logout` — Logout from the system.
- `auth_id` ([UUID](../../sql-reference/data-types/uuid.md)) — Authentication ID, which is a UUID that is automatically generated each time a user logs in.
- `session_id` ([String](../../sql-reference/data-types/string.md)) — Session ID that is passed by client via [HTTP](../../interfaces/http.md) interface.
- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Login/logout date.
- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Login/logout time.
- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Login/logout starting time with microseconds precision.
- `user` ([String](../../sql-reference/data-types/string.md)) — User name.
- `auth_type` ([Enum8](../../sql-reference/data-types/enum.md)) — The authentication type. Possible values:
- `NO_PASSWORD`
- `PLAINTEXT_PASSWORD`
- `SHA256_PASSWORD`
- `DOUBLE_SHA1_PASSWORD`
- `LDAP`
- `KERBEROS`
- `profiles` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — The list of profiles set for all roles and/or users.
- `roles` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — The list of roles to which the profile is applied.
- `settings` ([Array](../../sql-reference/data-types/array.md)([Tuple](../../sql-reference/data-types/tuple.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md), [String](../../sql-reference/data-types/string.md)))) — Settings that were changed when the client logged in/out.
- `client_address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — The IP address that was used to log in/out.
- `client_port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — The client port that was used to log in/out.
- `interface` ([Enum8](../../sql-reference/data-types/enum.md)) — The interface from which the login was initiated. Possible values:
- `TCP`
- `HTTP`
- `gRPC`
- `MySQL`
- `PostgreSQL`
- `client_hostname` ([String](../../sql-reference/data-types/string.md)) — The hostname of the client machine where the [clickhouse-client](../../interfaces/cli.md) or another TCP client is run.
- `client_name` ([String](../../sql-reference/data-types/string.md)) — The `clickhouse-client` or another TCP client name.
- `client_revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Revision of the `clickhouse-client` or another TCP client.
- `client_version_major` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The major version of the `clickhouse-client` or another TCP client.
- `client_version_minor` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The minor version of the `clickhouse-client` or another TCP client.
- `client_version_patch` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Patch component of the `clickhouse-client` or another TCP client version.
- `failure_reason` ([String](../../sql-reference/data-types/string.md)) — The exception message containing the reason for the login/logout failure.
**Example**
Query:
``` sql
SELECT * FROM system.session_log LIMIT 1 FORMAT Vertical;
```
Result:
``` text
Row 1:
──────
type: LoginSuccess
auth_id: 45e6bd83-b4aa-4a23-85e6-bd83b4aa1a23
session_id:
event_date: 2021-10-14
event_time: 2021-10-14 20:33:52
event_time_microseconds: 2021-10-14 20:33:52.104247
user: default
auth_type: PLAINTEXT_PASSWORD
profiles: ['default']
roles: []
settings: [('load_balancing','random'),('max_memory_usage','10000000000')]
client_address: ::ffff:127.0.0.1
client_port: 38490
interface: TCP
client_hostname:
client_name: ClickHouse client
client_revision: 54449
client_version_major: 21
client_version_minor: 10
client_version_patch: 0
failure_reason:
```


@ -6,7 +6,7 @@ Columns:
- `policy_name` ([String](../../sql-reference/data-types/string.md)) — Name of the storage policy.
- `volume_name` ([String](../../sql-reference/data-types/string.md)) — Volume name defined in the storage policy.
- `volume_priority` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Volume order number in the configuration.
- `volume_priority` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Volume order number in the configuration; the data fills the volumes according to this priority, i.e. data during inserts and merges is written to volumes with a lower priority (taking into account other rules: TTL, `max_data_part_size`, `move_factor`).
- `disks` ([Array(String)](../../sql-reference/data-types/array.md)) — Disk names, defined in the storage policy.
- `max_data_part_size` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Maximum size of a data part that can be stored on volume disks (0 — no limit).
- `move_factor` ([Float64](../../sql-reference/data-types/float.md)) — Ratio of free disk space. When the ratio exceeds the value of configuration parameter, ClickHouse start to move data to the next volume in order.


@ -75,6 +75,9 @@ Ext4 is the most reliable option. Set the mount options `noatime`.
XFS should be avoided. It works mostly fine but there are some reports about lower performance.
Most other file systems should also work fine.
Do not use compressed filesystems, because ClickHouse does compression on its own, and does it better.
It's not recommended to use encrypted filesystems, because you can use built-in encryption in ClickHouse, which is better.
## Linux Kernel {#linux-kernel}
Don't use an outdated Linux kernel.


@ -2463,3 +2463,39 @@ Result:
│ Linux 4.15.0-55-generic │
└─────────────────────────┘
```
## zookeeperSessionUptime {#zookeepersessionuptime}
Returns the uptime of the current ZooKeeper session in seconds.
**Syntax**
``` sql
zookeeperSessionUptime()
```
**Arguments**
- None.
**Returned value**
- Uptime of the current ZooKeeper session in seconds.
Type: [UInt32](../../sql-reference/data-types/int-uint.md).
**Example**
Query:
``` sql
SELECT zookeeperSessionUptime();
```
Result:
``` text
┌─zookeeperSessionUptime()─┐
│ 286 │
└──────────────────────────┘
```


@ -212,6 +212,9 @@ SET mutations_sync = 2;
CREATE TABLE tmp (x Int64) ENGINE = MergeTree() ORDER BY tuple() PARTITION BY tuple();
INSERT INTO tmp SELECT * FROM system.numbers LIMIT 10;
ALTER TABLE tmp ADD COLUMN s String MATERIALIZED toString(x);
ALTER TABLE tmp MATERIALIZE COLUMN s;
SELECT groupArray(x), groupArray(s) FROM tmp;
```


@ -1146,6 +1146,9 @@ SELECT type, query FROM system.query_log WHERE log_comment = 'log_comment test'
Может быть использована для ограничения скорости сети при репликации данных для добавления или замены новых узлов.
!!! note "Note"
60000000 байт/с примерно соответствует 457 Мбит/с (60000000 / 1024 / 1024 * 8).
## max_replicated_sends_network_bandwidth_for_server {#max_replicated_sends_network_bandwidth_for_server}
Ограничивает максимальную скорость обмена данными в сети (в байтах в секунду) для [репликационных](../../engines/table-engines/mergetree-family/replication.md) отправок. Применяется только при запуске сервера. Можно также ограничить скорость для конкретной таблицы с помощью настройки [max_replicated_sends_network_bandwidth](../../operations/settings/merge-tree-settings.md#max_replicated_sends_network_bandwidth).
@ -1163,6 +1166,9 @@ SELECT type, query FROM system.query_log WHERE log_comment = 'log_comment test'
Может быть использована для ограничения скорости сети при репликации данных для добавления или замены новых узлов.
!!! note "Note"
60000000 байт/с примерно соответствует 457 Мбит/с (60000000 / 1024 / 1024 * 8).
## connect_timeout_with_failover_ms {#connect-timeout-with-failover-ms}
Таймаут в миллисекундах на соединение с удалённым сервером, для движка таблиц Distributed, если используются секции shard и replica в описании кластера.


@ -0,0 +1,77 @@
# system.session_log {#system_tables-session_log}
Содержит информацию о всех успешных и неудачных событиях входа и выхода из системы.
Столбцы:
- `type` ([Enum8](../../sql-reference/data-types/enum.md)) — результат входа или выхода из системы. Возможные значения:
- `LoginFailure` — ошибка входа в систему.
- `LoginSuccess` — успешный вход в систему.
- `Logout` — выход из системы.
- `auth_id` ([UUID](../../sql-reference/data-types/uuid.md)) — идентификатор аутентификации, представляющий собой UUID, который автоматически генерируется при каждом входе пользователя в систему.
- `session_id` ([String](../../sql-reference/data-types/string.md)) — идентификатор сессии, который передается клиентом через [HTTP](../../interfaces/http.md)-интерфейс.
- `event_date` ([Date](../../sql-reference/data-types/date.md)) — дата входа или выхода из системы.
- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — время входа или выхода из системы.
- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — время начала входа или выхода из системы с точностью до микросекунд.
- `user` ([String](../../sql-reference/data-types/string.md)) — имя пользователя.
- `auth_type` ([Enum8](../../sql-reference/data-types/enum.md)) — тип аутентификации. Возможные значения:
- `NO_PASSWORD`
- `PLAINTEXT_PASSWORD`
- `SHA256_PASSWORD`
- `DOUBLE_SHA1_PASSWORD`
- `LDAP`
- `KERBEROS`
- `profiles` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — список профилей, установленных для всех ролей и (или) пользователей.
- `roles` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — список ролей, к которым применяется данный профиль.
- `settings` ([Array](../../sql-reference/data-types/array.md)([Tuple](../../sql-reference/data-types/tuple.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md), [String](../../sql-reference/data-types/string.md)))) — настройки, которые были изменены при входе или выходе клиента из системы.
- `client_address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP-адрес, который использовался для входа или выхода из системы.
- `client_port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — порт клиента, который использовался для входа или выхода из системы.
- `interface` ([Enum8](../../sql-reference/data-types/enum.md)) — интерфейс, с которого был инициирован вход в систему. Возможные значения:
- `TCP`
- `HTTP`
- `gRPC`
- `MySQL`
- `PostgreSQL`
- `client_hostname` ([String](../../sql-reference/data-types/string.md)) — имя хоста клиентской машины, с которой присоединился [clickhouse-client](../../interfaces/cli.md) или другой TCP клиент.
- `client_name` ([String](../../sql-reference/data-types/string.md)) — `clickhouse-client` или другой TCP клиент.
- `client_revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ревизия `clickhouse-client` или другого TCP клиента.
- `client_version_major` ([UInt32](../../sql-reference/data-types/int-uint.md)) — старшая версия `clickhouse-client` или другого TCP клиента.
- `client_version_minor` ([UInt32](../../sql-reference/data-types/int-uint.md)) — младшая версия `clickhouse-client` или другого TCP клиента.
- `client_version_patch` ([UInt32](../../sql-reference/data-types/int-uint.md)) — патч `clickhouse-client` или другого TCP клиента.
- `failure_reason` ([String](../../sql-reference/data-types/string.md)) — сообщение об исключении, содержащее причину сбоя при входе или выходе из системы.
**Пример**
Запрос:
``` sql
SELECT * FROM system.session_log LIMIT 1 FORMAT Vertical;
```
Результат:
``` text
Row 1:
──────
type: LoginSuccess
auth_id: 45e6bd83-b4aa-4a23-85e6-bd83b4aa1a23
session_id:
event_date: 2021-10-14
event_time: 2021-10-14 20:33:52
event_time_microseconds: 2021-10-14 20:33:52.104247
user: default
auth_type: PLAINTEXT_PASSWORD
profiles: ['default']
roles: []
settings: [('load_balancing','random'),('max_memory_usage','10000000000')]
client_address: ::ffff:127.0.0.1
client_port: 38490
interface: TCP
client_hostname:
client_name: ClickHouse client
client_revision: 54449
client_version_major: 21
client_version_minor: 10
client_version_patch: 0
failure_reason:
```


@ -6,7 +6,7 @@
- `policy_name` ([String](../../sql-reference/data-types/string.md)) — имя политики хранения.
- `volume_name` ([String](../../sql-reference/data-types/string.md)) — имя тома, который содержится в политике хранения.
- `volume_priority` ([UInt64](../../sql-reference/data-types/int-uint.md)) — порядковый номер тома согласно конфигурации.
- `volume_priority` ([UInt64](../../sql-reference/data-types/int-uint.md)) — порядковый номер тома согласно конфигурации, приоритет согласно которому данные заполняют тома, т.е. данные при инсертах и мержах записываются на тома с более низким приоритетом (с учетом других правил: TTL, `max_data_part_size`, `move_factor`).
- `disks` ([Array(String)](../../sql-reference/data-types/array.md)) — имена дисков, содержащихся в политике хранения.
- `max_data_part_size` ([UInt64](../../sql-reference/data-types/int-uint.md)) — максимальный размер куска данных, который может храниться на дисках тома (0 — без ограничений).
- `move_factor` — доля доступного свободного места на томе, если места становится меньше, то данные начнут перемещение на следующий том, если он есть (по умолчанию 0.1).


@ -2411,3 +2411,39 @@ SELECT getOSKernelVersion();
│ Linux 4.15.0-55-generic │
└─────────────────────────┘
```
## zookeeperSessionUptime {#zookeepersessionuptime}
Возвращает аптайм текущего сеанса ZooKeeper в секундах.
**Синтаксис**
``` sql
zookeeperSessionUptime()
```
**Аргументы**
- Нет.
**Возвращаемое значение**
- Аптайм текущего сеанса ZooKeeper в секундах.
Тип: [UInt32](../../sql-reference/data-types/int-uint.md).
**Пример**
Запрос:
``` sql
SELECT zookeeperSessionUptime();
```
Результат:
``` text
┌─zookeeperSessionUptime()─┐
│ 286 │
└──────────────────────────┘
```


@ -212,6 +212,9 @@ SET mutations_sync = 2;
CREATE TABLE tmp (x Int64) ENGINE = MergeTree() ORDER BY tuple() PARTITION BY tuple();
INSERT INTO tmp SELECT * FROM system.numbers LIMIT 10;
ALTER TABLE tmp ADD COLUMN s String MATERIALIZED toString(x);
ALTER TABLE tmp MATERIALIZE COLUMN s;
SELECT groupArray(x), groupArray(s) FROM tmp;
```


@ -330,7 +330,9 @@ std::vector<String> Client::loadWarningMessages()
{
std::vector<String> messages;
connection->sendQuery(connection_parameters.timeouts, "SELECT message FROM system.warnings", "" /* query_id */,
QueryProcessingStage::Complete, nullptr, nullptr, false);
QueryProcessingStage::Complete,
&global_context->getSettingsRef(),
&global_context->getClientInfo(), false);
while (true)
{
Packet packet = connection->receivePacket();


@ -595,8 +595,8 @@ if (ThreadFuzzer::instance().isEffective())
if (config().getBool("remap_executable", false)) if (config().getBool("remap_executable", false))
{ {
LOG_DEBUG(log, "Will remap executable in memory."); LOG_DEBUG(log, "Will remap executable in memory.");
remapExecutable(); size_t size = remapExecutable();
LOG_DEBUG(log, "The code in memory has been successfully remapped."); LOG_DEBUG(log, "The code ({}) in memory has been successfully remapped.", ReadableSize(size));
} }
if (config().getBool("mlock_executable", false)) if (config().getBool("mlock_executable", false))
@ -703,10 +703,6 @@ if (ThreadFuzzer::instance().isEffective())
setupTmpPath(log, disk->getPath());
}
/// Storage keeping all the backups.
fs::create_directories(path / "backups");
global_context->setBackupsVolume(config().getString("backups_path", path / "backups"), config().getString("backups_policy", ""));
/** Directory with 'flags': files indicating temporary settings for the server set by system administrator.
* Flags may be cleared automatically after being applied by the server.
* Examples: do repair of local data; clone all replicated tables from replica.


@ -1,7 +1,6 @@
#pragma once
#include <type_traits>
#include <experimental/type_traits>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>


@ -1,7 +1,6 @@
#pragma once
#include <type_traits>
#include <experimental/type_traits>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>


@ -2,7 +2,6 @@
#include <cstring>
#include <memory>
#include <experimental/type_traits>
#include <type_traits>
#include <IO/WriteHelpers.h>


@ -0,0 +1,31 @@
#pragma once
#include <Backups/IBackupEntry.h>
namespace DB
{
/// Represents small preloaded data to be included in a backup.
class BackupEntryFromCallback : public IBackupEntry
{
public:
using ReadBufferCreator = std::function<std::unique_ptr<ReadBuffer>()>;
/// The constructor is allowed to not set `checksum_`, in that case it will be calculated from the data.
BackupEntryFromCallback(const ReadBufferCreator & callback_, size_t size_, const std::optional<UInt128> & checksum_ = {})
: callback(callback_), size(size_), checksum(checksum_)
{
}
UInt64 getSize() const override { return size; }
std::optional<UInt128> getChecksum() const override { return checksum; }
std::unique_ptr<ReadBuffer> getReadBuffer() const override { return callback(); }
private:
const ReadBufferCreator callback;
const size_t size;
const std::optional<UInt128> checksum;
};
}


@ -1,65 +1,41 @@
#include <Backups/BackupFactory.h>
#include <Backups/BackupInDirectory.h>
#include <Interpreters/Context.h>
#include <Disks/IVolume.h>
namespace DB
{
namespace ErrorCodes
{
extern const int BACKUP_NOT_FOUND;
extern const int BACKUP_ENGINE_NOT_FOUND;
extern const int BACKUP_ALREADY_EXISTS;
extern const int NOT_ENOUGH_SPACE;
extern const int LOGICAL_ERROR;
}
BackupFactory & BackupFactory::instance()
{
static BackupFactory the_instance;
return the_instance;
}
void BackupFactory::setBackupsVolume(VolumePtr backups_volume_)
BackupMutablePtr BackupFactory::createBackup(const CreateParams & params) const
{
backups_volume = backups_volume_;
const String & engine_name = params.backup_info.backup_engine_name;
auto it = creators.find(engine_name);
if (it == creators.end())
throw Exception(ErrorCodes::BACKUP_ENGINE_NOT_FOUND, "Not found backup engine {}", engine_name);
return (it->second)(params);
}
BackupMutablePtr BackupFactory::createBackup(const String & backup_name, UInt64 estimated_backup_size, const BackupPtr & base_backup) const
void BackupFactory::registerBackupEngine(const String & engine_name, const CreatorFn & creator_fn)
{
if (!backups_volume)
if (creators.contains(engine_name))
throw Exception(ErrorCodes::LOGICAL_ERROR, "No backups volume");
throw Exception(ErrorCodes::LOGICAL_ERROR, "Backup engine {} was registered twice", engine_name);
creators[engine_name] = creator_fn;
}
for (const auto & disk : backups_volume->getDisks())
void registerBackupEngines(BackupFactory & factory);
BackupFactory::BackupFactory()
{
if (disk->exists(backup_name))
registerBackupEngines(*this);
throw Exception(ErrorCodes::BACKUP_ALREADY_EXISTS, "Backup {} already exists", quoteString(backup_name));
}
auto reservation = backups_volume->reserve(estimated_backup_size);
if (!reservation)
throw Exception(
ErrorCodes::NOT_ENOUGH_SPACE,
"Couldn't reserve {} bytes of free space for new backup {}",
estimated_backup_size,
quoteString(backup_name));
return std::make_shared<BackupInDirectory>(IBackup::OpenMode::WRITE, reservation->getDisk(), backup_name, base_backup);
}
BackupPtr BackupFactory::openBackup(const String & backup_name, const BackupPtr & base_backup) const
{
if (!backups_volume)
throw Exception(ErrorCodes::LOGICAL_ERROR, "No backups volume");
for (const auto & disk : backups_volume->getDisks())
{
if (disk->exists(backup_name))
return std::make_shared<BackupInDirectory>(IBackup::OpenMode::READ, disk, backup_name, base_backup);
}
throw Exception(ErrorCodes::BACKUP_NOT_FOUND, "Backup {} not found", quoteString(backup_name));
}
}


@ -1,38 +1,46 @@
#pragma once
#include <Backups/IBackup.h>
#include <Backups/BackupInfo.h>
#include <Core/Types.h>
#include <Parsers/IAST_fwd.h>
#include <boost/noncopyable.hpp>
#include <memory>
#include <optional>
#include <unordered_map>
namespace DB
{
class IBackup;
using BackupPtr = std::shared_ptr<const IBackup>;
using BackupMutablePtr = std::shared_ptr<IBackup>;
class Context;
using ContextMutablePtr = std::shared_ptr<Context>;
using ContextPtr = std::shared_ptr<const Context>;
class IVolume;
using VolumePtr = std::shared_ptr<IVolume>;
/// Factory for implementations of the IBackup interface.
class BackupFactory : boost::noncopyable
{
public:
using OpenMode = IBackup::OpenMode;
struct CreateParams
{
OpenMode open_mode = OpenMode::WRITE;
BackupInfo backup_info;
std::optional<BackupInfo> base_backup_info;
ContextPtr context;
};
static BackupFactory & instance();
/// Must be called to initialize the backup factory.
/// Creates a new backup or opens it.
void setBackupsVolume(VolumePtr backups_volume_);
BackupMutablePtr createBackup(const CreateParams & params) const;
/// Creates a new backup and open it for writing.
using CreatorFn = std::function<BackupMutablePtr(const CreateParams & params)>;
BackupMutablePtr createBackup(const String & backup_name, UInt64 estimated_backup_size, const BackupPtr & base_backup = {}) const;
void registerBackupEngine(const String & engine_name, const CreatorFn & creator_fn);
/// Opens an existing backup for reading.
BackupPtr openBackup(const String & backup_name, const BackupPtr & base_backup = {}) const;
private:
VolumePtr backups_volume;
BackupFactory();
std::unordered_map<String, CreatorFn> creators;
};
}

src/Backups/BackupImpl.cpp (new file)

@ -0,0 +1,476 @@
#include <Backups/BackupImpl.h>
#include <Backups/BackupFactory.h>
#include <Backups/BackupEntryConcat.h>
#include <Backups/BackupEntryFromCallback.h>
#include <Backups/BackupEntryFromMemory.h>
#include <Backups/IBackupEntry.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/hex.h>
#include <Common/typeid_cast.h>
#include <Common/quoteString.h>
#include <IO/HashingReadBuffer.h>
#include <IO/ReadBufferFromFileBase.h>
#include <IO/ReadHelpers.h>
#include <IO/SeekableReadBuffer.h>
#include <IO/WriteBufferFromFileBase.h>
#include <IO/WriteHelpers.h>
#include <IO/copyData.h>
#include <Poco/Util/XMLConfiguration.h>
#include <boost/range/adaptor/map.hpp>
namespace DB
{
namespace ErrorCodes
{
extern const int BACKUP_NOT_FOUND;
extern const int BACKUP_ALREADY_EXISTS;
extern const int BACKUP_VERSION_NOT_SUPPORTED;
extern const int BACKUP_DAMAGED;
extern const int NO_BASE_BACKUP;
extern const int WRONG_BASE_BACKUP;
extern const int BACKUP_ENTRY_ALREADY_EXISTS;
extern const int BACKUP_ENTRY_NOT_FOUND;
extern const int BAD_ARGUMENTS;
extern const int LOGICAL_ERROR;
}
namespace
{
const UInt64 BACKUP_VERSION = 1;
UInt128 unhexChecksum(const String & checksum)
{
if (checksum.size() != sizeof(UInt128) * 2)
throw Exception(ErrorCodes::BACKUP_DAMAGED, "Unexpected size of checksum: {}, must be {}", checksum.size(), sizeof(UInt128) * 2);
return unhexUInt<UInt128>(checksum.data());
}
}
BackupImpl::BackupImpl(const String & backup_name_, OpenMode open_mode_, const ContextPtr & context_, const std::optional<BackupInfo> & base_backup_info_)
: backup_name(backup_name_), open_mode(open_mode_), context(context_), base_backup_info(base_backup_info_)
{
}
BackupImpl::~BackupImpl() = default;
void BackupImpl::open()
{
if (open_mode == OpenMode::WRITE)
{
if (backupExists())
throw Exception(ErrorCodes::BACKUP_ALREADY_EXISTS, "Backup {} already exists", getName());
timestamp = std::time(nullptr);
uuid = UUIDHelpers::generateV4();
startWriting();
writing_started = true;
}
if (open_mode == OpenMode::READ)
{
if (!backupExists())
throw Exception(ErrorCodes::BACKUP_NOT_FOUND, "Backup {} not found", getName());
readBackupMetadata();
}
if (base_backup_info)
{
BackupFactory::CreateParams params;
params.backup_info = *base_backup_info;
params.open_mode = OpenMode::READ;
params.context = context;
base_backup = BackupFactory::instance().createBackup(params);
if (open_mode == OpenMode::WRITE)
base_backup_uuid = base_backup->getUUID();
else if (base_backup_uuid != base_backup->getUUID())
throw Exception(ErrorCodes::WRONG_BASE_BACKUP, "Backup {}: The base backup {} has different UUID ({} != {})",
getName(), base_backup->getName(), toString(base_backup->getUUID()), (base_backup_uuid ? toString(*base_backup_uuid) : ""));
}
}
void BackupImpl::close()
{
if (open_mode == OpenMode::WRITE)
{
if (writing_started && !writing_finalized)
{
/// Creating of the backup wasn't finished correctly,
/// so the backup cannot be used and it's better to remove its files.
removeAllFilesAfterFailure();
}
}
}
void BackupImpl::writeBackupMetadata()
{
Poco::AutoPtr<Poco::Util::XMLConfiguration> config{new Poco::Util::XMLConfiguration()};
config->setUInt("version", BACKUP_VERSION);
config->setString("timestamp", toString(LocalDateTime{timestamp}));
config->setString("uuid", toString(uuid));
if (base_backup_info)
config->setString("base_backup", base_backup_info->toString());
if (base_backup_uuid)
config->setString("base_backup_uuid", toString(*base_backup_uuid));
size_t index = 0;
for (const auto & [name, info] : file_infos)
{
String prefix = index ? "contents.file[" + std::to_string(index) + "]." : "contents.file.";
config->setString(prefix + "name", name);
config->setUInt(prefix + "size", info.size);
if (info.size)
{
config->setString(prefix + "checksum", getHexUIntLowercase(info.checksum));
if (info.base_size)
{
config->setUInt(prefix + "base_size", info.base_size);
if (info.base_size != info.size)
config->setString(prefix + "base_checksum", getHexUIntLowercase(info.base_checksum));
}
}
++index;
}
std::ostringstream stream; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
config->save(stream);
String str = stream.str();
auto out = addFileImpl(".backup");
out->write(str.data(), str.size());
}
void BackupImpl::readBackupMetadata()
{
auto in = readFileImpl(".backup");
String str;
readStringUntilEOF(str, *in);
std::istringstream stream(std::move(str)); // STYLE_CHECK_ALLOW_STD_STRING_STREAM
Poco::AutoPtr<Poco::Util::XMLConfiguration> config{new Poco::Util::XMLConfiguration()};
config->load(stream);
UInt64 version = config->getUInt("version");
if (version != BACKUP_VERSION)
throw Exception(ErrorCodes::BACKUP_VERSION_NOT_SUPPORTED, "Backup {}: Version {} is not supported", getName(), version);
timestamp = parse<LocalDateTime>(config->getString("timestamp")).to_time_t();
uuid = parse<UUID>(config->getString("uuid"));
if (config->has("base_backup") && !base_backup_info)
base_backup_info = BackupInfo::fromString(config->getString("base_backup"));
if (config->has("base_backup_uuid") && !base_backup_uuid)
base_backup_uuid = parse<UUID>(config->getString("base_backup_uuid"));
file_infos.clear();
Poco::Util::AbstractConfiguration::Keys keys;
config->keys("contents", keys);
for (const auto & key : keys)
{
if ((key == "file") || key.starts_with("file["))
{
String prefix = "contents." + key + ".";
String name = config->getString(prefix + "name");
FileInfo & info = file_infos.emplace(name, FileInfo{}).first->second;
info.size = config->getUInt(prefix + "size");
if (info.size)
{
info.checksum = unhexChecksum(config->getString(prefix + "checksum"));
if (config->has(prefix + "base_size"))
{
info.base_size = config->getUInt(prefix + "base_size");
if (info.base_size == info.size)
info.base_checksum = info.checksum;
else
info.base_checksum = unhexChecksum(config->getString(prefix + "base_checksum"));
}
}
}
}
}
Strings BackupImpl::listFiles(const String & prefix, const String & terminator) const
{
if (!prefix.ends_with('/') && !prefix.empty())
throw Exception("prefix should end with '/'", ErrorCodes::BAD_ARGUMENTS);
std::lock_guard lock{mutex};
Strings elements;
for (auto it = file_infos.lower_bound(prefix); it != file_infos.end(); ++it)
{
const String & name = it->first;
if (!name.starts_with(prefix))
break;
size_t start_pos = prefix.length();
size_t end_pos = String::npos;
if (!terminator.empty())
end_pos = name.find(terminator, start_pos);
std::string_view new_element = std::string_view{name}.substr(start_pos, end_pos - start_pos);
if (!elements.empty() && (elements.back() == new_element))
continue;
elements.push_back(String{new_element});
}
return elements;
}
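/// Example (illustrative): with entries "data/db/t1/part.bin" and "data/db/t2/part.bin",
/// listFiles("data/db/", "/") returns {"t1", "t2"}, while listFiles("data/db/t1/", "") returns {"part.bin"}.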
bool BackupImpl::fileExists(const String & file_name) const
{
std::lock_guard lock{mutex};
return file_infos.count(file_name) != 0;
}
size_t BackupImpl::getFileSize(const String & file_name) const
{
std::lock_guard lock{mutex};
auto it = file_infos.find(file_name);
if (it == file_infos.end())
throw Exception(
ErrorCodes::BACKUP_ENTRY_NOT_FOUND, "Backup {}: Entry {} not found in the backup", getName(), quoteString(file_name));
return it->second.size;
}
UInt128 BackupImpl::getFileChecksum(const String & file_name) const
{
std::lock_guard lock{mutex};
auto it = file_infos.find(file_name);
if (it == file_infos.end())
throw Exception(
ErrorCodes::BACKUP_ENTRY_NOT_FOUND, "Backup {}: Entry {} not found in the backup", getName(), quoteString(file_name));
return it->second.checksum;
}
BackupEntryPtr BackupImpl::readFile(const String & file_name) const
{
std::lock_guard lock{mutex};
auto it = file_infos.find(file_name);
if (it == file_infos.end())
throw Exception(
ErrorCodes::BACKUP_ENTRY_NOT_FOUND, "Backup {}: Entry {} not found in the backup", getName(), quoteString(file_name));
const auto & info = it->second;
if (!info.size)
{
/// Entry's data is empty.
return std::make_unique<BackupEntryFromMemory>(nullptr, 0, UInt128{0, 0});
}
auto read_callback = [backup = std::static_pointer_cast<const BackupImpl>(shared_from_this()), file_name]()
{
return backup->readFileImpl(file_name);
};
if (!info.base_size)
{
/// Data goes completely from this backup, the base backup isn't used.
return std::make_unique<BackupEntryFromCallback>(read_callback, info.size, info.checksum);
}
if (info.size < info.base_size)
{
throw Exception(
ErrorCodes::BACKUP_DAMAGED,
"Backup {}: Entry {} has its data size less than in the base backup {}: {} < {}",
getName(), quoteString(file_name), base_backup->getName(), info.size, info.base_size);
}
if (!base_backup)
{
throw Exception(
ErrorCodes::NO_BASE_BACKUP,
"Backup {}: Entry {} is marked to be read from a base backup, but there is no base backup specified",
getName(), quoteString(file_name));
}
if (!base_backup->fileExists(file_name))
{
throw Exception(
ErrorCodes::WRONG_BASE_BACKUP,
"Backup {}: Entry {} is marked to be read from a base backup, but doesn't exist there",
getName(), quoteString(file_name));
}
auto base_entry = base_backup->readFile(file_name);
auto base_size = base_entry->getSize();
if (base_size != info.base_size)
{
throw Exception(
ErrorCodes::WRONG_BASE_BACKUP,
"Backup {}: Entry {} has unexpected size in the base backup {}: {} (expected size: {})",
getName(), quoteString(file_name), base_backup->getName(), base_size, info.base_size);
}
auto base_checksum = base_entry->getChecksum();
if (base_checksum && (*base_checksum != info.base_checksum))
{
throw Exception(
ErrorCodes::WRONG_BASE_BACKUP,
"Backup {}: Entry {} has unexpected checksum in the base backup {}",
getName(), quoteString(file_name), base_backup->getName());
}
if (info.size == info.base_size)
{
/// Data goes completely from the base backup (nothing goes from this backup).
return base_entry;
}
/// The beginning of the data goes from the base backup,
/// and the ending goes from this backup.
return std::make_unique<BackupEntryConcat>(
std::move(base_entry),
std::make_unique<BackupEntryFromCallback>(read_callback, info.size - info.base_size),
info.checksum);
}
void BackupImpl::addFile(const String & file_name, BackupEntryPtr entry)
{
std::lock_guard lock{mutex};
if (open_mode != OpenMode::WRITE)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Illegal operation: Cannot write to a backup opened for reading");
if (file_infos.contains(file_name))
throw Exception(
ErrorCodes::BACKUP_ENTRY_ALREADY_EXISTS, "Backup {}: Entry {} already exists", getName(), quoteString(file_name));
UInt64 size = entry->getSize();
std::optional<UInt128> checksum = entry->getChecksum();
/// Check if the entry's data is empty.
if (!size)
{
file_infos.emplace(file_name, FileInfo{});
return;
}
/// Check if an entry with the same name exists in the base backup.
bool base_exists = (base_backup && base_backup->fileExists(file_name));
UInt64 base_size = 0;
UInt128 base_checksum{0, 0};
if (base_exists)
{
base_size = base_backup->getFileSize(file_name);
base_checksum = base_backup->getFileChecksum(file_name);
}
std::unique_ptr<ReadBuffer> read_buffer; /// We'll set that later.
UInt64 read_pos = 0; /// Current position in read_buffer.
/// Determine whether it's possible to receive this entry's data from the base backup completely or partly.
bool use_base = false;
if (base_exists && base_size)
{
if (size == base_size)
{
/// The size is the same, we need to compare checksums to find out
/// if the entry's data has not been changed since the base backup.
if (!checksum)
{
read_buffer = entry->getReadBuffer();
HashingReadBuffer hashing_read_buffer{*read_buffer};
hashing_read_buffer.ignore(size);
read_pos = size;
checksum = hashing_read_buffer.getHash();
}
if (checksum == base_checksum)
use_base = true; /// The data has not been changed.
}
else if (size > base_size)
{
/// The size has been increased, we need to calculate a partial checksum to find out
/// if the entry's data has been only appended since the base backup.
read_buffer = entry->getReadBuffer();
HashingReadBuffer hashing_read_buffer{*read_buffer};
hashing_read_buffer.ignore(base_size);
UInt128 partial_checksum = hashing_read_buffer.getHash();
read_pos = base_size;
if (!checksum)
{
hashing_read_buffer.ignore(size - base_size);
checksum = hashing_read_buffer.getHash();
read_pos = size;
}
if (partial_checksum == base_checksum)
use_base = true; /// The data has been appended.
}
}
if (use_base && (size == base_size))
{
/// The entry's data has not been changed since the base backup.
FileInfo info;
info.size = base_size;
info.checksum = base_checksum;
info.base_size = base_size;
info.base_checksum = base_checksum;
file_infos.emplace(file_name, info);
return;
}
{
/// Either the entry didn't exist in the base backup
/// or it has data appended to the end of the data from the base backup.
/// In both cases we have to copy data to this backup.
/// Find the position from which to start copying the data.
auto copy_pos = use_base ? base_size : 0;
/// Move the current read position to the start position to copy data.
/// If `read_buffer` is seekable it's easier, otherwise we can use ignore().
if (auto * seekable_buffer = dynamic_cast<SeekableReadBuffer *>(read_buffer.get()))
{
if (read_pos != copy_pos)
seekable_buffer->seek(copy_pos, SEEK_SET);
}
else
{
if (read_pos > copy_pos)
{
read_buffer.reset();
read_pos = 0;
}
if (!read_buffer)
read_buffer = entry->getReadBuffer();
if (read_pos < copy_pos)
read_buffer->ignore(copy_pos - read_pos);
}
/// If we haven't received or calculated a checksum yet, calculate it now.
ReadBuffer * maybe_hashing_read_buffer = read_buffer.get();
std::optional<HashingReadBuffer> hashing_read_buffer;
if (!checksum)
maybe_hashing_read_buffer = &hashing_read_buffer.emplace(*read_buffer);
/// Copy the entry's data after `copy_pos`.
auto out = addFileImpl(file_name);
copyData(*maybe_hashing_read_buffer, *out);
if (hashing_read_buffer)
checksum = hashing_read_buffer->getHash();
/// Done!
FileInfo info;
info.size = size;
info.checksum = *checksum;
if (use_base)
{
info.base_size = base_size;
info.base_checksum = base_checksum;
}
file_infos.emplace(file_name, info);
}
}
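/// Worked example (illustrative): suppose the base backup already contains this file with 100 bytes and checksum X.
/// - New entry is 100 bytes with checksum X: only a reference to the base backup is stored, no data is copied.
/// - New entry is 150 bytes and its first 100 bytes hash to X: only the trailing 50 bytes are copied, and base_size/base_checksum point at the base.
/// - Anything else: the whole entry is copied into this backup.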
void BackupImpl::finalizeWriting()
{
if (open_mode != OpenMode::WRITE)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Illegal operation: Cannot write to a backup opened for reading");
writeBackupMetadata();
writing_finalized = true;
}
}
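For a small incremental backup, the ".backup" file produced by writeBackupMetadata() would look roughly like the sketch below. All values are invented; the element layout follows the keys set above, and the file is shown with a generic <config> root element as produced by Poco's XMLConfiguration.

<config>
    <version>1</version>
    <timestamp>2021-11-11 13:43:10</timestamp>
    <uuid>a1b2c3d4-0000-4000-8000-000000000001</uuid>
    <base_backup>File('/backups/base/')</base_backup>
    <base_backup_uuid>a1b2c3d4-0000-4000-8000-000000000000</base_backup_uuid>
    <contents>
        <file>
            <name>metadata/db1/t1.sql</name>
            <size>512</size>
            <checksum>00112233445566778899aabbccddeeff</checksum>
        </file>
        <file>
            <name>data/db1/t1/all_1_1_0/data.bin</name>
            <size>1048576</size>
            <checksum>ffeeddccbbaa99887766554433221100</checksum>
            <base_size>524288</base_size>
            <base_checksum>0f1e2d3c4b5a69788796a5b4c3d2e1f0</base_checksum>
        </file>
    </contents>
</config>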

94
src/Backups/BackupImpl.h Normal file
View File

@ -0,0 +1,94 @@
#pragma once
#include <Backups/IBackup.h>
#include <Backups/BackupInfo.h>
#include <map>
#include <mutex>
namespace DB
{
class Context;
using ContextPtr = std::shared_ptr<const Context>;
/// Base implementation of IBackup.
/// Along with passed files it also stores backup metadata - a single file named ".backup" in XML format
/// which contains a list of all files in the backup with their sizes and checksums and information
/// whether the base backup should be used for each entry.
class BackupImpl : public IBackup
{
public:
BackupImpl(
const String & backup_name_,
OpenMode open_mode_,
const ContextPtr & context_,
const std::optional<BackupInfo> & base_backup_info_ = {});
~BackupImpl() override;
const String & getName() const override { return backup_name; }
OpenMode getOpenMode() const override { return open_mode; }
time_t getTimestamp() const override { return timestamp; }
UUID getUUID() const override { return uuid; }
Strings listFiles(const String & prefix, const String & terminator) const override;
bool fileExists(const String & file_name) const override;
size_t getFileSize(const String & file_name) const override;
UInt128 getFileChecksum(const String & file_name) const override;
BackupEntryPtr readFile(const String & file_name) const override;
void addFile(const String & file_name, BackupEntryPtr entry) override;
void finalizeWriting() override;
protected:
/// Should be called in the constructor of a derived class.
void open();
/// Should be called in the destructor of a derived class.
void close();
/// Read a file from the backup.
/// Low level: the function doesn't check base backup or checksums.
virtual std::unique_ptr<ReadBuffer> readFileImpl(const String & file_name) const = 0;
/// Add a file to the backup.
/// Low level: the function doesn't check base backup or checksums.
virtual std::unique_ptr<WriteBuffer> addFileImpl(const String & file_name) = 0;
/// Checks if this backup exists.
virtual bool backupExists() const = 0;
/// Starts writing of this backup, only used if `open_mode == OpenMode::WRITE`.
/// After calling this function `backupExists()` should return true.
virtual void startWriting() = 0;
/// Removes all the backup files, called if something goes wrong while we're writing the backup.
/// This function is called by `close()` if `startWriting()` was called and `finalizeWriting()` wasn't.
virtual void removeAllFilesAfterFailure() = 0;
private:
void writeBackupMetadata();
void readBackupMetadata();
struct FileInfo
{
UInt64 size = 0;
UInt128 checksum{0, 0};
/// for incremental backups
UInt64 base_size = 0;
UInt128 base_checksum{0, 0};
};
const String backup_name;
const OpenMode open_mode;
UUID uuid;
time_t timestamp = 0;
ContextPtr context;
std::optional<BackupInfo> base_backup_info;
std::shared_ptr<const IBackup> base_backup;
std::optional<UUID> base_backup_uuid;
std::map<String, FileInfo> file_infos;
bool writing_started = false;
bool writing_finalized = false;
mutable std::mutex mutex;
};
}
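To make the contract of the protected interface concrete, a hypothetical minimal backend could look like the sketch below. The class name and the in-memory storage are invented for illustration only (BackupInDirectory below is the real implementation added here), and the two buffer-returning methods are left as declarations.

class BackupInMemoryExample : public BackupImpl
{
public:
    BackupInMemoryExample(const String & backup_name_, OpenMode open_mode_, const ContextPtr & context_)
        : BackupImpl(backup_name_, open_mode_, context_)
    {
        open();  /// a derived class calls open() in its constructor
    }

    ~BackupInMemoryExample() override
    {
        close();  /// and close() in its destructor
    }

private:
    bool backupExists() const override { return !files.empty(); }
    void startWriting() override {}
    void removeAllFilesAfterFailure() override { files.clear(); }

    /// These two would wrap `files` into ReadBuffer/WriteBuffer implementations.
    std::unique_ptr<ReadBuffer> readFileImpl(const String & file_name) const override;
    std::unique_ptr<WriteBuffer> addFileImpl(const String & file_name) override;

    std::map<String, String> files;
};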

View File

@ -1,454 +1,160 @@
(The old self-contained implementation of BackupInDirectory — the ".contents"/".base_backup" bookkeeping and the per-entry list/exists/getSize/getChecksum/read/write logic — is removed by this change; that logic now lives in BackupImpl.cpp above. The file is reduced to the disk-specific parts:)

#include <Backups/BackupInDirectory.h>
#include <Backups/BackupFactory.h>
#include <Common/quoteString.h>
#include <Disks/DiskSelector.h>
#include <Disks/IDisk.h>
#include <Disks/DiskLocal.h>

namespace DB
{
namespace ErrorCodes
{
    extern const int BAD_ARGUMENTS;
    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}

namespace
{
    /// Checks multiple keys "key", "key[1]", "key[2]", and so on in the configuration
    /// and finds out if any of them has a matching value.
    bool findConfigKeyWithMatchingValue(const Poco::Util::AbstractConfiguration & config, const String & key, const std::function<bool(const String & value)> & match_function)
    {
        String current_key = key;
        size_t counter = 0;
        while (config.has(current_key))
        {
            if (match_function(config.getString(current_key)))
                return true;
            current_key = key + "[" + std::to_string(++counter) + "]";
        }
        return false;
    }

    bool isDiskAllowed(const String & disk_name, const Poco::Util::AbstractConfiguration & config)
    {
        return findConfigKeyWithMatchingValue(config, "backups.allowed_disk", [&](const String & value) { return value == disk_name; });
    }

    bool isPathAllowed(const String & path, const Poco::Util::AbstractConfiguration & config)
    {
        return findConfigKeyWithMatchingValue(config, "backups.allowed_path", [&](const String & value) { return path.starts_with(value); });
    }
}

BackupInDirectory::BackupInDirectory(
    const String & backup_name_,
    OpenMode open_mode_,
    const DiskPtr & disk_,
    const String & path_,
    const ContextPtr & context_,
    const std::optional<BackupInfo> & base_backup_info_)
    : BackupImpl(backup_name_, open_mode_, context_, base_backup_info_)
    , disk(disk_), path(path_)
{
    /// Path to backup must end with '/'
    if (path.back() != '/')
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Backup {}: Path to backup must end with '/', but {} doesn't.", getName(), quoteString(path));
    dir_path = fs::path(path).parent_path(); /// get path without terminating slash

    /// If `disk` is not specified, we create an internal instance of `DiskLocal` here.
    if (!disk)
    {
        auto fspath = fs::path{dir_path};
        if (!fspath.has_filename())
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Backup {}: Path to a backup must be a directory path.", getName(), quoteString(path));
        path = fspath.filename() / "";
        dir_path = fs::path(path).parent_path(); /// get path without terminating slash
        String disk_path = fspath.remove_filename();
        disk = std::make_shared<DiskLocal>(disk_path, disk_path, 0);
    }

    open();
}

BackupInDirectory::~BackupInDirectory()
{
    close();
}

bool BackupInDirectory::backupExists() const
{
    return disk->isDirectory(dir_path);
}

void BackupInDirectory::startWriting()
{
    disk->createDirectories(dir_path);
}

void BackupInDirectory::removeAllFilesAfterFailure()
{
    if (disk->isDirectory(dir_path))
        disk->removeRecursive(dir_path);
}

std::unique_ptr<ReadBuffer> BackupInDirectory::readFileImpl(const String & file_name) const
{
    String file_path = path + file_name;
    return disk->readFile(file_path);
}

std::unique_ptr<WriteBuffer> BackupInDirectory::addFileImpl(const String & file_name)
{
    String file_path = path + file_name;
    disk->createDirectories(fs::path(file_path).parent_path());
    return disk->writeFile(file_path);
}

void registerBackupEngineFile(BackupFactory & factory)
{
    auto creator_fn = [](const BackupFactory::CreateParams & params)
    {
        String backup_name = params.backup_info.toString();
        const String & engine_name = params.backup_info.backup_engine_name;
        const auto & args = params.backup_info.args;

        DiskPtr disk;
        String path;
        if (engine_name == "File")
        {
            if (args.size() != 1)
            {
                throw Exception(
                    "Backup engine 'File' requires 1 argument (path)",
                    ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
            }

            path = args[0].safeGet<String>();

            if (!isPathAllowed(path, params.context->getConfigRef()))
                throw Exception(ErrorCodes::BAD_ARGUMENTS, "Path {} is not allowed for backups", path);
        }
        else if (engine_name == "Disk")
        {
            if (args.size() != 2)
            {
                throw Exception(
                    "Backup engine 'Disk' requires 2 arguments (disk_name, path)",
                    ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
            }

            String disk_name = args[0].safeGet<String>();
            disk = params.context->getDisk(disk_name);
            path = args[1].safeGet<String>();

            if (!isDiskAllowed(disk_name, params.context->getConfigRef()))
                throw Exception(ErrorCodes::BAD_ARGUMENTS, "Disk {} is not allowed for backups", disk_name);
        }

        return std::make_shared<BackupInDirectory>(backup_name, params.open_mode, disk, path, params.context, params.base_backup_info);
    };

    factory.registerBackupEngine("File", creator_fn);
    factory.registerBackupEngine("Disk", creator_fn);
}

}
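The isDiskAllowed()/isPathAllowed() checks above read the server configuration; a sketch of the corresponding fragment is shown below. The disk name and path are illustrative, and several <allowed_disk>/<allowed_path> elements may be listed because the keys "backups.allowed_disk[N]" are probed in a loop.

<clickhouse>
    <backups>
        <allowed_disk>backups</allowed_disk>
        <allowed_path>/backups/</allowed_path>
    </backups>
</clickhouse>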

View File

@ -1,8 +1,6 @@
(The declarations that served the old standalone backup format — list/exists/getSize/getChecksum/read/write and the EntryInfo bookkeeping — are removed; the header now only declares the disk-specific overrides:)

#pragma once

#include <Backups/BackupImpl.h>

namespace DB
@ -12,55 +10,29 @@ using DiskPtr = std::shared_ptr<IDisk>;

/// Represents a backup stored on a disk.
/// A backup is stored as a directory, each entry is stored as a file in that directory.
class BackupInDirectory : public BackupImpl
{
public:
    /// `disk_` is allowed to be nullptr and that means the `path_` is a path in the local filesystem.
    BackupInDirectory(
        const String & backup_name_,
        OpenMode open_mode_,
        const DiskPtr & disk_,
        const String & path_,
        const ContextPtr & context_,
        const std::optional<BackupInfo> & base_backup_info_ = {});
    ~BackupInDirectory() override;

private:
    bool backupExists() const override;
    void startWriting() override;
    void removeAllFilesAfterFailure() override;
    std::unique_ptr<ReadBuffer> readFileImpl(const String & file_name) const override;
    std::unique_ptr<WriteBuffer> addFileImpl(const String & file_name) override;

    DiskPtr disk;
    String path;
    String dir_path; /// `path` without terminating slash
};

}

View File

@ -0,0 +1,70 @@
#include <Backups/BackupInfo.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ExpressionElementParsers.h>
#include <Parsers/formatAST.h>
#include <Parsers/parseQuery.h>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
String BackupInfo::toString() const
{
auto func = std::make_shared<ASTFunction>();
func->name = backup_engine_name;
func->no_empty_args = true;
auto list = std::make_shared<ASTExpressionList>();
func->arguments = list;
func->children.push_back(list);
list->children.reserve(args.size());
for (const auto & arg : args)
list->children.push_back(std::make_shared<ASTLiteral>(arg));
return serializeAST(*func);
}
BackupInfo BackupInfo::fromString(const String & str)
{
ParserIdentifierWithOptionalParameters parser;
ASTPtr ast = parseQuery(parser, str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
return fromAST(*ast);
}
BackupInfo BackupInfo::fromAST(const IAST & ast)
{
const auto * func = ast.as<const ASTFunction>();
if (!func)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected function, got {}", serializeAST(ast));
BackupInfo res;
res.backup_engine_name = func->name;
if (func->arguments)
{
const auto * list = func->arguments->as<const ASTExpressionList>();
if (!list)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected list, got {}", serializeAST(*func->arguments));
res.args.reserve(list->children.size());
for (const auto & elem : list->children)
{
const auto * lit = elem->as<const ASTLiteral>();
if (!lit)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected literal, got {}", serializeAST(*elem));
res.args.push_back(lit->value);
}
}
return res;
}
}
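An illustrative round trip through these two functions (the argument values are made up; the engine names are the ones registered earlier in this change):

BackupInfo info = BackupInfo::fromString("Disk('backups', 'my_backup/')");
/// info.backup_engine_name == "Disk", info.args == {"backups", "my_backup/"}
String str = info.toString();  /// serializes back to "Disk('backups', 'my_backup/')"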

21
src/Backups/BackupInfo.h Normal file
View File

@ -0,0 +1,21 @@
#pragma once
#include <Core/Field.h>
namespace DB
{
class IAST;
/// Information about a backup.
struct BackupInfo
{
String backup_engine_name;
std::vector<Field> args;
String toString() const;
static BackupInfo fromString(const String & str);
static BackupInfo fromAST(const IAST & ast);
};
}

View File

@ -7,7 +7,7 @@ namespace DB
{
#define LIST_OF_BACKUP_SETTINGS(M) \
-    M(String, base_backup, "", "Name of the base backup. Only differences made after the base backup will be included in a newly created backup, so this option allows to make an incremental backup.", 0) \
+    M(Bool, dummy, false, "", 0) \
DECLARE_SETTINGS_TRAITS_ALLOW_CUSTOM_SETTINGS(BackupSettingsTraits, LIST_OF_BACKUP_SETTINGS)

View File

@ -18,6 +18,8 @@
#include <boost/range/adaptor/reversed.hpp>
#include <filesystem>

+namespace fs = std::filesystem;

namespace DB
{
@ -426,7 +428,7 @@ namespace
    ASTPtr readCreateQueryFromBackup(const DatabaseAndTableName & table_name, const BackupPtr & backup)
    {
        String create_query_path = getMetadataPathInBackup(table_name);
-        auto read_buffer = backup->read(create_query_path)->getReadBuffer();
+        auto read_buffer = backup->readFile(create_query_path)->getReadBuffer();
        String create_query_str;
        readStringUntilEOF(create_query_str, *read_buffer);
        read_buffer.reset();
@ -437,7 +439,7 @@ namespace
    ASTPtr readCreateQueryFromBackup(const String & database_name, const BackupPtr & backup)
    {
        String create_query_path = getMetadataPathInBackup(database_name);
-        auto read_buffer = backup->read(create_query_path)->getReadBuffer();
+        auto read_buffer = backup->readFile(create_query_path)->getReadBuffer();
        String create_query_str;
        readStringUntilEOF(create_query_str, *read_buffer);
        read_buffer.reset();
@ -546,9 +548,10 @@ namespace
        }

        RestoreObjectsTasks restore_objects_tasks;
-        Strings table_names = backup->list("metadata/" + escapeForFileName(database_name) + "/", "/");
-        for (const String & table_name : table_names)
+        Strings table_metadata_filenames = backup->listFiles("metadata/" + escapeForFileName(database_name) + "/", "/");
+        for (const String & table_metadata_filename : table_metadata_filenames)
        {
+            String table_name = unescapeForFileName(fs::path{table_metadata_filename}.stem());
            if (except_list.contains(table_name))
                continue;
            restoreTable({database_name, table_name}, {}, context, backup, renaming_config, restore_objects_tasks);
@ -565,10 +568,11 @@ namespace
    {
        restore_tasks.emplace_back([except_list, context, backup, renaming_config]() -> RestoreDataTasks
        {
-            Strings database_names = backup->list("metadata/", "/");
            RestoreObjectsTasks restore_objects_tasks;
-            for (const String & database_name : database_names)
+            Strings database_metadata_filenames = backup->listFiles("metadata/", "/");
+            for (const String & database_metadata_filename : database_metadata_filenames)
            {
+                String database_name = unescapeForFileName(fs::path{database_metadata_filename}.stem());
                if (except_list.contains(database_name))
                    continue;
                restoreDatabase(database_name, {}, context, backup, renaming_config, restore_objects_tasks);
@ -650,10 +654,10 @@ UInt64 estimateBackupSize(const BackupEntries & backup_entries, const BackupPtr
        UInt64 data_size = entry->getSize();
        if (base_backup)
        {
-            if (base_backup->exists(name) && (data_size == base_backup->getSize(name)))
+            if (base_backup->fileExists(name) && (data_size == base_backup->getFileSize(name)))
            {
                auto checksum = entry->getChecksum();
-                if (checksum && (*checksum == base_backup->getChecksum(name)))
+                if (checksum && (*checksum == base_backup->getFileChecksum(name)))
                    continue;
            }
        }
@ -664,7 +668,7 @@ UInt64 estimateBackupSize(const BackupEntries & backup_entries, const BackupPtr
void writeBackupEntries(BackupMutablePtr backup, BackupEntries && backup_entries, size_t num_threads)
{
-    if (!num_threads)
+    if (!num_threads || !backup->supportsWritingInMultipleThreads())
        num_threads = 1;
    std::vector<ThreadFromGlobalPool> threads;
    size_t num_active_threads = 0;
@ -691,7 +695,7 @@ void writeBackupEntries(BackupMutablePtr backup, BackupEntries && backup_entries
            {
                try
                {
-                    backup->write(name, std::move(entry));
+                    backup->addFile(name, std::move(entry));
                }
                catch (...)
                {
@ -747,7 +751,6 @@ RestoreObjectsTasks makeRestoreTasks(const Elements & elements, ContextMutablePt
        case ElementType::DATABASE:
        {
            const String & database_name = element.name.first;
-            auto database = DatabaseCatalog::instance().getDatabase(database_name, context);
            restoreDatabase(database_name, element.except_list, context, backup, renaming_config, restore_tasks);
            break;
        }

View File

@ -1,6 +1,7 @@
#pragma once

#include <Core/Types.h>
+#include <Common/TypePromotion.h>
#include <memory>
@ -12,11 +13,15 @@ using BackupEntryPtr = std::unique_ptr<IBackupEntry>;

/// Represents a backup, i.e. a storage of BackupEntries which can be accessed by their names.
/// A backup can be either incremental or non-incremental. An incremental backup doesn't store
/// the data of the entries which are not changed compared to its base backup.
-class IBackup
+class IBackup : public std::enable_shared_from_this<IBackup>, public TypePromotion<IBackup>
{
public:
-    IBackup() {}
    virtual ~IBackup() = default;

+    /// Name of the backup.
+    virtual const String & getName() const = 0;

    enum class OpenMode
    {
        READ,
@ -26,8 +31,11 @@ public:
    /// A backup can be open either in READ or WRITE mode.
    virtual OpenMode getOpenMode() const = 0;

-    /// Returns the path to the backup.
-    virtual String getPath() const = 0;
+    /// Returns the time point when this backup was created.
+    virtual time_t getTimestamp() const = 0;

+    /// Returns UUID of the backup.
+    virtual UUID getUUID() const = 0;

    /// Returns names of entries stored in the backup.
    /// If `prefix` isn't empty the function will return only the names starting with
@ -36,24 +44,27 @@ public:
    /// before the terminator. For example, list("", "") returns names of all the entries
    /// in the backup; and list("data/", "/") return kind of a list of folders and
    /// files stored in the "data/" directory inside the backup.
-    virtual Strings list(const String & prefix = "", const String & terminator = "/") const = 0;
+    virtual Strings listFiles(const String & prefix = "", const String & terminator = "/") const = 0;

    /// Checks if an entry with a specified name exists.
-    virtual bool exists(const String & name) const = 0;
+    virtual bool fileExists(const String & file_name) const = 0;

    /// Returns the size of the entry's data.
-    /// This function does the same as `read(name)->getSize()` but faster.
-    virtual size_t getSize(const String & name) const = 0;
+    /// This function does the same as `read(file_name)->getSize()` but faster.
+    virtual size_t getFileSize(const String & file_name) const = 0;

    /// Returns the checksum of the entry's data.
-    /// This function does the same as `read(name)->getChecksum()` but faster.
-    virtual UInt128 getChecksum(const String & name) const = 0;
+    /// This function does the same as `read(file_name)->getChecksum()` but faster.
+    virtual UInt128 getFileChecksum(const String & file_name) const = 0;

    /// Reads an entry from the backup.
-    virtual BackupEntryPtr read(const String & name) const = 0;
+    virtual BackupEntryPtr readFile(const String & file_name) const = 0;

    /// Puts a new entry to the backup.
-    virtual void write(const String & name, BackupEntryPtr entry) = 0;
+    virtual void addFile(const String & file_name, BackupEntryPtr entry) = 0;

+    /// Whether it's possible to add new entries to the backup in multiple threads.
+    virtual bool supportsWritingInMultipleThreads() const { return true; }

    /// Finalizes writing the backup, should be called after all entries have been successfully written.
    virtual void finalizeWriting() = 0;

View File

@ -0,0 +1,14 @@
namespace DB
{
class BackupFactory;
void registerBackupEngineFile(BackupFactory &);
void registerBackupEngines(BackupFactory & factory)
{
registerBackupEngineFile(factory);
}
}

View File

@ -21,6 +21,7 @@
#include <Parsers/ASTSetQuery.h>
#include <Parsers/ASTSubquery.h>
#include <Parsers/ASTTablesInSelectQuery.h>
+#include <Parsers/ASTSelectIntersectExceptQuery.h>
#include <Parsers/ASTUseQuery.h>
#include <Parsers/ASTWindowDefinition.h>
#include <Parsers/ParserQuery.h>
@ -447,6 +448,11 @@ void QueryFuzzer::fuzz(ASTPtr & ast)
    {
        fuzz(with_union->list_of_selects);
    }
+    else if (auto * with_intersect_except = typeid_cast<ASTSelectIntersectExceptQuery *>(ast.get()))
+    {
+        auto selects = with_intersect_except->getListOfSelects();
+        fuzz(selects);
+    }
    else if (auto * tables = typeid_cast<ASTTablesInSelectQuery *>(ast.get()))
    {
        fuzz(tables->children);

View File

@ -594,6 +594,8 @@
    M(624, BAD_FILE_TYPE) \
    M(625, IO_SETUP_ERROR) \
    M(626, CANNOT_SKIP_UNKNOWN_FIELD) \
+    M(627, BACKUP_ENGINE_NOT_FOUND) \
+    M(628, OFFSET_FETCH_WITHOUT_ORDER_BY) \
    \
    M(999, KEEPER_EXCEPTION) \
    M(1000, POCO_EXCEPTION) \

View File

@ -206,8 +206,8 @@ static void getNoSpaceLeftInfoMessage(std::filesystem::path path, String & msg)
    fmt::format_to(std::back_inserter(msg),
        "\nTotal space: {}\nAvailable space: {}\nTotal inodes: {}\nAvailable inodes: {}\nMount point: {}",
-        ReadableSize(fs.f_blocks * fs.f_bsize),
-        ReadableSize(fs.f_bavail * fs.f_bsize),
+        ReadableSize(fs.f_blocks * fs.f_frsize),
+        ReadableSize(fs.f_bavail * fs.f_frsize),
        formatReadableQuantity(fs.f_files),
        formatReadableQuantity(fs.f_favail),
        mount_point);

View File

@ -18,6 +18,7 @@
#include <string>
#include <type_traits>
#include <Core/Defines.h>
+#include <base/extended_types.h>

#define ROTL(x, b) static_cast<UInt64>(((x) << (b)) | ((x) >> (64 - (b))))
@ -191,6 +192,15 @@ inline void sipHash128(const char * data, const size_t size, char * out)
    hash.get128(out);
}

+inline UInt128 sipHash128(const char * data, const size_t size)
+{
+    SipHash hash;
+    hash.update(data, size);
+    UInt128 res;
+    hash.get128(res);
+    return res;
+}

inline UInt64 sipHash64(const char * data, const size_t size)
{
    SipHash hash;

View File

@ -116,3 +116,26 @@ inline UInt16 unhex4(const char * data)
        + static_cast<UInt16>(unhex(data[2])) * 0x10
        + static_cast<UInt16>(unhex(data[3]));
}
template <typename TUInt>
TUInt unhexUInt(const char * data)
{
TUInt res = 0;
if constexpr ((sizeof(TUInt) <= 8) || ((sizeof(TUInt) % 8) != 0))
{
for (size_t i = 0; i < sizeof(TUInt) * 2; ++i, ++data)
{
res <<= 4;
res += unhex(*data);
}
}
else
{
for (size_t i = 0; i < sizeof(TUInt) / 8; ++i, data += 16)
{
res <<= 64;
res += unhexUInt<UInt64>(data);
}
}
return res;
}
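A minimal usage sketch (the variable names are illustrative): unhexUInt is the parsing counterpart of getHexUIntLowercase(), which the backup metadata code above uses for checksums.

const char * hex = "00112233445566778899aabbccddeeff";  /// exactly 32 hex digits for a UInt128
UInt128 checksum = unhexUInt<UInt128>(hex);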

View File

@ -1,3 +1,5 @@
#include "remapExecutable.h"
#if defined(__linux__) && defined(__amd64__) && defined(__SSE2__) && !defined(SANITIZER) && defined(NDEBUG) && !defined(SPLIT_SHARED_LIBRARIES) #if defined(__linux__) && defined(__amd64__) && defined(__SSE2__) && !defined(SANITIZER) && defined(NDEBUG) && !defined(SPLIT_SHARED_LIBRARIES)
#include <sys/mman.h> #include <sys/mman.h>
@ -11,8 +13,6 @@
#include <Common/Exception.h> #include <Common/Exception.h>
#include <fmt/format.h> #include <fmt/format.h>
#include "remapExecutable.h"
namespace DB namespace DB
{ {
@ -136,10 +136,11 @@ __attribute__((__noinline__)) void remapToHugeStep1(void * begin, size_t size)
} }
void remapExecutable() size_t remapExecutable()
{ {
auto [begin, size] = getMappedArea(reinterpret_cast<void *>(remapExecutable)); auto [begin, size] = getMappedArea(reinterpret_cast<void *>(remapExecutable));
remapToHugeStep1(begin, size); remapToHugeStep1(begin, size);
return size;
} }
} }
@ -149,7 +150,7 @@ void remapExecutable()
namespace DB namespace DB
{ {
void remapExecutable() {} size_t remapExecutable() { return 0; }
} }

View File

@ -1,8 +1,12 @@
#pragma once

+#include <cstdlib>

namespace DB
{

/// This function tries to reallocate the code of the running program in a more efficient way.
-void remapExecutable();
+/// @return size of remapped area.
+size_t remapExecutable();

}

View File

@ -10,11 +10,16 @@ TEST(LocalAddress, SmokeTest)
    DB::ShellCommand::Config config("/bin/hostname");
    config.arguments = {"-i"};
    auto cmd = DB::ShellCommand::executeDirect(config);

    std::string address_str;
    DB::readString(address_str, cmd->out);
    cmd->wait();

    std::cerr << "Got Address: " << address_str << std::endl;

+    /// hostname -i can return more than one address: "2001:db8:1::242:ac11:2 172.17.0.2"
+    if (auto space_pos = address_str.find(' '); space_pos != std::string::npos)
+        address_str = address_str.substr(0, space_pos);

    Poco::Net::IPAddress address(address_str);
    EXPECT_TRUE(DB::isLocalAddress(address));

View File

@ -380,6 +380,14 @@ void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uin
        min_log_id = last_commited_log_index;
        max_log_id = last_commited_log_index == 0 ? 0 : last_commited_log_index - 1;
    }
+    else if (last_commited_log_index != 0 && max_log_id < last_commited_log_index - 1) /// If we have more fresh snapshot than our logs
+    {
+        LOG_WARNING(log, "Our most fresh log_id {} is smaller than stored data in snapshot {}. It can indicate data loss. Removing outdated logs.", max_log_id, last_commited_log_index - 1);
+
+        removeAllLogs();
+        min_log_id = last_commited_log_index;
+        max_log_id = last_commited_log_index - 1;
+    }
    else if (last_log_is_not_complete) /// if it's complete just start new one
    {
        assert(last_log_read_result != std::nullopt);

View File

@ -1466,7 +1466,6 @@ TEST_P(CoordinationTest, TestCompressedLogsMultipleRewrite)
        changelog2.append(entry);
        changelog2.end_of_append_batch(0, 0);
    }
}

TEST_P(CoordinationTest, TestStorageSnapshotDifferentCompressions)
@ -1515,6 +1514,33 @@
}

TEST_P(CoordinationTest, TestLogGap)
{
    using namespace Coordination;
    auto test_params = GetParam();
    ChangelogDirTest logs("./logs");
    DB::KeeperLogStore changelog("./logs", 100, true, test_params.enable_compression);

    changelog.init(0, 3);
    for (size_t i = 1; i < 55; ++i)
    {
        std::shared_ptr<ZooKeeperCreateRequest> request = std::make_shared<ZooKeeperCreateRequest>();
        request->path = "/hello_" + std::to_string(i);
        auto entry = getLogEntryFromZKRequest(0, 1, request);
        changelog.append(entry);
        changelog.end_of_append_batch(0, 0);
    }

    DB::KeeperLogStore changelog1("./logs", 100, true, test_params.enable_compression);
    changelog1.init(61, 3);

    /// Logs discarded
    EXPECT_FALSE(fs::exists("./logs/changelog_1_100.bin" + test_params.extension));
    EXPECT_EQ(changelog1.start_index(), 61);
    EXPECT_EQ(changelog1.next_slot(), 61);
}

INSTANTIATE_TEST_SUITE_P(CoordinationTestSuite,
    CoordinationTest,
    ::testing::ValuesIn(std::initializer_list<CompressionParam>{

View File

@ -97,6 +97,7 @@ public:
const char * getTypeName() const; const char * getTypeName() const;
const char * getDescription() const; const char * getDescription() const;
bool isCustom() const; bool isCustom() const;
bool isObsolete() const;
bool operator==(const SettingFieldRef & other) const { return (getName() == other.getName()) && (getValue() == other.getValue()); } bool operator==(const SettingFieldRef & other) const { return (getName() == other.getName()) && (getValue() == other.getValue()); }
bool operator!=(const SettingFieldRef & other) const { return !(*this == other); } bool operator!=(const SettingFieldRef & other) const { return !(*this == other); }
@ -182,6 +183,7 @@ struct BaseSettingsHelpers
{ {
IMPORTANT = 0x01, IMPORTANT = 0x01,
CUSTOM = 0x02, CUSTOM = 0x02,
OBSOLETE = 0x04,
}; };
static void writeFlags(Flags flags, WriteBuffer & out); static void writeFlags(Flags flags, WriteBuffer & out);
static Flags readFlags(ReadBuffer & in); static Flags readFlags(ReadBuffer & in);
@ -745,6 +747,17 @@ bool BaseSettings<Traits_>::SettingFieldRef::isCustom() const
return false; return false;
} }
template <typename Traits_>
bool BaseSettings<Traits_>::SettingFieldRef::isObsolete() const
{
if constexpr (Traits::allow_custom_settings)
{
if (custom_setting)
return false;
}
return accessor->isObsolete(index);
}
#define DECLARE_SETTINGS_TRAITS(SETTINGS_TRAITS_NAME, LIST_OF_SETTINGS_MACRO) \ #define DECLARE_SETTINGS_TRAITS(SETTINGS_TRAITS_NAME, LIST_OF_SETTINGS_MACRO) \
DECLARE_SETTINGS_TRAITS_COMMON(SETTINGS_TRAITS_NAME, LIST_OF_SETTINGS_MACRO, 0) DECLARE_SETTINGS_TRAITS_COMMON(SETTINGS_TRAITS_NAME, LIST_OF_SETTINGS_MACRO, 0)
@ -769,6 +782,7 @@ bool BaseSettings<Traits_>::SettingFieldRef::isCustom() const
const char * getTypeName(size_t index) const { return field_infos[index].type; } \ const char * getTypeName(size_t index) const { return field_infos[index].type; } \
const char * getDescription(size_t index) const { return field_infos[index].description; } \ const char * getDescription(size_t index) const { return field_infos[index].description; } \
bool isImportant(size_t index) const { return field_infos[index].is_important; } \ bool isImportant(size_t index) const { return field_infos[index].is_important; } \
bool isObsolete(size_t index) const { return field_infos[index].is_obsolete; } \
Field castValueUtil(size_t index, const Field & value) const { return field_infos[index].cast_value_util_function(value); } \ Field castValueUtil(size_t index, const Field & value) const { return field_infos[index].cast_value_util_function(value); } \
String valueToStringUtil(size_t index, const Field & value) const { return field_infos[index].value_to_string_util_function(value); } \ String valueToStringUtil(size_t index, const Field & value) const { return field_infos[index].value_to_string_util_function(value); } \
Field stringToValueUtil(size_t index, const String & str) const { return field_infos[index].string_to_value_util_function(str); } \ Field stringToValueUtil(size_t index, const String & str) const { return field_infos[index].string_to_value_util_function(str); } \
@ -789,6 +803,7 @@ bool BaseSettings<Traits_>::SettingFieldRef::isCustom() const
const char * type; \ const char * type; \
const char * description; \ const char * description; \
bool is_important; \ bool is_important; \
bool is_obsolete; \
Field (*cast_value_util_function)(const Field &); \ Field (*cast_value_util_function)(const Field &); \
String (*value_to_string_util_function)(const Field &); \ String (*value_to_string_util_function)(const Field &); \
Field (*string_to_value_util_function)(const String &); \ Field (*string_to_value_util_function)(const String &); \
@ -816,7 +831,7 @@ bool BaseSettings<Traits_>::SettingFieldRef::isCustom() const
static const Accessor the_instance = [] \ static const Accessor the_instance = [] \
{ \ { \
Accessor res; \ Accessor res; \
constexpr int IMPORTANT = 1; \ constexpr int IMPORTANT = 0x01; \
UNUSED(IMPORTANT); \ UNUSED(IMPORTANT); \
LIST_OF_SETTINGS_MACRO(IMPLEMENT_SETTINGS_TRAITS_) \ LIST_OF_SETTINGS_MACRO(IMPLEMENT_SETTINGS_TRAITS_) \
for (size_t i : collections::range(res.field_infos.size())) \ for (size_t i : collections::range(res.field_infos.size())) \
@ -845,6 +860,7 @@ bool BaseSettings<Traits_>::SettingFieldRef::isCustom() const
#define IMPLEMENT_SETTINGS_TRAITS_(TYPE, NAME, DEFAULT, DESCRIPTION, FLAGS) \ #define IMPLEMENT_SETTINGS_TRAITS_(TYPE, NAME, DEFAULT, DESCRIPTION, FLAGS) \
res.field_infos.emplace_back( \ res.field_infos.emplace_back( \
FieldInfo{#NAME, #TYPE, DESCRIPTION, FLAGS & IMPORTANT, \ FieldInfo{#NAME, #TYPE, DESCRIPTION, FLAGS & IMPORTANT, \
static_cast<bool>(FLAGS & BaseSettingsHelpers::Flags::OBSOLETE), \
[](const Field & value) -> Field { return static_cast<Field>(SettingField##TYPE{value}); }, \ [](const Field & value) -> Field { return static_cast<Field>(SettingField##TYPE{value}); }, \
[](const Field & value) -> String { return SettingField##TYPE{value}.toString(); }, \ [](const Field & value) -> String { return SettingField##TYPE{value}.toString(); }, \
[](const String & str) -> Field { SettingField##TYPE temp; temp.parseFromString(str); return static_cast<Field>(temp); }, \ [](const String & str) -> Field { SettingField##TYPE temp; temp.parseFromString(str); return static_cast<Field>(temp); }, \
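Editor's note: the hunk above threads a new OBSOLETE flag bit through the settings field metadata (the `is_obsolete` member and the `isObsolete(index)` accessor). Below is a minimal standalone sketch of that idea, not the actual BaseSettings machinery: the flag value 0x04 and the reduced `FieldInfo` layout are assumptions for illustration only.

```cpp
#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

// Simplified stand-in for BaseSettingsHelpers::Flags from the diff.
struct Flags
{
    static constexpr unsigned IMPORTANT = 0x01;
    static constexpr unsigned OBSOLETE  = 0x04; // assumed value; the real constant lives in BaseSettings.h
};

// Reduced FieldInfo: only the members needed to show the flag handling.
struct FieldInfo
{
    std::string name;
    bool is_important = false;
    bool is_obsolete = false;
};

int main()
{
    std::vector<FieldInfo> field_infos;

    // Emulates what IMPLEMENT_SETTINGS_TRAITS_ does per setting: the flag word
    // is decomposed into booleans once, at registration time.
    auto add = [&](std::string name, unsigned flags)
    {
        field_infos.push_back({std::move(name),
                               static_cast<bool>(flags & Flags::IMPORTANT),
                               static_cast<bool>(flags & Flags::OBSOLETE)});
    };

    add("max_threads", 0);
    add("allow_experimental_map_type", Flags::OBSOLETE);

    for (size_t i = 0; i < field_infos.size(); ++i)
        std::cout << field_infos[i].name << " obsolete=" << field_infos[i].is_obsolete << '\n';
}
```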

View File

@ -457,14 +457,13 @@ class IColumn;
M(Bool, allow_non_metadata_alters, true, "Allow to execute alters which affects not only tables metadata, but also data on disk", 0) \ M(Bool, allow_non_metadata_alters, true, "Allow to execute alters which affects not only tables metadata, but also data on disk", 0) \
M(Bool, enable_global_with_statement, true, "Propagate WITH statements to UNION queries and all subqueries", 0) \ M(Bool, enable_global_with_statement, true, "Propagate WITH statements to UNION queries and all subqueries", 0) \
M(Bool, aggregate_functions_null_for_empty, false, "Rewrite all aggregate functions in a query, adding -OrNull suffix to them", 0) \ M(Bool, aggregate_functions_null_for_empty, false, "Rewrite all aggregate functions in a query, adding -OrNull suffix to them", 0) \
M(Bool, optimize_syntax_fuse_functions, true, "Allow applying syntax optimisation: fuse aggregate functions", 0) \ M(Bool, optimize_syntax_fuse_functions, false, "Allow applying syntax optimisation: fuse aggregate functions", 0) \
M(Bool, optimize_fuse_sum_count_avg, false, "Fuse functions `sum, avg, count` with identical arguments into one `sumCount` (`optimize_syntax_fuse_functions` should be enabled)", 0) \ M(Bool, optimize_fuse_sum_count_avg, false, "Fuse functions `sum, avg, count` with identical arguments into one `sumCount` (`optimize_syntax_fuse_functions` should be enabled)", 0) \
M(Bool, flatten_nested, true, "If true, columns of type Nested will be flatten to separate array columns instead of one array of tuples", 0) \ M(Bool, flatten_nested, true, "If true, columns of type Nested will be flatten to separate array columns instead of one array of tuples", 0) \
M(Bool, asterisk_include_materialized_columns, false, "Include MATERIALIZED columns for wildcard query", 0) \ M(Bool, asterisk_include_materialized_columns, false, "Include MATERIALIZED columns for wildcard query", 0) \
M(Bool, asterisk_include_alias_columns, false, "Include ALIAS columns for wildcard query", 0) \ M(Bool, asterisk_include_alias_columns, false, "Include ALIAS columns for wildcard query", 0) \
M(Bool, optimize_skip_merged_partitions, false, "Skip partitions with one part with level > 0 in optimize final", 0) \ M(Bool, optimize_skip_merged_partitions, false, "Skip partitions with one part with level > 0 in optimize final", 0) \
M(Bool, optimize_on_insert, true, "Do the same transformation for inserted block of data as if merge was done on this block.", 0) \ M(Bool, optimize_on_insert, true, "Do the same transformation for inserted block of data as if merge was done on this block.", 0) \
M(Bool, allow_experimental_map_type, true, "Obsolete setting, does nothing.", 0) \
M(Bool, allow_experimental_projection_optimization, false, "Enable projection optimization when processing SELECT queries", 0) \ M(Bool, allow_experimental_projection_optimization, false, "Enable projection optimization when processing SELECT queries", 0) \
M(Bool, force_optimize_projection, false, "If projection optimization is enabled, SELECT queries need to use projection", 0) \ M(Bool, force_optimize_projection, false, "If projection optimization is enabled, SELECT queries need to use projection", 0) \
M(Bool, async_socket_for_remote, true, "Asynchronously read from socket executing remote query", 0) \ M(Bool, async_socket_for_remote, true, "Asynchronously read from socket executing remote query", 0) \
@ -506,7 +505,7 @@ class IColumn;
M(ShortCircuitFunctionEvaluation, short_circuit_function_evaluation, ShortCircuitFunctionEvaluation::ENABLE, "Setting for short-circuit function evaluation configuration. Possible values: 'enable' - use short-circuit function evaluation for functions that are suitable for it, 'disable' - disable short-circuit function evaluation, 'force_enable' - use short-circuit function evaluation for all functions.", 0) \ M(ShortCircuitFunctionEvaluation, short_circuit_function_evaluation, ShortCircuitFunctionEvaluation::ENABLE, "Setting for short-circuit function evaluation configuration. Possible values: 'enable' - use short-circuit function evaluation for functions that are suitable for it, 'disable' - disable short-circuit function evaluation, 'force_enable' - use short-circuit function evaluation for all functions.", 0) \
\ \
M(String, local_filesystem_read_method, "pread", "Method of reading data from local filesystem, one of: read, pread, mmap, pread_threadpool.", 0) \ M(String, local_filesystem_read_method, "pread", "Method of reading data from local filesystem, one of: read, pread, mmap, pread_threadpool.", 0) \
M(String, remote_filesystem_read_method, "read", "Method of reading data from remote filesystem, one of: read, read_threadpool.", 0) \ M(String, remote_filesystem_read_method, "read", "Method of reading data from remote filesystem, one of: read, threadpool.", 0) \
M(Bool, local_filesystem_read_prefetch, false, "Should use prefetching when reading data from local filesystem.", 0) \ M(Bool, local_filesystem_read_prefetch, false, "Should use prefetching when reading data from local filesystem.", 0) \
M(Bool, remote_filesystem_read_prefetch, true, "Should use prefetching when reading data from remote filesystem.", 0) \ M(Bool, remote_filesystem_read_prefetch, true, "Should use prefetching when reading data from remote filesystem.", 0) \
M(Int64, read_priority, 0, "Priority to read data from local filesystem. Only supported for 'pread_threadpool' method.", 0) \ M(Int64, read_priority, 0, "Priority to read data from local filesystem. Only supported for 'pread_threadpool' method.", 0) \
@ -529,24 +528,28 @@ class IColumn;
/** Experimental functions */ \ /** Experimental functions */ \
M(Bool, allow_experimental_funnel_functions, false, "Enable experimental functions for funnel analysis.", 0) \ M(Bool, allow_experimental_funnel_functions, false, "Enable experimental functions for funnel analysis.", 0) \
M(Bool, allow_experimental_nlp_functions, false, "Enable experimental functions for natural language processing.", 0) \ M(Bool, allow_experimental_nlp_functions, false, "Enable experimental functions for natural language processing.", 0) \
\
\
/** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. */ \
M(UInt64, max_memory_usage_for_all_queries, 0, "Obsolete setting, does nothing.", 0) \
M(UInt64, multiple_joins_rewriter_version, 0, "Obsolete setting, does nothing.", 0) \
M(Bool, enable_debug_queries, false, "Obsolete setting, does nothing.", 0) \
M(Bool, allow_experimental_database_atomic, true, "Obsolete setting, does nothing.", 0) \
M(Bool, allow_experimental_bigint_types, true, "Obsolete setting, does nothing.", 0) \
M(Bool, allow_experimental_window_functions, true, "Obsolete setting, does nothing.", 0) \
M(HandleKafkaErrorMode, handle_kafka_error_mode, HandleKafkaErrorMode::DEFAULT, "Obsolete setting, does nothing.", 0) \
M(Bool, database_replicated_ddl_output, true, "Obsolete setting, does nothing.", 0) \
M(UInt64, replication_alter_columns_timeout, 60, "Obsolete setting, does nothing.", 0) \
M(UInt64, odbc_max_field_size, 0, "Obsolete setting, does nothing.", 0) \
/** The section above is for obsolete settings. Do not add anything there. */
// End of COMMON_SETTINGS // End of COMMON_SETTINGS
// Please add settings related to formats into the FORMAT_FACTORY_SETTINGS below. // Please add settings related to formats into the FORMAT_FACTORY_SETTINGS and move obsolete settings to OBSOLETE_SETTINGS.
#define MAKE_OBSOLETE(M, TYPE, NAME, DEFAULT) \
M(TYPE, NAME, DEFAULT, "Obsolete setting, does nothing.", BaseSettingsHelpers::Flags::OBSOLETE)
#define OBSOLETE_SETTINGS(M) \
/** Obsolete settings that do nothing but are left for compatibility reasons. Remove each one after half a year of obsolescence. */ \
MAKE_OBSOLETE(M, UInt64, max_memory_usage_for_all_queries, 0) \
MAKE_OBSOLETE(M, UInt64, multiple_joins_rewriter_version, 0) \
MAKE_OBSOLETE(M, Bool, enable_debug_queries, false) \
MAKE_OBSOLETE(M, Bool, allow_experimental_database_atomic, true) \
MAKE_OBSOLETE(M, Bool, allow_experimental_bigint_types, true) \
MAKE_OBSOLETE(M, Bool, allow_experimental_window_functions, true) \
MAKE_OBSOLETE(M, HandleKafkaErrorMode, handle_kafka_error_mode, HandleKafkaErrorMode::DEFAULT) \
MAKE_OBSOLETE(M, Bool, database_replicated_ddl_output, true) \
MAKE_OBSOLETE(M, UInt64, replication_alter_columns_timeout, 60) \
MAKE_OBSOLETE(M, UInt64, odbc_max_field_size, 0) \
MAKE_OBSOLETE(M, Bool, allow_experimental_map_type, true) \
/** The section above is for obsolete settings. Do not add anything there. */
#define FORMAT_FACTORY_SETTINGS(M) \ #define FORMAT_FACTORY_SETTINGS(M) \
M(Char, format_csv_delimiter, ',', "The character to be considered as a delimiter in CSV data. If setting with a string, a string has to have a length of 1.", 0) \ M(Char, format_csv_delimiter, ',', "The character to be considered as a delimiter in CSV data. If setting with a string, a string has to have a length of 1.", 0) \
@ -634,6 +637,7 @@ class IColumn;
#define LIST_OF_SETTINGS(M) \ #define LIST_OF_SETTINGS(M) \
COMMON_SETTINGS(M) \ COMMON_SETTINGS(M) \
OBSOLETE_SETTINGS(M) \
FORMAT_FACTORY_SETTINGS(M) FORMAT_FACTORY_SETTINGS(M)
DECLARE_SETTINGS_TRAITS_ALLOW_CUSTOM_SETTINGS(SettingsTraits, LIST_OF_SETTINGS) DECLARE_SETTINGS_TRAITS_ALLOW_CUSTOM_SETTINGS(SettingsTraits, LIST_OF_SETTINGS)
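Editor's note: the `MAKE_OBSOLETE` wrapper above keeps each obsolete entry to one line while forwarding a fixed description and the OBSOLETE flag into the X-macro list. The sketch below shows how such an expansion behaves in isolation; the numeric flag value and the `PRINT_SETTING` consumer are illustrative assumptions, the real consumer is the settings traits machinery.

```cpp
#include <iostream>

// Illustrative flag value; the real one is BaseSettingsHelpers::Flags::OBSOLETE.
#define OBSOLETE_FLAG 0x04

// Mirrors MAKE_OBSOLETE from the diff: forwards to M with a fixed description
// and the OBSOLETE flag, so each obsolete setting stays a one-liner.
#define MAKE_OBSOLETE(M, TYPE, NAME, DEFAULT) \
    M(TYPE, NAME, DEFAULT, "Obsolete setting, does nothing.", OBSOLETE_FLAG)

#define OBSOLETE_SETTINGS(M) \
    MAKE_OBSOLETE(M, Bool, enable_debug_queries, false) \
    MAKE_OBSOLETE(M, UInt64, odbc_max_field_size, 0)

// One possible "M": print what each expansion carries.
#define PRINT_SETTING(TYPE, NAME, DEFAULT, DESCRIPTION, FLAGS) \
    std::cout << #NAME << " (" << #TYPE << "): " << DESCRIPTION \
              << " flags=" << (FLAGS) << '\n';

int main()
{
    OBSOLETE_SETTINGS(PRINT_SETTING)
}
```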

View File

@ -172,7 +172,7 @@ UInt64 DiskLocal::getTotalSpace() const
fs = getStatVFS((fs::path(disk_path) / "data/").string()); fs = getStatVFS((fs::path(disk_path) / "data/").string());
else else
fs = getStatVFS(disk_path); fs = getStatVFS(disk_path);
UInt64 total_size = fs.f_blocks * fs.f_bsize; UInt64 total_size = fs.f_blocks * fs.f_frsize;
if (total_size < keep_free_space_bytes) if (total_size < keep_free_space_bytes)
return 0; return 0;
return total_size - keep_free_space_bytes; return total_size - keep_free_space_bytes;
@ -187,7 +187,7 @@ UInt64 DiskLocal::getAvailableSpace() const
fs = getStatVFS((fs::path(disk_path) / "data/").string()); fs = getStatVFS((fs::path(disk_path) / "data/").string());
else else
fs = getStatVFS(disk_path); fs = getStatVFS(disk_path);
UInt64 total_size = fs.f_bavail * fs.f_bsize; UInt64 total_size = fs.f_bavail * fs.f_frsize;
if (total_size < keep_free_space_bytes) if (total_size < keep_free_space_bytes)
return 0; return 0;
return total_size - keep_free_space_bytes; return total_size - keep_free_space_bytes;
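Editor's note: the DiskLocal change replaces `f_bsize` with `f_frsize`. Per POSIX, `f_blocks` and `f_bavail` are counted in units of `f_frsize` (the fragment size), while `f_bsize` is only the preferred I/O block size, so the two can differ and the old multiplication could misreport capacity. A small POSIX-only sketch of the corrected arithmetic (the path "/" is arbitrary):

```cpp
#include <sys/statvfs.h>
#include <cstdint>
#include <iostream>

int main()
{
    struct statvfs fs{};
    if (statvfs("/", &fs) != 0)
        return 1;

    // f_blocks and f_bavail are expressed in units of f_frsize, not f_bsize,
    // so f_frsize is the correct multiplier -- the same change as in the diff.
    uint64_t total_bytes = static_cast<uint64_t>(fs.f_blocks) * fs.f_frsize;
    uint64_t avail_bytes = static_cast<uint64_t>(fs.f_bavail) * fs.f_frsize;

    std::cout << "total: " << total_bytes << " bytes, available: " << avail_bytes << " bytes\n";
}
```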

View File

@ -24,6 +24,8 @@ public:
void setReadUntilPosition(size_t position) override { impl->setReadUntilPosition(position); } void setReadUntilPosition(size_t position) override { impl->setReadUntilPosition(position); }
void setReadUntilEnd() override { impl->setReadUntilEnd(); }
private: private:
ReadLock lock; ReadLock lock;
}; };

View File

@ -166,7 +166,7 @@ std::unique_ptr<ReadBufferFromFileBase> DiskWebServer::readFile(const String & p
RemoteMetadata meta(path, remote_path); RemoteMetadata meta(path, remote_path);
meta.remote_fs_objects.emplace_back(std::make_pair(remote_path, iter->second.size)); meta.remote_fs_objects.emplace_back(std::make_pair(remote_path, iter->second.size));
bool threadpool_read = read_settings.remote_fs_method == RemoteFSReadMethod::read_threadpool; bool threadpool_read = read_settings.remote_fs_method == RemoteFSReadMethod::threadpool;
auto web_impl = std::make_unique<ReadBufferFromWebServerGather>(path, url, meta, getContext(), threadpool_read, read_settings); auto web_impl = std::make_unique<ReadBufferFromWebServerGather>(path, url, meta, getContext(), threadpool_read, read_settings);

View File

@ -2,12 +2,12 @@
#include <IO/SeekAvoidingReadBuffer.h> #include <IO/SeekAvoidingReadBuffer.h>
#include <Storages/HDFS/WriteBufferFromHDFS.h> #include <Storages/HDFS/WriteBufferFromHDFS.h>
#include <Storages/HDFS/HDFSCommon.h>
#include <Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.h> #include <Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.h>
#include <Disks/IO/ReadIndirectBufferFromRemoteFS.h> #include <Disks/IO/ReadIndirectBufferFromRemoteFS.h>
#include <Disks/IO/WriteIndirectBufferFromRemoteFS.h> #include <Disks/IO/WriteIndirectBufferFromRemoteFS.h>
#include <Disks/IO/ReadBufferFromRemoteFSGather.h> #include <Disks/IO/ReadBufferFromRemoteFSGather.h>
#include <Disks/IO/ThreadPoolRemoteFSReader.h>
#include <base/logger_useful.h> #include <base/logger_useful.h>
#include <base/FnTraits.h> #include <base/FnTraits.h>
@ -77,7 +77,7 @@ std::unique_ptr<ReadBufferFromFileBase> DiskHDFS::readFile(const String & path,
auto hdfs_impl = std::make_unique<ReadBufferFromHDFSGather>(path, config, remote_fs_root_path, metadata, read_settings.remote_fs_buffer_size); auto hdfs_impl = std::make_unique<ReadBufferFromHDFSGather>(path, config, remote_fs_root_path, metadata, read_settings.remote_fs_buffer_size);
if (read_settings.remote_fs_method == RemoteFSReadMethod::read_threadpool) if (read_settings.remote_fs_method == RemoteFSReadMethod::threadpool)
{ {
auto reader = getThreadPoolReader(); auto reader = getThreadPoolReader();
return std::make_unique<AsynchronousReadIndirectBufferFromRemoteFS>(reader, read_settings, std::move(hdfs_impl)); return std::make_unique<AsynchronousReadIndirectBufferFromRemoteFS>(reader, read_settings, std::move(hdfs_impl));
@ -169,6 +169,7 @@ void registerDiskHDFS(DiskFactory & factory)
fs::create_directories(disk); fs::create_directories(disk);
String uri{config.getString(config_prefix + ".endpoint")}; String uri{config.getString(config_prefix + ".endpoint")};
checkHDFSURL(uri);
if (uri.back() != '/') if (uri.back() != '/')
throw Exception(ErrorCodes::BAD_ARGUMENTS, "HDFS path must end with '/', but '{}' doesn't.", uri); throw Exception(ErrorCodes::BAD_ARGUMENTS, "HDFS path must end with '/', but '{}' doesn't.", uri);

View File

@ -59,16 +59,23 @@ String AsynchronousReadIndirectBufferFromRemoteFS::getFileName() const
bool AsynchronousReadIndirectBufferFromRemoteFS::hasPendingDataToRead() bool AsynchronousReadIndirectBufferFromRemoteFS::hasPendingDataToRead()
{ {
/// Position is set only for MergeTree tables. /**
* Note: read_until_position here can be std::nullopt only for non-MergeTree tables.
* For MergeTree tables it must be guaranteed that setReadUntilPosition() or
* setReadUntilEnd() is called before any read or prefetch.
* setReadUntilEnd() always sets read_until_position to file size.
* setReadUntilPosition(pos) always has pos > 0, because if
* right_offset_in_compressed_file is 0, then setReadUntilEnd() is used.
*/
if (read_until_position) if (read_until_position)
{ {
/// Everything is already read. /// Everything is already read.
if (file_offset_of_buffer_end == read_until_position) if (file_offset_of_buffer_end == *read_until_position)
return false; return false;
if (file_offset_of_buffer_end > read_until_position) if (file_offset_of_buffer_end > *read_until_position)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Read beyond last offset ({} > {})", throw Exception(ErrorCodes::LOGICAL_ERROR, "Read beyond last offset ({} > {})",
file_offset_of_buffer_end, read_until_position); file_offset_of_buffer_end, *read_until_position);
} }
else if (must_read_until_position) else if (must_read_until_position)
throw Exception(ErrorCodes::LOGICAL_ERROR, throw Exception(ErrorCodes::LOGICAL_ERROR,
@ -117,7 +124,7 @@ void AsynchronousReadIndirectBufferFromRemoteFS::setReadUntilPosition(size_t pos
throw Exception(ErrorCodes::LOGICAL_ERROR, "Prefetch is valid in readUntilPosition"); throw Exception(ErrorCodes::LOGICAL_ERROR, "Prefetch is valid in readUntilPosition");
read_until_position = position; read_until_position = position;
impl->setReadUntilPosition(read_until_position); impl->setReadUntilPosition(*read_until_position);
} }
@ -127,7 +134,7 @@ void AsynchronousReadIndirectBufferFromRemoteFS::setReadUntilEnd()
throw Exception(ErrorCodes::LOGICAL_ERROR, "Prefetch is valid in readUntilEnd"); throw Exception(ErrorCodes::LOGICAL_ERROR, "Prefetch is valid in readUntilEnd");
read_until_position = impl->getFileSize(); read_until_position = impl->getFileSize();
impl->setReadUntilPosition(read_until_position); impl->setReadUntilPosition(*read_until_position);
} }
@ -225,7 +232,7 @@ off_t AsynchronousReadIndirectBufferFromRemoteFS::seek(off_t offset_, int whence
pos = working_buffer.end(); pos = working_buffer.end();
/// Note: we read in range [file_offset_of_buffer_end, read_until_position). /// Note: we read in range [file_offset_of_buffer_end, read_until_position).
if (file_offset_of_buffer_end < read_until_position if (read_until_position && file_offset_of_buffer_end < *read_until_position
&& static_cast<off_t>(file_offset_of_buffer_end) >= getPosition() && static_cast<off_t>(file_offset_of_buffer_end) >= getPosition()
&& static_cast<off_t>(file_offset_of_buffer_end) < getPosition() + static_cast<off_t>(min_bytes_for_seek)) && static_cast<off_t>(file_offset_of_buffer_end) < getPosition() + static_cast<off_t>(min_bytes_for_seek))
{ {

View File

@ -76,7 +76,7 @@ private:
size_t bytes_to_ignore = 0; size_t bytes_to_ignore = 0;
size_t read_until_position = 0; std::optional<size_t> read_until_position = 0;
bool must_read_until_position; bool must_read_until_position;
}; };
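Editor's note: turning `read_until_position` into `std::optional<size_t>` lets the buffer distinguish "no bound set yet" from "bound set to 0", which is why every use above now checks the optional and dereferences it. A minimal sketch of that check-then-dereference pattern, with simplified names standing in for the real buffer:

```cpp
#include <cstddef>
#include <iostream>
#include <optional>
#include <stdexcept>

// Simplified stand-in for the buffer in the diff: only the bound-checking logic.
struct RemoteReadState
{
    size_t file_offset_of_buffer_end = 0;
    std::optional<size_t> read_until_position; // std::nullopt means "no bound set yet"

    bool hasPendingDataToRead() const
    {
        if (read_until_position)
        {
            if (file_offset_of_buffer_end == *read_until_position)
                return false; // everything up to the bound is already read
            if (file_offset_of_buffer_end > *read_until_position)
                throw std::logic_error("read beyond last offset");
        }
        return true;
    }
};

int main()
{
    RemoteReadState state;
    std::cout << state.hasPendingDataToRead() << '\n'; // no bound: keep reading

    state.read_until_position = 100;
    state.file_offset_of_buffer_end = 100;
    std::cout << state.hasPendingDataToRead() << '\n'; // bound reached: 0
}
```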

View File

@ -230,7 +230,7 @@ std::unique_ptr<ReadBufferFromFileBase> DiskS3::readFile(const String & path, co
LOG_TRACE(log, "Read from file by path: {}. Existing S3 objects: {}", LOG_TRACE(log, "Read from file by path: {}. Existing S3 objects: {}",
backQuote(metadata_path + path), metadata.remote_fs_objects.size()); backQuote(metadata_path + path), metadata.remote_fs_objects.size());
bool threadpool_read = read_settings.remote_fs_method == RemoteFSReadMethod::read_threadpool; bool threadpool_read = read_settings.remote_fs_method == RemoteFSReadMethod::threadpool;
auto s3_impl = std::make_unique<ReadBufferFromS3Gather>( auto s3_impl = std::make_unique<ReadBufferFromS3Gather>(
path, path,

View File

@ -103,6 +103,14 @@ struct IntHash64Impl
} }
}; };
template<typename T, typename HashFunction>
T combineHashesFunc(T t1, T t2)
{
T hashes[] = {t1, t2};
return HashFunction::apply(reinterpret_cast<const char *>(hashes), 2 * sizeof(T));
}
#if USE_SSL #if USE_SSL
struct HalfMD5Impl struct HalfMD5Impl
{ {
@ -248,8 +256,7 @@ struct SipHash64Impl
static UInt64 combineHashes(UInt64 h1, UInt64 h2) static UInt64 combineHashes(UInt64 h1, UInt64 h2)
{ {
UInt64 hashes[] = {h1, h2}; return combineHashesFunc<UInt64, SipHash64Impl>(h1, h2);
return apply(reinterpret_cast<const char *>(hashes), 16);
} }
static constexpr bool use_int_hash_for_pods = false; static constexpr bool use_int_hash_for_pods = false;
@ -258,12 +265,20 @@ struct SipHash64Impl
struct SipHash128Impl struct SipHash128Impl
{ {
static constexpr auto name = "sipHash128"; static constexpr auto name = "sipHash128";
enum { length = 16 };
static void apply(const char * begin, const size_t size, unsigned char * out_char_data) using ReturnType = UInt128;
static UInt128 combineHashes(UInt128 h1, UInt128 h2)
{ {
sipHash128(begin, size, reinterpret_cast<char*>(out_char_data)); return combineHashesFunc<UInt128, SipHash128Impl>(h1, h2);
} }
static UInt128 apply(const char * data, const size_t size)
{
return sipHash128(data, size);
}
static constexpr bool use_int_hash_for_pods = false;
}; };
/** Why we need MurmurHash2? /** Why we need MurmurHash2?
@ -380,12 +395,22 @@ struct MurmurHash3Impl64
struct MurmurHash3Impl128 struct MurmurHash3Impl128
{ {
static constexpr auto name = "murmurHash3_128"; static constexpr auto name = "murmurHash3_128";
enum { length = 16 };
static void apply(const char * begin, const size_t size, unsigned char * out_char_data) using ReturnType = UInt128;
static UInt128 apply(const char * data, const size_t size)
{ {
MurmurHash3_x64_128(begin, size, 0, out_char_data); char bytes[16];
MurmurHash3_x64_128(data, size, 0, bytes);
return *reinterpret_cast<UInt128 *>(bytes);
} }
static UInt128 combineHashes(UInt128 h1, UInt128 h2)
{
return combineHashesFunc<UInt128, MurmurHash3Impl128>(h1, h2);
}
static constexpr bool use_int_hash_for_pods = false;
}; };
/// http://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/String.java#l1452 /// http://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/String.java#l1452
@ -1093,6 +1118,11 @@ public:
DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override
{ {
if constexpr (std::is_same_v<ToType, UInt128>) /// backward-compatible
{
return std::make_shared<DataTypeFixedString>(sizeof(UInt128));
}
else
return std::make_shared<DataTypeNumber<ToType>>(); return std::make_shared<DataTypeNumber<ToType>>();
} }
@ -1115,6 +1145,13 @@ public:
for (const auto & col : arguments) for (const auto & col : arguments)
executeForArgument(col.type.get(), col.column.get(), vec_to, is_first_argument); executeForArgument(col.type.get(), col.column.get(), vec_to, is_first_argument);
if constexpr (std::is_same_v<ToType, UInt128>) /// backward-compatible
{
auto col_to_fixed_string = ColumnFixedString::create(sizeof(UInt128));
col_to_fixed_string->getChars() = std::move(*reinterpret_cast<ColumnFixedString::Chars *>(&col_to->getData()));
return col_to_fixed_string;
}
return col_to; return col_to;
} }
}; };
@ -1359,7 +1396,7 @@ using FunctionSHA256 = FunctionStringHashFixedString<SHA256Impl>;
using FunctionSHA384 = FunctionStringHashFixedString<SHA384Impl>; using FunctionSHA384 = FunctionStringHashFixedString<SHA384Impl>;
using FunctionSHA512 = FunctionStringHashFixedString<SHA512Impl>; using FunctionSHA512 = FunctionStringHashFixedString<SHA512Impl>;
#endif #endif
using FunctionSipHash128 = FunctionStringHashFixedString<SipHash128Impl>; using FunctionSipHash128 = FunctionAnyHash<SipHash128Impl>;
using FunctionCityHash64 = FunctionAnyHash<ImplCityHash64>; using FunctionCityHash64 = FunctionAnyHash<ImplCityHash64>;
using FunctionFarmFingerprint64 = FunctionAnyHash<ImplFarmFingerprint64>; using FunctionFarmFingerprint64 = FunctionAnyHash<ImplFarmFingerprint64>;
using FunctionFarmHash64 = FunctionAnyHash<ImplFarmHash64>; using FunctionFarmHash64 = FunctionAnyHash<ImplFarmHash64>;
@ -1370,7 +1407,7 @@ using FunctionMurmurHash2_64 = FunctionAnyHash<MurmurHash2Impl64>;
using FunctionGccMurmurHash = FunctionAnyHash<GccMurmurHashImpl>; using FunctionGccMurmurHash = FunctionAnyHash<GccMurmurHashImpl>;
using FunctionMurmurHash3_32 = FunctionAnyHash<MurmurHash3Impl32>; using FunctionMurmurHash3_32 = FunctionAnyHash<MurmurHash3Impl32>;
using FunctionMurmurHash3_64 = FunctionAnyHash<MurmurHash3Impl64>; using FunctionMurmurHash3_64 = FunctionAnyHash<MurmurHash3Impl64>;
using FunctionMurmurHash3_128 = FunctionStringHashFixedString<MurmurHash3Impl128>; using FunctionMurmurHash3_128 = FunctionAnyHash<MurmurHash3Impl128>;
using FunctionJavaHash = FunctionAnyHash<JavaHashImpl>; using FunctionJavaHash = FunctionAnyHash<JavaHashImpl>;
using FunctionJavaHashUTF16LE = FunctionAnyHash<JavaHashUTF16LEImpl>; using FunctionJavaHashUTF16LE = FunctionAnyHash<JavaHashUTF16LEImpl>;
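Editor's note: the hashing hunk factors the "combine two hashes" step into the `combineHashesFunc<T, HashFunction>` template so SipHash128 and MurmurHash3_128 can reuse it. The sketch below reproduces the combining idea with a toy byte hash; only the shape of `apply(const char *, size_t)` matches the diff, the FNV-1a hash itself is an assumption for illustration.

```cpp
#include <cstddef>
#include <cstdint>
#include <iostream>

// Toy byte hash standing in for SipHash64Impl::apply.
struct Fnv1a64Impl
{
    static uint64_t apply(const char * data, size_t size)
    {
        uint64_t h = 1469598103934665603ULL;
        for (size_t i = 0; i < size; ++i)
        {
            h ^= static_cast<unsigned char>(data[i]);
            h *= 1099511628211ULL;
        }
        return h;
    }
};

// Same idea as combineHashesFunc in the diff: pack the two hashes into a small
// buffer and hash that buffer again with the same hash function.
template <typename T, typename HashFunction>
T combineHashesFunc(T t1, T t2)
{
    T hashes[] = {t1, t2};
    return HashFunction::apply(reinterpret_cast<const char *>(hashes), 2 * sizeof(T));
}

int main()
{
    uint64_t h1 = Fnv1a64Impl::apply("left", 4);
    uint64_t h2 = Fnv1a64Impl::apply("right", 5);
    std::cout << combineHashesFunc<uint64_t, Fnv1a64Impl>(h1, h2) << '\n';
}
```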

View File

@ -170,7 +170,7 @@ std::unique_ptr<ReadBuffer> ReadBufferFromS3::initialize()
req.SetKey(key); req.SetKey(key);
/** /**
* If remote_filesystem_read_method = 'read_threadpool', then for MergeTree family tables * If remote_filesystem_read_method = 'threadpool', then for MergeTree family tables
* exact byte ranges to read are always passed here. * exact byte ranges to read are always passed here.
*/ */
if (read_until_position) if (read_until_position)

View File

@ -46,7 +46,7 @@ enum class LocalFSReadMethod
enum class RemoteFSReadMethod enum class RemoteFSReadMethod
{ {
read, read,
read_threadpool, threadpool,
}; };
class MMappedFileCache; class MMappedFileCache;

View File

@ -8,6 +8,7 @@
#include <Parsers/ASTSelectQuery.h> #include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTSubquery.h> #include <Parsers/ASTSubquery.h>
#include <Parsers/ASTSelectWithUnionQuery.h> #include <Parsers/ASTSelectWithUnionQuery.h>
#include <Parsers/ASTSelectIntersectExceptQuery.h>
#include <Parsers/ASTTablesInSelectQuery.h> #include <Parsers/ASTTablesInSelectQuery.h>
#include <Parsers/ASTFunction.h> #include <Parsers/ASTFunction.h>
#include <Parsers/ASTCreateQuery.h> #include <Parsers/ASTCreateQuery.h>
@ -86,7 +87,12 @@ private:
void visit(ASTSelectWithUnionQuery & select, ASTPtr &) const void visit(ASTSelectWithUnionQuery & select, ASTPtr &) const
{ {
for (auto & child : select.list_of_selects->children) for (auto & child : select.list_of_selects->children)
{
if (child->as<ASTSelectQuery>())
tryVisit<ASTSelectQuery>(child); tryVisit<ASTSelectQuery>(child);
else if (child->as<ASTSelectIntersectExceptQuery>())
tryVisit<ASTSelectIntersectExceptQuery>(child);
}
} }
void visit(ASTSelectQuery & select, ASTPtr &) const void visit(ASTSelectQuery & select, ASTPtr &) const
@ -97,6 +103,19 @@ private:
visitChildren(select); visitChildren(select);
} }
void visit(ASTSelectIntersectExceptQuery & select, ASTPtr &) const
{
for (auto & child : select.getListOfSelects())
{
if (child->as<ASTSelectQuery>())
tryVisit<ASTSelectQuery>(child);
else if (child->as<ASTSelectIntersectExceptQuery>())
tryVisit<ASTSelectIntersectExceptQuery>(child);
else if (child->as<ASTSelectWithUnionQuery>())
tryVisit<ASTSelectWithUnionQuery>(child);
}
}
void visit(ASTTablesInSelectQuery & tables, ASTPtr &) const void visit(ASTTablesInSelectQuery & tables, ASTPtr &) const
{ {
for (auto & child : tables.children) for (auto & child : tables.children)
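Editor's note: the visitor above now dispatches on the concrete child type (plain SELECT, UNION, or INTERSECT/EXCEPT) and recurses into container nodes. A generic sketch of that dispatch-and-recurse pattern follows; the tiny node hierarchy here is invented for illustration and is not ClickHouse's AST.

```cpp
#include <iostream>
#include <memory>
#include <string>
#include <vector>

// Minimal stand-ins for AST nodes; ClickHouse's real classes are far richer.
struct Node { virtual ~Node() = default; };
struct SelectNode : Node { std::string name; explicit SelectNode(std::string n) : name(std::move(n)) {} };
struct IntersectExceptNode : Node { std::vector<std::shared_ptr<Node>> selects; };

// Same dispatch shape as the diff: check the dynamic type of each node and
// either handle it directly or recurse into the container node.
void visit(const std::shared_ptr<Node> & node)
{
    if (auto * select = dynamic_cast<SelectNode *>(node.get()))
        std::cout << "visiting select: " << select->name << '\n';
    else if (auto * intersect_except = dynamic_cast<IntersectExceptNode *>(node.get()))
        for (const auto & child : intersect_except->selects)
            visit(child);
}

int main()
{
    auto tree = std::make_shared<IntersectExceptNode>();
    tree->selects.push_back(std::make_shared<SelectNode>("a"));
    auto nested = std::make_shared<IntersectExceptNode>();
    nested->selects.push_back(std::make_shared<SelectNode>("b"));
    tree->selects.push_back(nested);
    visit(tree);
}
```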

View File

@ -1,6 +1,7 @@
#include <Interpreters/ApplyWithGlobalVisitor.h> #include <Interpreters/ApplyWithGlobalVisitor.h>
#include <Parsers/ASTSelectQuery.h> #include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTSelectWithUnionQuery.h> #include <Parsers/ASTSelectWithUnionQuery.h>
#include <Parsers/ASTSelectIntersectExceptQuery.h>
#include <Parsers/ASTWithAlias.h> #include <Parsers/ASTWithAlias.h>
namespace DB namespace DB
@ -40,6 +41,31 @@ void ApplyWithGlobalVisitor::visit(
{ {
visit(*node_select, exprs, with_expression_list); visit(*node_select, exprs, with_expression_list);
} }
else if (ASTSelectIntersectExceptQuery * node_intersect_except = select->as<ASTSelectIntersectExceptQuery>())
{
visit(*node_intersect_except, exprs, with_expression_list);
}
}
}
void ApplyWithGlobalVisitor::visit(
ASTSelectIntersectExceptQuery & selects, const std::map<String, ASTPtr> & exprs, const ASTPtr & with_expression_list)
{
auto selects_list = selects.getListOfSelects();
for (auto & select : selects_list)
{
if (ASTSelectWithUnionQuery * node_union = select->as<ASTSelectWithUnionQuery>())
{
visit(*node_union, exprs, with_expression_list);
}
else if (ASTSelectQuery * node_select = select->as<ASTSelectQuery>())
{
visit(*node_select, exprs, with_expression_list);
}
else if (ASTSelectIntersectExceptQuery * node_intersect_except = select->as<ASTSelectIntersectExceptQuery>())
{
visit(*node_intersect_except, exprs, with_expression_list);
}
} }
} }
@ -47,7 +73,7 @@ void ApplyWithGlobalVisitor::visit(ASTPtr & ast)
{ {
if (ASTSelectWithUnionQuery * node_union = ast->as<ASTSelectWithUnionQuery>()) if (ASTSelectWithUnionQuery * node_union = ast->as<ASTSelectWithUnionQuery>())
{ {
if (auto * first_select = node_union->list_of_selects->children[0]->as<ASTSelectQuery>()) if (auto * first_select = typeid_cast<ASTSelectQuery *>(node_union->list_of_selects->children[0].get()))
{ {
ASTPtr with_expression_list = first_select->with(); ASTPtr with_expression_list = first_select->with();
if (with_expression_list) if (with_expression_list)
@ -64,6 +90,8 @@ void ApplyWithGlobalVisitor::visit(ASTPtr & ast)
visit(*union_child, exprs, with_expression_list); visit(*union_child, exprs, with_expression_list);
else if (auto * select_child = (*it)->as<ASTSelectQuery>()) else if (auto * select_child = (*it)->as<ASTSelectQuery>())
visit(*select_child, exprs, with_expression_list); visit(*select_child, exprs, with_expression_list);
else if (auto * intersect_except_child = (*it)->as<ASTSelectIntersectExceptQuery>())
visit(*intersect_except_child, exprs, with_expression_list);
} }
} }
} }

View File

@ -8,6 +8,7 @@ namespace DB
class ASTSelectWithUnionQuery; class ASTSelectWithUnionQuery;
class ASTSelectQuery; class ASTSelectQuery;
class ASTSelectIntersectExceptQuery;
/// Pull out the WITH statement from the first child of ASTSelectWithUnion query if any. /// Pull out the WITH statement from the first child of ASTSelectWithUnion query if any.
class ApplyWithGlobalVisitor class ApplyWithGlobalVisitor
@ -18,6 +19,7 @@ public:
private: private:
static void visit(ASTSelectWithUnionQuery & selects, const std::map<String, ASTPtr> & exprs, const ASTPtr & with_expression_list); static void visit(ASTSelectWithUnionQuery & selects, const std::map<String, ASTPtr> & exprs, const ASTPtr & with_expression_list);
static void visit(ASTSelectQuery & select, const std::map<String, ASTPtr> & exprs, const ASTPtr & with_expression_list); static void visit(ASTSelectQuery & select, const std::map<String, ASTPtr> & exprs, const ASTPtr & with_expression_list);
static void visit(ASTSelectIntersectExceptQuery & select, const std::map<String, ASTPtr> & exprs, const ASTPtr & with_expression_list);
}; };
} }

View File

@ -1222,9 +1222,9 @@ void AsynchronousMetrics::update(std::chrono::system_clock::time_point update_ti
{ {
auto stat = getStatVFS(getContext()->getPath()); auto stat = getStatVFS(getContext()->getPath());
new_values["FilesystemMainPathTotalBytes"] = stat.f_blocks * stat.f_bsize; new_values["FilesystemMainPathTotalBytes"] = stat.f_blocks * stat.f_frsize;
new_values["FilesystemMainPathAvailableBytes"] = stat.f_bavail * stat.f_bsize; new_values["FilesystemMainPathAvailableBytes"] = stat.f_bavail * stat.f_frsize;
new_values["FilesystemMainPathUsedBytes"] = (stat.f_blocks - stat.f_bavail) * stat.f_bsize; new_values["FilesystemMainPathUsedBytes"] = (stat.f_blocks - stat.f_bavail) * stat.f_frsize;
new_values["FilesystemMainPathTotalINodes"] = stat.f_files; new_values["FilesystemMainPathTotalINodes"] = stat.f_files;
new_values["FilesystemMainPathAvailableINodes"] = stat.f_favail; new_values["FilesystemMainPathAvailableINodes"] = stat.f_favail;
new_values["FilesystemMainPathUsedINodes"] = stat.f_files - stat.f_favail; new_values["FilesystemMainPathUsedINodes"] = stat.f_files - stat.f_favail;
@ -1234,9 +1234,9 @@ void AsynchronousMetrics::update(std::chrono::system_clock::time_point update_ti
/// Current working directory of the server is the directory with logs. /// Current working directory of the server is the directory with logs.
auto stat = getStatVFS("."); auto stat = getStatVFS(".");
new_values["FilesystemLogsPathTotalBytes"] = stat.f_blocks * stat.f_bsize; new_values["FilesystemLogsPathTotalBytes"] = stat.f_blocks * stat.f_frsize;
new_values["FilesystemLogsPathAvailableBytes"] = stat.f_bavail * stat.f_bsize; new_values["FilesystemLogsPathAvailableBytes"] = stat.f_bavail * stat.f_frsize;
new_values["FilesystemLogsPathUsedBytes"] = (stat.f_blocks - stat.f_bavail) * stat.f_bsize; new_values["FilesystemLogsPathUsedBytes"] = (stat.f_blocks - stat.f_bavail) * stat.f_frsize;
new_values["FilesystemLogsPathTotalINodes"] = stat.f_files; new_values["FilesystemLogsPathTotalINodes"] = stat.f_files;
new_values["FilesystemLogsPathAvailableINodes"] = stat.f_favail; new_values["FilesystemLogsPathAvailableINodes"] = stat.f_favail;
new_values["FilesystemLogsPathUsedINodes"] = stat.f_files - stat.f_favail; new_values["FilesystemLogsPathUsedINodes"] = stat.f_files - stat.f_favail;

View File

@ -510,10 +510,23 @@ String Context::getUserScriptsPath() const
return shared->user_scripts_path; return shared->user_scripts_path;
} }
std::vector<String> Context::getWarnings() const Strings Context::getWarnings() const
{
Strings common_warnings;
{ {
auto lock = getLock(); auto lock = getLock();
return shared->warnings; common_warnings = shared->warnings;
}
for (const auto & setting : settings)
{
if (setting.isValueChanged() && setting.isObsolete())
{
common_warnings.emplace_back("Some obsolete setting is changed. "
"Check 'select * from system.settings where changed' and read the changelog.");
break;
}
}
return common_warnings;
} }
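Editor's note: `Context::getWarnings()` now appends a single generic warning the first time it sees a setting that is both changed and obsolete. A hedged sketch of that loop over a plain value type (the `SettingView` struct is invented; the real code iterates the Settings object):

```cpp
#include <iostream>
#include <string>
#include <vector>

struct SettingView
{
    std::string name;
    bool changed = false;
    bool obsolete = false;
};

// Mirrors the shape of the loop added to Context::getWarnings(): scan all
// settings and append one warning the first time a changed obsolete setting
// is found, then stop.
std::vector<std::string> getWarnings(const std::vector<SettingView> & settings)
{
    std::vector<std::string> warnings;
    for (const auto & setting : settings)
    {
        if (setting.changed && setting.obsolete)
        {
            warnings.emplace_back("Some obsolete setting is changed. "
                                  "Check 'select * from system.settings where changed' and read the changelog.");
            break;
        }
    }
    return warnings;
}

int main()
{
    std::vector<SettingView> settings = {
        {"max_threads", true, false},
        {"allow_experimental_map_type", true, true},
    };
    for (const auto & w : getWarnings(settings))
        std::cout << w << '\n';
}
```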
VolumePtr Context::getTemporaryVolume() const VolumePtr Context::getTemporaryVolume() const
@ -572,35 +585,6 @@ VolumePtr Context::setTemporaryStorage(const String & path, const String & polic
return shared->tmp_volume; return shared->tmp_volume;
} }
void Context::setBackupsVolume(const String & path, const String & policy_name)
{
std::lock_guard lock(shared->storage_policies_mutex);
if (policy_name.empty())
{
String path_with_separator = path;
if (!path_with_separator.ends_with('/'))
path_with_separator += '/';
auto disk = std::make_shared<DiskLocal>("_backups_default", path_with_separator, 0);
shared->backups_volume = std::make_shared<SingleDiskVolume>("_backups_default", disk, 0);
}
else
{
StoragePolicyPtr policy = getStoragePolicySelector(lock)->get(policy_name);
if (policy->getVolumes().size() != 1)
throw Exception("Policy " + policy_name + " is used for backups, such policy should have exactly one volume",
ErrorCodes::NO_ELEMENTS_IN_CONFIG);
shared->backups_volume = policy->getVolume(0);
}
BackupFactory::instance().setBackupsVolume(shared->backups_volume);
}
VolumePtr Context::getBackupsVolume() const
{
std::lock_guard lock(shared->storage_policies_mutex);
return shared->backups_volume;
}
void Context::setFlagsPath(const String & path) void Context::setFlagsPath(const String & path)
{ {
auto lock = getLock(); auto lock = getLock();

View File

@ -340,7 +340,7 @@ public:
String getUserScriptsPath() const; String getUserScriptsPath() const;
/// A list of warnings about server configuration to place in `system.warnings` table. /// A list of warnings about server configuration to place in `system.warnings` table.
std::vector<String> getWarnings() const; Strings getWarnings() const;
VolumePtr getTemporaryVolume() const; VolumePtr getTemporaryVolume() const;
@ -354,9 +354,6 @@ public:
VolumePtr setTemporaryStorage(const String & path, const String & policy_name = ""); VolumePtr setTemporaryStorage(const String & path, const String & policy_name = "");
void setBackupsVolume(const String & path, const String & policy_name = "");
VolumePtr getBackupsVolume() const;
using ConfigurationPtr = Poco::AutoPtr<Poco::Util::AbstractConfiguration>; using ConfigurationPtr = Poco::AutoPtr<Poco::Util::AbstractConfiguration>;
/// Global application configuration settings. /// Global application configuration settings.

View File

@ -27,6 +27,7 @@
#include <Core/ColumnNumbers.h> #include <Core/ColumnNumbers.h>
#include <Common/typeid_cast.h> #include <Common/typeid_cast.h>
#include <Common/assert_cast.h> #include <Common/assert_cast.h>
namespace DB namespace DB
{ {
@ -289,13 +290,11 @@ HashJoin::HashJoin(std::shared_ptr<TableJoin> table_join_, const Block & right_s
if (table_join->getDictionaryReader()) if (table_join->getDictionaryReader())
{ {
assert(disjuncts_num == 1); assert(disjuncts_num == 1);
LOG_DEBUG(log, "Performing join over dict");
data->type = Type::DICT; data->type = Type::DICT;
data->maps.resize(disjuncts_num); data->maps.resize(disjuncts_num);
std::get<MapsOne>(data->maps[0]).create(Type::DICT); std::get<MapsOne>(data->maps[0]).create(Type::DICT);
key_sizes.resize(1); chooseMethod(kind, key_columns, key_sizes.emplace_back()); /// init key_sizes
chooseMethod(key_columns, key_sizes[0]); /// init key_sizes
} }
else if (strictness == ASTTableJoin::Strictness::Asof) else if (strictness == ASTTableJoin::Strictness::Asof)
{ {
@ -321,13 +320,13 @@ HashJoin::HashJoin(std::shared_ptr<TableJoin> table_join_, const Block & right_s
/// Therefore, add it back in such that it can be extracted appropriately from the full stored /// Therefore, add it back in such that it can be extracted appropriately from the full stored
/// key_columns and key_sizes /// key_columns and key_sizes
auto & asof_key_sizes = key_sizes.emplace_back(); auto & asof_key_sizes = key_sizes.emplace_back();
data->type = chooseMethod(key_columns, asof_key_sizes); data->type = chooseMethod(kind, key_columns, asof_key_sizes);
asof_key_sizes.push_back(asof_size); asof_key_sizes.push_back(asof_size);
} }
else else
{ {
/// Choose data structure to use for JOIN. /// Choose data structure to use for JOIN.
auto current_join_method = chooseMethod(key_columns, key_sizes.emplace_back()); auto current_join_method = chooseMethod(kind, key_columns, key_sizes.emplace_back());
if (data->type == Type::EMPTY) if (data->type == Type::EMPTY)
data->type = current_join_method; data->type = current_join_method;
else if (data->type != current_join_method) else if (data->type != current_join_method)
@ -337,14 +336,20 @@ HashJoin::HashJoin(std::shared_ptr<TableJoin> table_join_, const Block & right_s
for (auto & maps : data->maps) for (auto & maps : data->maps)
dataMapInit(maps); dataMapInit(maps);
LOG_DEBUG(log, "Join type: {}, kind: {}, strictness: {}", data->type, kind, strictness);
} }
HashJoin::Type HashJoin::chooseMethod(const ColumnRawPtrs & key_columns, Sizes & key_sizes) HashJoin::Type HashJoin::chooseMethod(ASTTableJoin::Kind kind, const ColumnRawPtrs & key_columns, Sizes & key_sizes)
{ {
size_t keys_size = key_columns.size(); size_t keys_size = key_columns.size();
if (keys_size == 0) if (keys_size == 0)
{
if (isCrossOrComma(kind))
return Type::CROSS; return Type::CROSS;
return Type::EMPTY;
}
bool all_fixed = true; bool all_fixed = true;
size_t keys_bytes = 0; size_t keys_bytes = 0;
@ -446,6 +451,23 @@ private:
std::vector<size_t> positions; std::vector<size_t> positions;
}; };
/// Dummy key getter, always find nothing, used for JOIN ON NULL
template <typename Mapped>
class KeyGetterEmpty
{
public:
struct MappedType
{
using mapped_type = Mapped;
};
using FindResult = ColumnsHashing::columns_hashing_impl::FindResultImpl<Mapped, true>;
KeyGetterEmpty() = default;
FindResult findKey(MappedType, size_t, const Arena &) { return FindResult(); }
};
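Editor's note: `KeyGetterEmpty` is a null-object key getter, so the shared join loop can run for conditions that never match (JOIN ON NULL) without building a real hash table. A reduced sketch of the idea; the `FindResult` layout and the `findKey` signature here are simplified assumptions, not the real ColumnsHashing types.

```cpp
#include <cstddef>
#include <iostream>

// Generic find result: "found" flag plus a pointer to the mapped value.
template <typename Mapped>
struct FindResult
{
    const Mapped * value = nullptr;
    bool found = false;
};

// Key getter that never finds anything: every lookup returns "not found",
// which is exactly what an always-false join condition needs.
template <typename Mapped>
class KeyGetterEmpty
{
public:
    template <typename Map>
    FindResult<Mapped> findKey(const Map &, size_t /*row*/) const { return {}; }
};

int main()
{
    KeyGetterEmpty<int> getter;
    int dummy_map = 0;
    auto res = getter.findKey(dummy_map, 0);
    std::cout << "found=" << res.found << '\n'; // always 0
}
```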
template <HashJoin::Type type, typename Value, typename Mapped> template <HashJoin::Type type, typename Value, typename Mapped>
struct KeyGetterForTypeImpl; struct KeyGetterForTypeImpl;
@ -723,8 +745,6 @@ Block HashJoin::structureRightBlock(const Block & block) const
bool HashJoin::addJoinedBlock(const Block & source_block, bool check_limits) bool HashJoin::addJoinedBlock(const Block & source_block, bool check_limits)
{ {
if (empty())
throw Exception("Logical error: HashJoin was not initialized", ErrorCodes::LOGICAL_ERROR);
if (overDictionary()) if (overDictionary())
throw Exception("Logical error: insert into hash-map in HashJoin over dictionary", ErrorCodes::LOGICAL_ERROR); throw Exception("Logical error: insert into hash-map in HashJoin over dictionary", ErrorCodes::LOGICAL_ERROR);
@ -777,15 +797,15 @@ bool HashJoin::addJoinedBlock(const Block & source_block, bool check_limits)
auto join_mask_col = JoinCommon::getColumnAsMask(block, onexprs[onexpr_idx].condColumnNames().second); auto join_mask_col = JoinCommon::getColumnAsMask(block, onexprs[onexpr_idx].condColumnNames().second);
/// Save blocks that do not hold conditions in ON section /// Save blocks that do not hold conditions in ON section
ColumnUInt8::MutablePtr not_joined_map = nullptr; ColumnUInt8::MutablePtr not_joined_map = nullptr;
if (!multiple_disjuncts && isRightOrFull(kind) && join_mask_col) if (!multiple_disjuncts && isRightOrFull(kind) && !join_mask_col.isConstant())
{ {
const auto & join_mask = assert_cast<const ColumnUInt8 &>(*join_mask_col).getData(); const auto & join_mask = join_mask_col.getData();
/// Save rows that do not hold conditions /// Save rows that do not hold conditions
not_joined_map = ColumnUInt8::create(block.rows(), 0); not_joined_map = ColumnUInt8::create(block.rows(), 0);
for (size_t i = 0, sz = join_mask.size(); i < sz; ++i) for (size_t i = 0, sz = join_mask->size(); i < sz; ++i)
{ {
/// Condition holds, do not save row /// Condition holds, do not save row
if (join_mask[i]) if ((*join_mask)[i])
continue; continue;
/// NULL key will be saved anyway, do not save twice /// NULL key will be saved anyway, do not save twice
@ -802,7 +822,8 @@ bool HashJoin::addJoinedBlock(const Block & source_block, bool check_limits)
{ {
size_t size = insertFromBlockImpl<strictness_>( size_t size = insertFromBlockImpl<strictness_>(
*this, data->type, map, rows, key_columns, key_sizes[onexpr_idx], stored_block, null_map, *this, data->type, map, rows, key_columns, key_sizes[onexpr_idx], stored_block, null_map,
join_mask_col ? &assert_cast<const ColumnUInt8 &>(*join_mask_col).getData() : nullptr, /// If mask is false constant, rows are added to hashmap anyway. It's not a happy-flow, so this case is not optimized
join_mask_col.getData(),
data->pool); data->pool);
if (multiple_disjuncts) if (multiple_disjuncts)
@ -846,7 +867,7 @@ struct JoinOnKeyColumns
ColumnPtr null_map_holder; ColumnPtr null_map_holder;
/// Only rows where mask == true can be joined /// Only rows where mask == true can be joined
ColumnPtr join_mask_column; JoinCommon::JoinMask join_mask_column;
Sizes key_sizes; Sizes key_sizes;
@ -859,17 +880,10 @@ struct JoinOnKeyColumns
, null_map_holder(extractNestedColumnsAndNullMap(key_columns, null_map)) , null_map_holder(extractNestedColumnsAndNullMap(key_columns, null_map))
, join_mask_column(JoinCommon::getColumnAsMask(block, cond_column_name)) , join_mask_column(JoinCommon::getColumnAsMask(block, cond_column_name))
, key_sizes(key_sizes_) , key_sizes(key_sizes_)
{} {
}
bool isRowFiltered(size_t i) const bool isRowFiltered(size_t i) const { return join_mask_column.isRowFiltered(i); }
{
if (join_mask_column)
{
UInt8ColumnDataPtr mask = &assert_cast<const ColumnUInt8 &>(*(join_mask_column)).getData();
return !(*mask)[i];
}
return false;
}
}; };
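Editor's note: the refactor replaces the raw `ColumnPtr` plus `assert_cast` with a `JoinCommon::JoinMask` helper that exposes `isConstant()` and `isRowFiltered(i)`. A hypothetical simplification of such a wrapper over `std::optional<std::vector<uint8_t>>`; the real class works on ClickHouse columns.

```cpp
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <optional>
#include <vector>

// When no per-row mask exists the mask is treated as a constant "all rows pass".
class JoinMask
{
public:
    JoinMask() = default; // constant true mask
    explicit JoinMask(std::vector<uint8_t> data) : mask(std::move(data)) {}

    bool isConstant() const { return !mask.has_value(); }

    // A row is filtered out when a mask is present and holds 0 for it.
    bool isRowFiltered(size_t i) const { return mask && !(*mask)[i]; }

private:
    std::optional<std::vector<uint8_t>> mask;
};

int main()
{
    JoinMask all_rows;                                    // e.g. no ON condition column
    JoinMask some_rows(std::vector<uint8_t>{1, 0, 1});    // per-row condition result

    std::cout << all_rows.isRowFiltered(0) << '\n';  // 0: nothing filtered
    std::cout << some_rows.isRowFiltered(1) << '\n'; // 1: second row filtered
}
```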
class AddedColumns class AddedColumns
@ -985,6 +999,7 @@ public:
const IColumn & leftAsofKey() const { return *left_asof_key; } const IColumn & leftAsofKey() const { return *left_asof_key; }
std::vector<JoinOnKeyColumns> join_on_keys; std::vector<JoinOnKeyColumns> join_on_keys;
size_t rows_to_add; size_t rows_to_add;
std::unique_ptr<IColumn::Offsets> offsets_to_replicate; std::unique_ptr<IColumn::Offsets> offsets_to_replicate;
bool need_filter = false; bool need_filter = false;
@ -998,6 +1013,7 @@ private:
std::optional<TypeIndex> asof_type; std::optional<TypeIndex> asof_type;
ASOF::Inequality asof_inequality; ASOF::Inequality asof_inequality;
const IColumn * left_asof_key = nullptr; const IColumn * left_asof_key = nullptr;
bool is_join_get; bool is_join_get;
void addColumn(const ColumnWithTypeAndName & src_column, const std::string & qualified_name) void addColumn(const ColumnWithTypeAndName & src_column, const std::string & qualified_name)
@ -1373,6 +1389,22 @@ IColumn::Filter switchJoinRightColumns(
constexpr bool is_asof_join = STRICTNESS == ASTTableJoin::Strictness::Asof; constexpr bool is_asof_join = STRICTNESS == ASTTableJoin::Strictness::Asof;
switch (type) switch (type)
{ {
case HashJoin::Type::EMPTY:
{
if constexpr (!is_asof_join)
{
using KeyGetter = KeyGetterEmpty<typename Maps::MappedType>;
std::vector<KeyGetter> key_getter_vector;
key_getter_vector.emplace_back();
using MapTypeVal = typename KeyGetter::MappedType;
std::vector<const MapTypeVal *> a_map_type_vector;
a_map_type_vector.emplace_back();
return joinRightColumnsSwitchNullability<KIND, STRICTNESS, KeyGetter>(
std::move(key_getter_vector), a_map_type_vector, added_columns, used_flags);
}
throw Exception(ErrorCodes::UNSUPPORTED_JOIN_KEYS, "Unsupported JOIN keys. Type: {}", type);
}
#define M(TYPE) \ #define M(TYPE) \
case HashJoin::Type::TYPE: \ case HashJoin::Type::TYPE: \
{ \ { \
@ -1393,7 +1425,7 @@ IColumn::Filter switchJoinRightColumns(
#undef M #undef M
default: default:
throw Exception("Unsupported JOIN keys. Type: " + toString(static_cast<UInt32>(type)), ErrorCodes::UNSUPPORTED_JOIN_KEYS); throw Exception(ErrorCodes::UNSUPPORTED_JOIN_KEYS, "Unsupported JOIN keys (type: {})", type);
} }
} }
@ -1828,7 +1860,7 @@ class NotJoinedHash final : public NotJoinedBlocks::RightColumnsFiller
{ {
public: public:
NotJoinedHash(const HashJoin & parent_, UInt64 max_block_size_) NotJoinedHash(const HashJoin & parent_, UInt64 max_block_size_)
: parent(parent_), max_block_size(max_block_size_) : parent(parent_), max_block_size(max_block_size_), current_block_start(0)
{} {}
Block getEmptyBlock() override { return parent.savedBlockSample().cloneEmpty(); } Block getEmptyBlock() override { return parent.savedBlockSample().cloneEmpty(); }
@ -1836,6 +1868,12 @@ public:
size_t fillColumns(MutableColumns & columns_right) override size_t fillColumns(MutableColumns & columns_right) override
{ {
size_t rows_added = 0; size_t rows_added = 0;
if (unlikely(parent.data->type == HashJoin::Type::EMPTY))
{
rows_added = fillColumnsFromData(parent.data->blocks, columns_right);
}
else
{
auto fill_callback = [&](auto, auto strictness, auto & map) auto fill_callback = [&](auto, auto strictness, auto & map)
{ {
rows_added = fillColumnsFromMap<strictness>(map, columns_right); rows_added = fillColumnsFromMap<strictness>(map, columns_right);
@ -1843,6 +1881,7 @@ public:
if (!joinDispatch(parent.kind, parent.strictness, parent.data->maps.front(), fill_callback)) if (!joinDispatch(parent.kind, parent.strictness, parent.data->maps.front(), fill_callback))
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown JOIN strictness '{}' (must be one of: ANY, ALL, ASOF)", parent.strictness); throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown JOIN strictness '{}' (must be one of: ANY, ALL, ASOF)", parent.strictness);
}
if constexpr (!multiple_disjuncts) if constexpr (!multiple_disjuncts)
{ {
@ -1856,10 +1895,48 @@ private:
const HashJoin & parent; const HashJoin & parent;
UInt64 max_block_size; UInt64 max_block_size;
size_t current_block_start;
std::any position; std::any position;
std::optional<HashJoin::BlockNullmapList::const_iterator> nulls_position; std::optional<HashJoin::BlockNullmapList::const_iterator> nulls_position;
std::optional<BlocksList::const_iterator> used_position; std::optional<BlocksList::const_iterator> used_position;
size_t fillColumnsFromData(const BlocksList & blocks, MutableColumns & columns_right)
{
if (!position.has_value())
position = std::make_any<BlocksList::const_iterator>(blocks.begin());
auto & block_it = std::any_cast<BlocksList::const_iterator &>(position);
auto end = blocks.end();
size_t rows_added = 0;
for (; block_it != end; ++block_it)
{
size_t rows_from_block = std::min<size_t>(max_block_size - rows_added, block_it->rows() - current_block_start);
for (size_t j = 0; j < columns_right.size(); ++j)
{
const auto & col = block_it->getByPosition(j).column;
columns_right[j]->insertRangeFrom(*col, current_block_start, rows_from_block);
}
rows_added += rows_from_block;
if (rows_added >= max_block_size)
{
/// How many rows have been read
current_block_start += rows_from_block;
if (block_it->rows() <= current_block_start)
{
/// current block was fully read
++block_it;
current_block_start = 0;
}
break;
}
current_block_start = 0;
}
return rows_added;
}
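Editor's note: `fillColumnsFromData` above is a resumable cursor: it emits rows from a list of blocks in chunks of at most `max_block_size`, remembering both the current block and the offset inside it between calls. A hedged sketch of the same control flow over plain `std::vector<int>` "blocks" (the real code fills `MutableColumns` from ClickHouse `Block`s):

```cpp
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <list>
#include <vector>

struct ChunkCursor
{
    std::list<std::vector<int>>::const_iterator block_it;
    std::list<std::vector<int>>::const_iterator end;
    size_t current_block_start = 0;

    size_t fill(std::vector<int> & out, size_t max_block_size)
    {
        size_t rows_added = 0;
        for (; block_it != end; ++block_it)
        {
            size_t rows_from_block = std::min(max_block_size - rows_added, block_it->size() - current_block_start);
            out.insert(out.end(),
                       block_it->begin() + current_block_start,
                       block_it->begin() + current_block_start + rows_from_block);
            rows_added += rows_from_block;
            if (rows_added >= max_block_size)
            {
                current_block_start += rows_from_block; // remember how far we got
                if (block_it->size() <= current_block_start)
                {
                    ++block_it;             // current block fully consumed
                    current_block_start = 0;
                }
                break;
            }
            current_block_start = 0;
        }
        return rows_added;
    }
};

int main()
{
    std::list<std::vector<int>> blocks = {{1, 2, 3}, {4, 5}, {6}};
    ChunkCursor cursor{blocks.begin(), blocks.end()};
    std::vector<int> out;
    while (cursor.fill(out, 2) > 0) {}
    for (int v : out) std::cout << v << ' ';
    std::cout << '\n'; // 1 2 3 4 5 6
}
```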
template <ASTTableJoin::Strictness STRICTNESS, typename Maps> template <ASTTableJoin::Strictness STRICTNESS, typename Maps>
size_t fillColumnsFromMap(const Maps & maps, MutableColumns & columns_keys_and_right) size_t fillColumnsFromMap(const Maps & maps, MutableColumns & columns_keys_and_right)
{ {
@ -1871,8 +1948,7 @@ private:
APPLY_FOR_JOIN_VARIANTS(M) APPLY_FOR_JOIN_VARIANTS(M)
#undef M #undef M
default: default:
throw Exception("Unsupported JOIN keys. Type: " + toString(static_cast<UInt32>(parent.data->type)), throw Exception(ErrorCodes::UNSUPPORTED_JOIN_KEYS, "Unsupported JOIN keys (type: {})", parent.data->type) ;
ErrorCodes::UNSUPPORTED_JOIN_KEYS);
} }
__builtin_unreachable(); __builtin_unreachable();
@ -1949,12 +2025,14 @@ private:
for (auto & it = *nulls_position; it != end && rows_added < max_block_size; ++it) for (auto & it = *nulls_position; it != end && rows_added < max_block_size; ++it)
{ {
const Block * block = it->first; const auto * block = it->first;
const NullMap & nullmap = assert_cast<const ColumnUInt8 &>(*it->second).getData(); ConstNullMapPtr nullmap = nullptr;
if (it->second)
nullmap = &assert_cast<const ColumnUInt8 &>(*it->second).getData();
for (size_t row = 0; row < nullmap.size(); ++row) for (size_t row = 0; row < block->rows(); ++row)
{ {
if (nullmap[row]) if (nullmap && (*nullmap)[row])
{ {
for (size_t col = 0; col < columns_keys_and_right.size(); ++col) for (size_t col = 0; col < columns_keys_and_right.size(); ++col)
columns_keys_and_right[col]->insertFrom(*block->getByPosition(col).column, row); columns_keys_and_right[col]->insertFrom(*block->getByPosition(col).column, row);

View File

@ -231,6 +231,7 @@ public:
template <typename Mapped> template <typename Mapped>
struct MapsTemplate struct MapsTemplate
{ {
using MappedType = Mapped;
std::unique_ptr<FixedHashMap<UInt8, Mapped>> key8; std::unique_ptr<FixedHashMap<UInt8, Mapped>> key8;
std::unique_ptr<FixedHashMap<UInt16, Mapped>> key16; std::unique_ptr<FixedHashMap<UInt16, Mapped>> key16;
std::unique_ptr<HashMap<UInt32, Mapped, HashCRC32<UInt32>>> key32; std::unique_ptr<HashMap<UInt32, Mapped, HashCRC32<UInt32>>> key32;
@ -411,7 +412,7 @@ private:
void joinBlockImplCross(Block & block, ExtraBlockPtr & not_processed) const; void joinBlockImplCross(Block & block, ExtraBlockPtr & not_processed) const;
static Type chooseMethod(const ColumnRawPtrs & key_columns, Sizes & key_sizes); static Type chooseMethod(ASTTableJoin::Kind kind, const ColumnRawPtrs & key_columns, Sizes & key_sizes);
bool empty() const; bool empty() const;
bool overDictionary() const; bool overDictionary() const;

View File

@ -191,7 +191,9 @@ private:
ASTTableJoin * table_join = node.table_join->as<ASTTableJoin>(); ASTTableJoin * table_join = node.table_join->as<ASTTableJoin>();
if (table_join->locality != ASTTableJoin::Locality::Global) if (table_join->locality != ASTTableJoin::Locality::Global)
{ {
if (auto & subquery = node.table_expression->as<ASTTableExpression>()->subquery) if (auto * table = node.table_expression->as<ASTTableExpression>())
{
if (auto & subquery = table->subquery)
{ {
std::vector<ASTPtr> renamed; std::vector<ASTPtr> renamed;
NonGlobalTableVisitor::Data table_data(data.getContext(), data.checker, renamed, nullptr, table_join); NonGlobalTableVisitor::Data table_data(data.getContext(), data.checker, renamed, nullptr, table_join);
@ -199,6 +201,16 @@ private:
if (!renamed.empty()) //-V547 if (!renamed.empty()) //-V547
data.renamed_tables.emplace_back(subquery, std::move(renamed)); data.renamed_tables.emplace_back(subquery, std::move(renamed));
} }
else if (table->database_and_table_name)
{
auto tb = node.table_expression;
std::vector<ASTPtr> renamed;
NonGlobalTableVisitor::Data table_data{data.getContext(), data.checker, renamed, nullptr, table_join};
NonGlobalTableVisitor(table_data).visit(tb);
if (!renamed.empty()) //-V547
data.renamed_tables.emplace_back(tb, std::move(renamed));
}
}
} }
} }
}; };

View File

@ -12,40 +12,39 @@ namespace DB
{ {
namespace namespace
{ {
BackupSettings getBackupSettings(const ASTBackupQuery & query) BackupMutablePtr createBackup(const ASTBackupQuery & query, const ContextPtr & context)
{ {
BackupSettings settings; BackupFactory::CreateParams params;
params.open_mode = (query.kind == ASTBackupQuery::BACKUP) ? IBackup::OpenMode::WRITE : IBackup::OpenMode::READ;
params.context = context;
params.backup_info = BackupInfo::fromAST(*query.backup_name);
if (query.base_backup_name)
params.base_backup_info = BackupInfo::fromAST(*query.base_backup_name);
return BackupFactory::instance().createBackup(params);
}
#if 0
void getBackupSettings(const ASTBackupQuery & query, BackupSettings & settings, std::optional<BaseBackupInfo> & base_backup)
{
settings = {};
if (query.settings) if (query.settings)
settings.applyChanges(query.settings->as<const ASTSetQuery &>().changes); settings.applyChanges(query.settings->as<const ASTSetQuery &>().changes);
return settings; return settings;
} }
#endif
BackupPtr getBaseBackup(const BackupSettings & settings)
{
const String & base_backup_name = settings.base_backup;
if (base_backup_name.empty())
return nullptr;
return BackupFactory::instance().openBackup(base_backup_name);
}
void executeBackup(const ASTBackupQuery & query, const ContextPtr & context) void executeBackup(const ASTBackupQuery & query, const ContextPtr & context)
{ {
auto settings = getBackupSettings(query); BackupMutablePtr backup = createBackup(query, context);
auto base_backup = getBaseBackup(settings);
auto backup_entries = makeBackupEntries(query.elements, context); auto backup_entries = makeBackupEntries(query.elements, context);
UInt64 estimated_backup_size = estimateBackupSize(backup_entries, base_backup);
auto backup = BackupFactory::instance().createBackup(query.backup_name, estimated_backup_size, base_backup);
writeBackupEntries(backup, std::move(backup_entries), context->getSettingsRef().max_backup_threads); writeBackupEntries(backup, std::move(backup_entries), context->getSettingsRef().max_backup_threads);
} }
void executeRestore(const ASTBackupQuery & query, ContextMutablePtr context) void executeRestore(const ASTBackupQuery & query, ContextMutablePtr context)
{ {
auto settings = getBackupSettings(query); BackupPtr backup = createBackup(query, context);
auto base_backup = getBaseBackup(settings);
auto backup = BackupFactory::instance().openBackup(query.backup_name, base_backup);
auto restore_tasks = makeRestoreTasks(query.elements, context, backup); auto restore_tasks = makeRestoreTasks(query.elements, context, backup);
executeRestoreTasks(std::move(restore_tasks), context->getSettingsRef().max_backup_threads); executeRestoreTasks(std::move(restore_tasks), context->getSettingsRef().max_backup_threads);
} }

View File

@ -56,7 +56,7 @@ InterpreterSelectIntersectExceptQuery::InterpreterSelectIntersectExceptQuery(
ASTSelectIntersectExceptQuery * ast = query_ptr->as<ASTSelectIntersectExceptQuery>(); ASTSelectIntersectExceptQuery * ast = query_ptr->as<ASTSelectIntersectExceptQuery>();
final_operator = ast->final_operator; final_operator = ast->final_operator;
const auto & children = ast->children; const auto & children = ast->getListOfSelects();
size_t num_children = children.size(); size_t num_children = children.size();
/// AST must have been changed by the visitor. /// AST must have been changed by the visitor.

View File

@@ -880,51 +880,43 @@ static bool hasWithTotalsInAnySubqueryInFromClause(const ASTSelectQuery & query)
    /** NOTE You can also check that the table in the subquery is distributed, and that it only looks at one shard.
      * In other cases, totals will be computed on the initiating server of the query, and it is not necessary to read the data to the end.
      */
    if (auto query_table = extractTableExpression(query, 0))
    {
        if (const auto * ast_union = query_table->as<ASTSelectWithUnionQuery>())
        {
-           for (const auto & elem : ast_union->list_of_selects->children)
-           {
-               /// After normalization for union child node the height of the AST tree is at most 2.
-               if (const auto * child_union = elem->as<ASTSelectWithUnionQuery>())
-               {
-                   for (const auto & child_elem : child_union->list_of_selects->children)
-                       if (hasWithTotalsInAnySubqueryInFromClause(child_elem->as<ASTSelectQuery &>()))
-                           return true;
-               }
-               /// After normalization in case there are intersect or except nodes, the height of
-               /// the AST tree can have any depth (each intersect/except adds a level), but the
-               /// number of children in those nodes is always 2.
-               else if (elem->as<ASTSelectIntersectExceptQuery>())
-               {
-                   std::function<bool(ASTPtr)> traverse_recursively = [&](ASTPtr child_ast) -> bool
-                   {
-                       if (const auto * child = child_ast->as<ASTSelectQuery>())
-                           return hasWithTotalsInAnySubqueryInFromClause(child->as<ASTSelectQuery &>());
-
-                       if (const auto * child = child_ast->as<ASTSelectWithUnionQuery>())
-                           for (const auto & subchild : child->list_of_selects->children)
-                               if (traverse_recursively(subchild))
-                                   return true;
-
-                       if (const auto * child = child_ast->as<ASTSelectIntersectExceptQuery>())
-                           for (const auto & subchild : child->children)
-                               if (traverse_recursively(subchild))
-                                   return true;
-
-                       return false;
-                   };
-                   if (traverse_recursively(elem))
-                       return true;
-               }
-               else
-               {
-                   if (hasWithTotalsInAnySubqueryInFromClause(elem->as<ASTSelectQuery &>()))
-                       return true;
-               }
-           }
+           /** NOTE
+             * 1. For ASTSelectWithUnionQuery after normalization for union child node the height of the AST tree is at most 2.
+             * 2. For ASTSelectIntersectExceptQuery after normalization in case there are intersect or except nodes,
+             *    the height of the AST tree can have any depth (each intersect/except adds a level), but the
+             *    number of children in those nodes is always 2.
+             */
+           std::function<bool(ASTPtr)> traverse_recursively = [&](ASTPtr child_ast) -> bool
+           {
+               if (const auto * select_child = child_ast->as<ASTSelectQuery>())
+               {
+                   if (hasWithTotalsInAnySubqueryInFromClause(select_child->as<ASTSelectQuery &>()))
+                       return true;
+               }
+               else if (const auto * union_child = child_ast->as<ASTSelectWithUnionQuery>())
+               {
+                   for (const auto & subchild : union_child->list_of_selects->children)
+                       if (traverse_recursively(subchild))
+                           return true;
+               }
+               else if (const auto * intersect_child = child_ast->as<ASTSelectIntersectExceptQuery>())
+               {
+                   auto selects = intersect_child->getListOfSelects();
+                   for (const auto & subchild : selects)
+                       if (traverse_recursively(subchild))
+                           return true;
+               }
+               return false;
+           };
+
+           for (const auto & elem : ast_union->list_of_selects->children)
+               if (traverse_recursively(elem))
+                   return true;
        }
    }
    return false;


@@ -86,7 +86,9 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery(
    if (num_children == 1 && settings_limit_offset_needed)
    {
        const ASTPtr first_select_ast = ast->list_of_selects->children.at(0);
-       ASTSelectQuery * select_query = first_select_ast->as<ASTSelectQuery>();
+       ASTSelectQuery * select_query = dynamic_cast<ASTSelectQuery *>(first_select_ast.get());
+       if (!select_query)
+           throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid type in list_of_selects: {}", first_select_ast->getID());

        if (!select_query->withFill() && !select_query->limit_with_ties)
        {


@@ -50,12 +50,12 @@ ColumnWithTypeAndName condtitionColumnToJoinable(const Block & block, const Stri
    if (!src_column_name.empty())
    {
-       auto mask_col = JoinCommon::getColumnAsMask(block, src_column_name);
-       assert(mask_col);
-       const auto & mask_data = assert_cast<const ColumnUInt8 &>(*mask_col).getData();
-
-       for (size_t i = 0; i < res_size; ++i)
-           null_map->getData()[i] = !mask_data[i];
+       auto join_mask = JoinCommon::getColumnAsMask(block, src_column_name);
+       if (!join_mask.isConstant())
+       {
+           for (size_t i = 0; i < res_size; ++i)
+               null_map->getData()[i] = join_mask.isRowFiltered(i);
+       }
    }

    ColumnPtr res_col = ColumnNullable::create(std::move(data_col), std::move(null_map));

@@ -477,6 +477,7 @@ MergeJoin::MergeJoin(std::shared_ptr<TableJoin> table_join_, const Block & right
    , max_joined_block_rows(table_join->maxJoinedBlockRows())
    , max_rows_in_right_block(table_join->maxRowsInRightBlock())
    , max_files_to_merge(table_join->maxFilesToMerge())
+   , log(&Poco::Logger::get("MergeJoin"))
{
    switch (table_join->strictness())
    {

@@ -549,6 +550,8 @@ MergeJoin::MergeJoin(std::shared_ptr<TableJoin> table_join_, const Block & right
    makeSortAndMerge(key_names_left, left_sort_description, left_merge_description);
    makeSortAndMerge(key_names_right, right_sort_description, right_merge_description);

+   LOG_DEBUG(log, "Joining keys: left [{}], right [{}]", fmt::join(key_names_left, ", "), fmt::join(key_names_right, ", "));
+
    /// Temporary disable 'partial_merge_join_left_table_buffer_bytes' without 'partial_merge_join_optimizations'
    if (table_join->enablePartialMergeJoinOptimizations())
        if (size_t max_bytes = table_join->maxBytesInLeftBuffer())


@@ -118,6 +118,8 @@ private:
    Names lowcard_right_keys;

+   Poco::Logger * log;
+
    void changeLeftColumns(Block & block, MutableColumns && columns) const;
    void addRightColumns(Block & block, MutableColumns && columns);


@@ -6,6 +6,7 @@
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTColumnsMatcher.h>
#include <Parsers/ASTQualifiedAsterisk.h>
+#include <Parsers/ASTSelectIntersectExceptQuery.h>
#include <Interpreters/IdentifierSemantic.h>
#include <Interpreters/getTableExpressions.h>
#include <Interpreters/InterpreterSelectQuery.h>
@@ -37,13 +38,44 @@ void PredicateRewriteVisitorData::visit(ASTSelectWithUnionQuery & union_select_q
    for (size_t index = 0; index < internal_select_list.size(); ++index)
    {
        if (auto * child_union = internal_select_list[index]->as<ASTSelectWithUnionQuery>())
+       {
            visit(*child_union, internal_select_list[index]);
-       else
-       {
-           if (index == 0)
-               visitFirstInternalSelect(*internal_select_list[0]->as<ASTSelectQuery>(), internal_select_list[0]);
-           else
-               visitOtherInternalSelect(*internal_select_list[index]->as<ASTSelectQuery>(), internal_select_list[index]);
        }
+       else if (auto * child_select = internal_select_list[index]->as<ASTSelectQuery>())
+       {
+           visitInternalSelect(index, *child_select, internal_select_list[index]);
+       }
+       else if (auto * child_intersect_except = internal_select_list[index]->as<ASTSelectIntersectExceptQuery>())
+       {
+           visit(*child_intersect_except, internal_select_list[index]);
+       }
    }
}

+void PredicateRewriteVisitorData::visitInternalSelect(size_t index, ASTSelectQuery & select_node, ASTPtr & node)
+{
+    if (index == 0)
+        visitFirstInternalSelect(select_node, node);
+    else
+        visitOtherInternalSelect(select_node, node);
+}
+
+void PredicateRewriteVisitorData::visit(ASTSelectIntersectExceptQuery & intersect_except_query, ASTPtr &)
+{
+    auto internal_select_list = intersect_except_query.getListOfSelects();
+
+    for (size_t index = 0; index < internal_select_list.size(); ++index)
+    {
+        if (auto * union_node = internal_select_list[index]->as<ASTSelectWithUnionQuery>())
+        {
+            visit(*union_node, internal_select_list[index]);
+        }
+        else if (auto * select_node = internal_select_list[index]->as<ASTSelectQuery>())
+        {
+            visitInternalSelect(index, *select_node, internal_select_list[index]);
+        }
+        else if (auto * intersect_node = internal_select_list[index]->as<ASTSelectIntersectExceptQuery>())
+        {
+            visit(*intersect_node, internal_select_list[index]);
+        }
+    }
+}


@@ -10,6 +10,8 @@
namespace DB
{

+class ASTSelectIntersectExceptQuery;
+
class PredicateRewriteVisitorData : WithContext
{
public:

@@ -40,7 +42,11 @@ private:
    void visitOtherInternalSelect(ASTSelectQuery & select_query, ASTPtr &);

+   void visit(ASTSelectIntersectExceptQuery & intersect_except_query, ASTPtr &);
+
    bool rewriteSubquery(ASTSelectQuery & subquery, const Names & inner_columns);
+
+   void visitInternalSelect(size_t index, ASTSelectQuery & select_node, ASTPtr & node);
};

using PredicateRewriteMatcher = OneTypeMatcher<PredicateRewriteVisitorData, PredicateRewriteVisitorData::needChild>;


@@ -3,6 +3,7 @@
#include <Interpreters/Context.h>
#include <Interpreters/DatabaseAndTableWithAlias.h>
#include <Parsers/ASTSelectWithUnionQuery.h>
+#include <Parsers/ASTSelectIntersectExceptQuery.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTKillQueryQuery.h>
#include <Parsers/queryNormalization.h>


@@ -38,7 +38,13 @@ ASTPtr transformCountNullableToSubcolumn(const String & name_in_storage, const S
    return makeASTFunction("sum", makeASTFunction("not", ast));
}

-const std::unordered_map<String, std::tuple<TypeIndex, String, decltype(&transformToSubcolumn)>> function_to_subcolumn =
+ASTPtr transformMapContainsToSubcolumn(const String & name_in_storage, const String & subcolumn_name, const ASTPtr & arg)
+{
+    auto ast = transformToSubcolumn(name_in_storage, subcolumn_name);
+    return makeASTFunction("has", ast, arg);
+}
+
+const std::unordered_map<String, std::tuple<TypeIndex, String, decltype(&transformToSubcolumn)>> unary_function_to_subcolumn =
{
    {"length", {TypeIndex::Array, "size0", transformToSubcolumn}},
    {"empty", {TypeIndex::Array, "size0", transformEmptyToSubcolumn}},

@@ -50,31 +56,51 @@ const std::unordered_map<String, std::tuple<TypeIndex, String, decltype(&transfo
    {"mapValues", {TypeIndex::Map, "values", transformToSubcolumn}},
};

+const std::unordered_map<String, std::tuple<TypeIndex, String, decltype(&transformMapContainsToSubcolumn)>> binary_function_to_subcolumn
+{
+    {"mapContains", {TypeIndex::Map, "keys", transformMapContainsToSubcolumn}},
+};
+
}

void RewriteFunctionToSubcolumnData::visit(ASTFunction & function, ASTPtr & ast) const
{
    const auto & arguments = function.arguments->children;
-   if (arguments.size() != 1)
+   if (arguments.empty() || arguments.size() > 2)
        return;

    const auto * identifier = arguments[0]->as<ASTIdentifier>();
    if (!identifier)
        return;

-   auto it = function_to_subcolumn.find(function.name);
-   if (it == function_to_subcolumn.end())
-       return;
-
-   const auto & [type_id, subcolumn_name, transformer] = it->second;
    const auto & columns = metadata_snapshot->getColumns();
    const auto & name_in_storage = identifier->name();

-   if (columns.has(name_in_storage)
-       && columns.get(name_in_storage).type->getTypeId() == type_id)
+   if (!columns.has(name_in_storage))
+       return;
+
+   TypeIndex column_type_id = columns.get(name_in_storage).type->getTypeId();
+
+   if (arguments.size() == 1)
    {
-       ast = transformer(name_in_storage, subcolumn_name);
+       auto it = unary_function_to_subcolumn.find(function.name);
+       if (it != unary_function_to_subcolumn.end())
+       {
+           const auto & [type_id, subcolumn_name, transformer] = it->second;
+           if (column_type_id == type_id)
+               ast = transformer(name_in_storage, subcolumn_name);
+       }
    }
+   else
+   {
+       auto it = binary_function_to_subcolumn.find(function.name);
+       if (it != binary_function_to_subcolumn.end())
+       {
+           const auto & [type_id, subcolumn_name, transformer] = it->second;
+           if (column_type_id == type_id)
+               ast = transformer(name_in_storage, subcolumn_name, arguments[1]);
+       }
+   }
}

}
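The rewrite above is driven by two lookup tables: unary functions such as length(arr) are answered from the size0 subcolumn, and the binary mapContains(m, k) becomes has(m.keys, k). A minimal standalone sketch of that lookup-and-rewrite idea, using plain strings instead of ClickHouse ASTs (all names below are illustrative, not the real API):

#include <iostream>
#include <string>

/// Stand-ins for the real AST transformers: each helper produces the rewritten
/// expression that reads a subcolumn instead of the whole column.
std::string rewriteLength(const std::string & col)
{
    return col + ".size0";                                   // length(arr) -> arr.size0
}

std::string rewriteMapContains(const std::string & col, const std::string & key)
{
    return "has(" + col + ".keys, " + key + ")";             // mapContains(m, k) -> has(m.keys, k)
}

int main()
{
    std::cout << "length(arr)         -> " << rewriteLength("arr") << "\n";
    std::cout << "mapContains(m, 'k') -> " << rewriteMapContains("m", "'k'") << "\n";
}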


@@ -108,6 +108,16 @@ TableJoin::TableJoin(const Settings & settings, VolumePtr tmp_volume_)
{
}

+void TableJoin::resetKeys()
+{
+    clauses.clear();
+
+    key_asts_left.clear();
+    key_asts_right.clear();
+    left_type_map.clear();
+    right_type_map.clear();
+}
+
void TableJoin::resetCollected()
{
    clauses.clear();

@@ -224,6 +234,13 @@ Names TableJoin::requiredJoinedNames() const
    for (const auto & joined_column : columns_added_by_join)
        required_columns_set.insert(joined_column.name);

+   /*
+    * In case of `SELECT count() FROM ... JOIN .. ON NULL` required columns set for right table is empty.
+    * But we have to get at least one column from right table to know the number of rows.
+    */
+   if (required_columns_set.empty() && !columns_from_joined_table.empty())
+       return {columns_from_joined_table.begin()->name};
+
    return Names(required_columns_set.begin(), required_columns_set.end());
}

@@ -352,9 +369,7 @@ bool TableJoin::sameStrictnessAndKind(ASTTableJoin::Strictness strictness_, ASTT
bool TableJoin::oneDisjunct() const
{
-   if (!isCrossOrComma(kind()))
-       assert(!clauses.empty());
-   return clauses.size() <= 1;
+   return clauses.size() == 1;
}

bool TableJoin::allowMergeJoin() const

@@ -650,4 +665,10 @@ void TableJoin::assertHasOneOnExpr() const
    }
}

+void TableJoin::resetToCross()
+{
+    this->resetKeys();
+    this->table_join.kind = ASTTableJoin::Kind::Cross;
+}
+
}


@@ -48,7 +48,6 @@ enum class JoinTableSide
class TableJoin
{
public:
    using NameToTypeMap = std::unordered_map<String, DataTypePtr>;

@@ -285,6 +284,10 @@ public:
    Block getRequiredRightKeys(const Block & right_table_keys, std::vector<String> & keys_sources) const;

    String renamedRightColumnName(const String & name) const;
+
+   void resetKeys();
+   void resetToCross();
+
    std::unordered_map<String, String> leftToRightKeyRemap() const;

    void setStorageJoin(std::shared_ptr<StorageJoin> storage);


@@ -1,3 +1,4 @@
+#include <algorithm>
#include <Core/Settings.h>
#include <Core/NamesAndTypes.h>

@@ -22,6 +23,7 @@
#include <Interpreters/getTableExpressions.h>
#include <Interpreters/TreeOptimizer.h>
#include <Interpreters/replaceAliasColumnsInQuery.h>
+#include <Interpreters/evaluateConstantExpression.h>
#include <Interpreters/PredicateExpressionsOptimizer.h>

#include <Parsers/ASTExpressionList.h>

@@ -33,6 +35,8 @@
#include <DataTypes/NestedUtils.h>
#include <DataTypes/DataTypeNullable.h>
+#include <DataTypes/DataTypeLowCardinality.h>
+#include <DataTypes/DataTypesNumber.h>
#include <IO/WriteHelpers.h>

#include <Storages/IStorage.h>
@@ -564,9 +568,68 @@ void setJoinStrictness(ASTSelectQuery & select_query, JoinStrictness join_defaul
    out_table_join = table_join;
}
/// Evaluate expression and return boolean value if it can be interpreted as bool.
/// Only UInt8 or NULL are allowed.
/// Returns `false` for 0 or NULL values, `true` for any non-zero value.
std::optional<bool> tryEvaluateConstCondition(ASTPtr expr, ContextPtr context)
{
if (!expr)
return {};
Field eval_res;
DataTypePtr eval_res_type;
try
{
std::tie(eval_res, eval_res_type) = evaluateConstantExpression(expr, context);
}
catch (DB::Exception &)
{
/// not a constant expression
return {};
}
/// UInt8, maybe Nullable, maybe LowCardinality, and NULL are allowed
eval_res_type = removeNullable(removeLowCardinality(eval_res_type));
if (auto which = WhichDataType(eval_res_type); !which.isUInt8() && !which.isNothing())
return {};
if (eval_res.isNull())
return false;
UInt8 res = eval_res.template safeGet<UInt8>();
return res > 0;
}
bool tryJoinOnConst(TableJoin & analyzed_join, ASTPtr & on_expression, ContextPtr context)
{
bool join_on_value;
if (auto eval_const_res = tryEvaluateConstCondition(on_expression, context))
join_on_value = *eval_const_res;
else
return false;
if (!analyzed_join.forceHashJoin())
throw Exception(ErrorCodes::NOT_IMPLEMENTED,
"JOIN ON constant ({}) supported only with join algorithm 'hash'",
queryToString(on_expression));
on_expression = nullptr;
if (join_on_value)
{
LOG_DEBUG(&Poco::Logger::get("TreeRewriter"), "Join on constant executed as cross join");
analyzed_join.resetToCross();
}
else
{
LOG_DEBUG(&Poco::Logger::get("TreeRewriter"), "Join on constant executed as empty join");
analyzed_join.resetKeys();
}
return true;
}
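Taken together, tryEvaluateConstCondition and tryJoinOnConst implement the rule: a non-constant ON expression must produce join keys, ON NULL or ON 0 degrades to an empty join (keys are reset), and any other constant UInt8 degrades to a CROSS JOIN. A small hedged sketch of that truth table, using std::optional<bool> the same way the helper above does (standalone and illustrative; not the real ClickHouse API):

#include <cassert>
#include <optional>

/// Illustrative only: an engaged optional means "the ON condition folded to a usable constant".
std::optional<bool> evaluate_const_condition(std::optional<unsigned> value, bool is_constant)
{
    if (!is_constant)
        return std::nullopt;       /// not a constant expression -> join keys are still required
    if (!value)
        return false;              /// NULL behaves like false -> empty join
    return *value != 0;            /// 0 -> empty join, any non-zero UInt8 -> cross join
}

int main()
{
    assert(!evaluate_const_condition(1, /*is_constant=*/ false));    /// e.g. t1.id = t2.id: not constant
    assert(evaluate_const_condition(std::nullopt, true) == false);   /// JOIN ... ON NULL -> empty join
    assert(evaluate_const_condition(0u, true) == false);             /// JOIN ... ON 0    -> empty join
    assert(evaluate_const_condition(1u, true) == true);              /// JOIN ... ON 1    -> cross join
}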
/// Find the columns that are obtained by JOIN.
-void collectJoinedColumns(TableJoin & analyzed_join, const ASTTableJoin & table_join,
-                          const TablesWithColumns & tables, const Aliases & aliases)
+void collectJoinedColumns(TableJoin & analyzed_join, ASTTableJoin & table_join,
+                          const TablesWithColumns & tables, const Aliases & aliases, ContextPtr context)
{
    assert(tables.size() >= 2);
@@ -599,18 +662,30 @@ void collectJoinedColumns(TableJoin & analyzed_join, const ASTTableJoin & table_
            assert(analyzed_join.oneDisjunct());
        }

-       if (analyzed_join.getClauses().empty())
-           throw DB::Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION,
-                               "Cannot get JOIN keys from JOIN ON section: '{}'",
-                               queryToString(table_join.on_expression));
-
-       for (const auto & onexpr : analyzed_join.getClauses())
+       auto check_keys_empty = [] (auto e) { return e.key_names_left.empty(); };
+
+       /// All clauses should have keys or be empty simultaneously
+       bool all_keys_empty = std::all_of(analyzed_join.getClauses().begin(), analyzed_join.getClauses().end(), check_keys_empty);
+       if (all_keys_empty)
        {
-           if (onexpr.key_names_left.empty())
+           /// Try join on constant (cross or empty join) or fail
+           if (is_asof)
+               throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION,
+                               "Cannot get JOIN keys from JOIN ON section: {}", queryToString(table_join.on_expression));
+
+           bool join_on_const_ok = tryJoinOnConst(analyzed_join, table_join.on_expression, context);
+           if (!join_on_const_ok)
+               throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION,
+                               "Cannot get JOIN keys from JOIN ON section: {}", queryToString(table_join.on_expression));
+       }
+       else
+       {
+           bool any_keys_empty = std::any_of(analyzed_join.getClauses().begin(), analyzed_join.getClauses().end(), check_keys_empty);
+           if (any_keys_empty)
                throw DB::Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION,
                                    "Cannot get JOIN keys from JOIN ON section: '{}'",
                                    queryToString(table_join.on_expression));
        }

        if (is_asof)
        {

@@ -621,7 +696,7 @@ void collectJoinedColumns(TableJoin & analyzed_join, const ASTTableJoin & table_
        if (!analyzed_join.oneDisjunct() && !analyzed_join.forceHashJoin())
            throw DB::Exception(ErrorCodes::NOT_IMPLEMENTED, "Only `hash` join supports multiple ORs for keys in JOIN ON section");
-       }
    }
}
@@ -1052,7 +1127,7 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect(
    auto * table_join_ast = select_query->join() ? select_query->join()->table_join->as<ASTTableJoin>() : nullptr;
    if (table_join_ast && tables_with_columns.size() >= 2)
-       collectJoinedColumns(*result.analyzed_join, *table_join_ast, tables_with_columns, result.aliases);
+       collectJoinedColumns(*result.analyzed_join, *table_join_ast, tables_with_columns, result.aliases, getContext());

    result.aggregates = getAggregates(query, *select_query);
    result.window_function_asts = getWindowFunctions(query, *select_query);


@@ -1,18 +1,18 @@
#include <Interpreters/join_common.h>

+#include <Columns/ColumnConst.h>
#include <Columns/ColumnLowCardinality.h>
#include <Columns/ColumnNullable.h>
-#include <Columns/ColumnConst.h>

#include <DataTypes/DataTypeLowCardinality.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypesNumber.h>

+#include <IO/WriteHelpers.h>
+
#include <Interpreters/ActionsDAG.h>
#include <Interpreters/TableJoin.h>

-#include <IO/WriteHelpers.h>
-
namespace DB
{
@@ -492,23 +492,27 @@ bool typesEqualUpToNullability(DataTypePtr left_type, DataTypePtr right_type)
    return left_type_strict->equals(*right_type_strict);
}

-ColumnPtr getColumnAsMask(const Block & block, const String & column_name)
+JoinMask getColumnAsMask(const Block & block, const String & column_name)
{
    if (column_name.empty())
-       return nullptr;
+       return JoinMask(true);

    const auto & src_col = block.getByName(column_name);

    DataTypePtr col_type = recursiveRemoveLowCardinality(src_col.type);
    if (isNothing(col_type))
-       return ColumnUInt8::create(block.rows(), 0);
+       return JoinMask(false);

-   const auto & join_condition_col = recursiveRemoveLowCardinality(src_col.column->convertToFullColumnIfConst());
+   if (const auto * const_cond = checkAndGetColumn<ColumnConst>(*src_col.column))
+   {
+       return JoinMask(const_cond->getBool(0));
+   }
+
+   ColumnPtr join_condition_col = recursiveRemoveLowCardinality(src_col.column->convertToFullColumnIfConst());
    if (const auto * nullable_col = typeid_cast<const ColumnNullable *>(join_condition_col.get()))
    {
        if (isNothing(assert_cast<const DataTypeNullable &>(*col_type).getNestedType()))
-           return ColumnUInt8::create(block.rows(), 0);
+           return JoinMask(false);

        /// Return nested column with NULL set to false
        const auto & nest_col = assert_cast<const ColumnUInt8 &>(nullable_col->getNestedColumn());

@@ -517,10 +521,10 @@ ColumnPtr getColumnAsMask(const Block & block, const String & column_name)
        auto res = ColumnUInt8::create(nullable_col->size(), 0);
        for (size_t i = 0, sz = nullable_col->size(); i < sz; ++i)
            res->getData()[i] = !null_map.getData()[i] && nest_col.getData()[i];
-       return res;
+       return JoinMask(std::move(res));
    }
    else
-       return join_condition_col;
+       return JoinMask(std::move(join_condition_col));
}

@@ -580,11 +584,10 @@ NotJoinedBlocks::NotJoinedBlocks(std::unique_ptr<RightColumnsFiller> filler_,
    }

    if (column_indices_left.size() + column_indices_right.size() + same_result_keys.size() != result_sample_block.columns())
-       throw Exception("Error in columns mapping in RIGHT|FULL JOIN. Left: " + toString(column_indices_left.size()) +
-                       ", right: " + toString(column_indices_right.size()) +
-                       ", same: " + toString(same_result_keys.size()) +
-                       ", result: " + toString(result_sample_block.columns()),
-                       ErrorCodes::LOGICAL_ERROR);
+       throw Exception(ErrorCodes::LOGICAL_ERROR,
+                       "Error in columns mapping in RIGHT|FULL JOIN. Left: {}, right: {}, same: {}, result: {}",
+                       column_indices_left.size(), column_indices_right.size(),
+                       same_result_keys.size(), result_sample_block.columns());
}

void NotJoinedBlocks::setRightIndex(size_t right_pos, size_t result_position)


@@ -19,6 +19,46 @@ using UInt8ColumnDataPtr = const ColumnUInt8::Container *;
namespace JoinCommon
{

+/// Store boolean column handling constant value without materializing
+/// Behaves similar to std::variant<bool, ColumnPtr>, but provides more convenient specialized interface
+class JoinMask
+{
+public:
+    explicit JoinMask(bool value)
+        : column(nullptr)
+        , const_value(value)
+    {}
+
+    explicit JoinMask(ColumnPtr col)
+        : column(col)
+        , const_value(false)
+    {}
+
+    bool isConstant() { return !column; }
+
+    /// Return data if mask is not constant
+    UInt8ColumnDataPtr getData()
+    {
+        if (column)
+            return &assert_cast<const ColumnUInt8 &>(*column).getData();
+        return nullptr;
+    }
+
+    inline bool isRowFiltered(size_t row) const
+    {
+        if (column)
+            return !assert_cast<const ColumnUInt8 &>(*column).getData()[row];
+        return !const_value;
+    }
+
+private:
+    ColumnPtr column;
+    /// Used if column is null
+    bool const_value;
+};
+
bool canBecomeNullable(const DataTypePtr & type);
DataTypePtr convertTypeToNullable(const DataTypePtr & type);
void convertColumnToNullable(ColumnWithTypeAndName & column);

@@ -58,7 +98,7 @@ void addDefaultValues(IColumn & column, const DataTypePtr & type, size_t count);
bool typesEqualUpToNullability(DataTypePtr left_type, DataTypePtr right_type);

/// Return mask array of type ColumnUInt8 for specified column. Source should have type UInt8 or Nullable(UInt8).
-ColumnPtr getColumnAsMask(const Block & block, const String & column_name);
+JoinMask getColumnAsMask(const Block & block, const String & column_name);

/// Split key and other columns by keys name list
void splitAdditionalColumns(const Names & key_names, const Block & sample_block, Block & block_keys, Block & block_others);
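Callers such as condtitionColumnToJoinable can now branch on isConstant() and skip materializing a per-row mask when the join condition folds to a constant. A minimal self-contained sketch of the same contract, using a mock with std::vector instead of the real IColumn types (illustrative only):

#include <cassert>
#include <cstdint>
#include <optional>
#include <vector>

/// Simplified stand-in for JoinCommon::JoinMask: either a single constant or
/// a per-row UInt8 mask, with the same isConstant()/isRowFiltered() contract.
class JoinMaskSketch
{
public:
    explicit JoinMaskSketch(bool value) : const_value(value) {}
    explicit JoinMaskSketch(std::vector<uint8_t> col) : column(std::move(col)), const_value(false) {}

    bool isConstant() const { return !column.has_value(); }

    /// A row is filtered out when its condition value is 0 (or the constant is false).
    bool isRowFiltered(size_t row) const
    {
        if (column)
            return !(*column)[row];
        return !const_value;
    }

private:
    std::optional<std::vector<uint8_t>> column;
    bool const_value;
};

int main()
{
    JoinMaskSketch all_pass(true);                  /// e.g. no extra ON condition at all
    assert(all_pass.isConstant() && !all_pass.isRowFiltered(42));

    JoinMaskSketch per_row(std::vector<uint8_t>{1, 0, 1});
    assert(!per_row.isConstant());
    assert(!per_row.isRowFiltered(0) && per_row.isRowFiltered(1));
}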


@@ -94,10 +94,25 @@ namespace
    }
}

-void formatSettings(const IAST & settings, const IAST::FormatSettings & format)
+void formatSettings(const ASTPtr & settings, const ASTPtr & base_backup_name, const IAST::FormatSettings & format)
{
+    if (!settings && !base_backup_name)
+        return;
+
    format.ostr << (format.hilite ? IAST::hilite_keyword : "") << " SETTINGS " << (format.hilite ? IAST::hilite_none : "");
-   settings.format(format);
+   bool empty = true;
+
+   if (base_backup_name)
+   {
+       format.ostr << "base_backup = ";
+       base_backup_name->format(format);
+       empty = false;
+   }
+
+   if (settings)
+   {
+       if (!empty)
+           format.ostr << ", ";
+       settings->format(format);
+   }
}
}

@@ -120,11 +135,11 @@ void ASTBackupQuery::formatImpl(const FormatSettings & format, FormatState &, Fo
    formatElements(elements, kind, format);

-   if (settings)
-       formatSettings(*settings, format);
-
    format.ostr << (format.hilite ? hilite_keyword : "") << ((kind == Kind::BACKUP) ? " TO " : " FROM ") << (format.hilite ? hilite_none : "");
-   format.ostr << " " << quoteString(backup_name);
+   backup_name->format(format);
+
+   if (settings || base_backup_name)
+       formatSettings(settings, base_backup_name, format);
}
}


@@ -16,8 +16,9 @@ using DatabaseAndTableName = std::pair<String, String>;
  * TEMPORARY TABLE table_name [AS table_name_in_backup]
  * ALL TEMPORARY TABLES |
  * EVERYTHING } [,...]
- * TO 'backup_name'
- * SETTINGS base_backup='base_backup_name'
+ * TO { File('path/') |
+ *      Disk('disk_name', 'path/')
+ * [SETTINGS base_backup = {File(...) | Disk(...)}]
  *
  * RESTORE { TABLE [db.]table_name_in_backup [INTO [db.]table_name] [PARTITION[S] partition_expr [,...]] |
  * DICTIONARY [db.]dictionary_name_in_backup [INTO [db.]dictionary_name] |

@@ -26,7 +27,7 @@ using DatabaseAndTableName = std::pair<String, String>;
  * TEMPORARY TABLE table_name_in_backup [INTO table_name] |
  * ALL TEMPORARY TABLES |
  * EVERYTHING } [,...]
- * FROM 'backup_name'
+ * FROM {File(...) | Disk(...)}
  *
  * Notes:
  * RESTORE doesn't drop any data, it either creates a table or appends an existing table with restored data.

@@ -76,7 +77,11 @@ public:
    using Elements = std::vector<Element>;
    Elements elements;

-   String backup_name;
+   ASTPtr backup_name;
+
+   /// Base backup. Only differences made after the base backup will be included in a newly created backup,
+   /// so this setting allows to make an incremental backup.
+   ASTPtr base_backup_name;

    ASTPtr settings;


@@ -15,12 +15,10 @@ ASTPtr ASTSelectIntersectExceptQuery::clone() const
        res->children.push_back(child->clone());

    res->final_operator = final_operator;
-   cloneOutputOptions(*res);
    return res;
}

-void ASTSelectIntersectExceptQuery::formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
+void ASTSelectIntersectExceptQuery::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
{
    std::string indent_str = settings.one_line ? "" : std::string(4 * frame.indent, ' ');

@@ -38,4 +36,21 @@ void ASTSelectIntersectExceptQuery::formatQueryImpl(const FormatSettings & setti
    }
}

+ASTs ASTSelectIntersectExceptQuery::getListOfSelects() const
+{
+    /**
+     * Because of normalization actual number of selects is 2.
+     * But this is checked in InterpreterSelectIntersectExceptQuery.
+     */
+    ASTs selects;
+    for (const auto & child : children)
+    {
+        if (typeid_cast<ASTSelectQuery *>(child.get())
+            || typeid_cast<ASTSelectWithUnionQuery *>(child.get())
+            || typeid_cast<ASTSelectIntersectExceptQuery *>(child.get()))
+            selects.push_back(child);
+    }
+    return selects;
+}
+
}


@@ -1,22 +1,18 @@
#pragma once

-#include <Parsers/ASTQueryWithOutput.h>
+#include <Parsers/ASTSelectQuery.h>

namespace DB
{

-class ASTSelectIntersectExceptQuery : public ASTQueryWithOutput
+class ASTSelectIntersectExceptQuery : public ASTSelectQuery
{
public:
    String getID(char) const override { return "SelectIntersectExceptQuery"; }

    ASTPtr clone() const override;

-   void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;
-
-   const char * getQueryKindString() const override { return "SelectIntersectExcept"; }
-
    enum class Operator
    {
        UNKNOWN,

@@ -24,6 +20,12 @@ public:
        EXCEPT
    };

+   void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;
+
+   const char * getQueryKindString() const override { return "SelectIntersectExcept"; }
+
+   ASTs getListOfSelects() const;
+
    /// Final operator after applying visitor.
    Operator final_operator = Operator::UNKNOWN;
};


@@ -148,18 +148,47 @@ namespace
        });
    }

-   bool parseSettings(IParser::Pos & pos, Expected & expected, ASTPtr & settings)
+   bool parseBackupName(IParser::Pos & pos, Expected & expected, ASTPtr & backup_name)
+   {
+       return ParserIdentifierWithOptionalParameters{}.parse(pos, backup_name, expected);
+   }
+
+   bool parseBaseBackupSetting(IParser::Pos & pos, Expected & expected, ASTPtr & base_backup_name)
+   {
+       return IParserBase::wrapParseImpl(pos, [&]
+       {
+           return ParserKeyword{"base_backup"}.ignore(pos, expected)
+               && ParserToken(TokenType::Equals).ignore(pos, expected)
+               && parseBackupName(pos, expected, base_backup_name);
+       });
+   }
+
+   bool parseSettings(IParser::Pos & pos, Expected & expected, ASTPtr & settings, ASTPtr & base_backup_name)
    {
        return IParserBase::wrapParseImpl(pos, [&]
        {
            if (!ParserKeyword{"SETTINGS"}.ignore(pos, expected))
                return false;
-           ASTPtr result;
-           if (!ParserSetQuery{true}.parse(pos, result, expected))
+
+           ASTPtr res_settings;
+           ASTPtr res_base_backup_name;
+
+           auto parse_setting = [&]
+           {
+               if (!res_settings && ParserSetQuery{true}.parse(pos, res_settings, expected))
+                   return true;
+               if (!res_base_backup_name && parseBaseBackupSetting(pos, expected, res_base_backup_name))
+                   return true;
+               return false;
+           };
+
+           if (!ParserList::parseUtil(pos, expected, parse_setting, false))
                return false;
-           settings = std::move(result);
+
+           settings = std::move(res_settings);
+           base_backup_name = std::move(res_base_backup_name);
            return true;
        });
    }

@@ -182,13 +211,14 @@ bool ParserBackupQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
    if (!ParserKeyword{(kind == Kind::BACKUP) ? "TO" : "FROM"}.ignore(pos, expected))
        return false;

-   ASTPtr ast;
-   if (!ParserStringLiteral{}.parse(pos, ast, expected))
+   ASTPtr backup_name;
+   if (!parseBackupName(pos, expected, backup_name))
        return false;
-   String backup_name = ast->as<ASTLiteral &>().value.safeGet<String>();

    ASTPtr settings;
-   parseSettings(pos, expected, settings);
+   ASTPtr base_backup_name;
+   parseSettings(pos, expected, settings, base_backup_name);

    auto query = std::make_shared<ASTBackupQuery>();
    node = query;

@@ -196,6 +226,7 @@ bool ParserBackupQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
    query->kind = kind;
    query->elements = std::move(elements);
    query->backup_name = std::move(backup_name);
+   query->base_backup_name = std::move(base_backup_name);
    query->settings = std::move(settings);

    return true;


@@ -13,8 +13,9 @@ namespace DB
  * TEMPORARY TABLE table_name [AS table_name_in_backup]
  * ALL TEMPORARY TABLES |
  * EVERYTHING } [,...]
- * TO 'backup_name'
- * [SETTINGS base_backup = 'base_backup_name']
+ * TO { File('path/') |
+ *      Disk('disk_name', 'path/')
+ * [SETTINGS base_backup = {FILE(...) | DISK(...)}]
  *
  * RESTORE { TABLE [db.]table_name_in_backup [INTO [db.]table_name] [PARTITION[S] partition_expr [,...]] |
  * DICTIONARY [db.]dictionary_name_in_backup [INTO [db.]dictionary_name] |

@@ -23,7 +24,7 @@ namespace DB
  * TEMPORARY TABLE table_name_in_backup [INTO table_name] |
  * ALL TEMPORARY TABLES |
  * EVERYTHING } [,...]
- * FROM 'backup_name'
+ * FROM {File(...) | Disk(...)}
  */
class ParserBackupQuery : public IParserBase
{


@@ -23,6 +23,7 @@ namespace ErrorCodes
    extern const int SYNTAX_ERROR;
    extern const int TOP_AND_LIMIT_TOGETHER;
    extern const int WITH_TIES_WITHOUT_ORDER_BY;
+   extern const int OFFSET_FETCH_WITHOUT_ORDER_BY;
}

@@ -323,7 +324,7 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
    {
        /// OFFSET FETCH clause must exists with "ORDER BY"
        if (!order_expression_list)
-           return false;
+           throw Exception("Can not use OFFSET FETCH clause without ORDER BY", ErrorCodes::OFFSET_FETCH_WITHOUT_ORDER_BY);

        if (s_first.ignore(pos, expected))
        {


@@ -1,6 +1,7 @@
#include <Storages/HDFS/HDFSCommon.h>
#include <Poco/URI.h>
#include <boost/algorithm/string/replace.hpp>
+#include <re2/re2.h>

#if USE_HDFS
#include <Common/ShellCommand.h>

@@ -21,6 +22,7 @@ namespace ErrorCodes
}

const String HDFSBuilderWrapper::CONFIG_PREFIX = "hdfs";
+const String HDFS_URL_REGEXP = "^hdfs://[^:/]*:[0-9]*/.*";

void HDFSBuilderWrapper::loadFromConfig(const Poco::Util::AbstractConfiguration & config,
    const String & config_path, bool isUser)

@@ -197,6 +199,12 @@ HDFSFSPtr createHDFSFS(hdfsBuilder * builder)
    return fs;
}

+void checkHDFSURL(const String & url)
+{
+    if (!re2::RE2::FullMatch(url, HDFS_URL_REGEXP))
+        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bad hdfs url: {}. It should have structure 'hdfs://<host_name>:<port>/<path>'", url);
+}
+
}

#endif
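The new check only accepts URLs shaped like hdfs://<host_name>:<port>/<path>. A quick standalone illustration of what the HDFS_URL_REGEXP pattern above accepts and rejects, calling re2 directly outside ClickHouse (for illustration only):

#include <cassert>
#include <re2/re2.h>

int main()
{
    /// Same pattern as HDFS_URL_REGEXP above.
    const re2::RE2 hdfs_url_regexp("^hdfs://[^:/]*:[0-9]*/.*");

    assert(re2::RE2::FullMatch("hdfs://namenode:9000/some/dir/file.tsv", hdfs_url_regexp));  /// accepted
    assert(!re2::RE2::FullMatch("hdfs://namenode/some/dir/file.tsv", hdfs_url_regexp));      /// no port -> rejected
    assert(!re2::RE2::FullMatch("s3://bucket/key", hdfs_url_regexp));                        /// wrong scheme -> rejected
}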


@@ -98,5 +98,9 @@ using HDFSFSPtr = std::unique_ptr<std::remove_pointer_t<hdfsFS>, detail::HDFSFsD
HDFSBuilderWrapper createHDFSBuilder(const String & uri_str, const Poco::Util::AbstractConfiguration &);

HDFSFSPtr createHDFSFS(hdfsBuilder * builder);

+/// Check that the url satisfies the structure 'hdfs://<host_name>:<port>/<path>'
+/// and throw an exception if it doesn't.
+void checkHDFSURL(const String & url);
+
}
#endif


@@ -67,6 +67,7 @@ StorageHDFS::StorageHDFS(
    , partition_by(partition_by_)
{
    context_->getRemoteHostFilter().checkURL(Poco::URI(uri));
+   checkHDFSURL(uri);

    StorageInMemoryMetadata storage_metadata;
    storage_metadata.setColumns(columns_);


@@ -53,7 +53,7 @@ void DropPartsRanges::removeDropRange(const ReplicatedMergeTreeLogEntryPtr & ent
bool DropPartsRanges::hasDropRange(const MergeTreePartInfo & new_drop_range_info) const
{
-   for (const auto & [znode_name, drop_range] : drop_ranges)
+   for (const auto & [_, drop_range] : drop_ranges)
    {
        if (drop_range.contains(new_drop_range_info))
            return true;


@@ -3574,7 +3574,7 @@ RestoreDataTasks MergeTreeData::restoreDataPartsFromBackup(const BackupPtr & bac
{
    RestoreDataTasks restore_tasks;

-   Strings part_names = backup->list(data_path_in_backup);
+   Strings part_names = backup->listFiles(data_path_in_backup);
    for (const String & part_name : part_names)
    {
        const auto part_info = MergeTreePartInfo::tryParsePartName(part_name, format_version);

@@ -3586,9 +3586,9 @@ RestoreDataTasks MergeTreeData::restoreDataPartsFromBackup(const BackupPtr & bac
            continue;

        UInt64 total_size_of_part = 0;
-       Strings filenames = backup->list(data_path_in_backup + part_name + "/", "");
+       Strings filenames = backup->listFiles(data_path_in_backup + part_name + "/", "");
        for (const String & filename : filenames)
-           total_size_of_part += backup->getSize(data_path_in_backup + part_name + "/" + filename);
+           total_size_of_part += backup->getFileSize(data_path_in_backup + part_name + "/" + filename);

        std::shared_ptr<IReservation> reservation = getStoragePolicy()->reserveAndCheck(total_size_of_part);

@@ -3612,7 +3612,7 @@ RestoreDataTasks MergeTreeData::restoreDataPartsFromBackup(const BackupPtr & bac
        for (const String & filename : filenames)
        {
-           auto backup_entry = backup->read(data_path_in_backup + part_name + "/" + filename);
+           auto backup_entry = backup->readFile(data_path_in_backup + part_name + "/" + filename);
            auto read_buffer = backup_entry->getReadBuffer();
            auto write_buffer = disk->writeFile(temp_part_dir + "/" + filename);
            copyData(*read_buffer, *write_buffer);


@@ -192,21 +192,33 @@ void MergeTreeReaderStream::seekToStart()
void MergeTreeReaderStream::adjustForRange(MarkRange range)
{
+   /**
+    * Note: this method is called multiple times for the same range of marks -- each time we
+    * read from stream, but we must update last_right_offset only if it is bigger than
+    * the last one to avoid redundantly cancelling prefetches.
+    */
    auto [right_offset, mark_range_bytes] = getRightOffsetAndBytesRange(range.begin, range.end);
    if (!right_offset)
    {
+       if (last_right_offset && *last_right_offset == 0)
+           return;
+
+       last_right_offset = 0; // Zero value means the end of file.
        if (cached_buffer)
            cached_buffer->setReadUntilEnd();
        if (non_cached_buffer)
            non_cached_buffer->setReadUntilEnd();
    }
-   else if (right_offset > last_right_offset)
+   else
    {
+       if (last_right_offset && right_offset <= last_right_offset.value())
+           return;
+
        last_right_offset = right_offset;
        if (cached_buffer)
-           cached_buffer->setReadUntilPosition(last_right_offset);
+           cached_buffer->setReadUntilPosition(right_offset);
        if (non_cached_buffer)
-           non_cached_buffer->setReadUntilPosition(last_right_offset);
+           non_cached_buffer->setReadUntilPosition(right_offset);
    }
}
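The comment above states the invariant: adjustForRange can be called repeatedly for overlapping mark ranges, and the read-until boundary must only move forward (or switch to "until end of file"), otherwise an already issued, larger prefetch would be cancelled. A small sketch of that monotonic-update rule in isolation (illustrative names, not the real reader API):

#include <cassert>
#include <cstddef>
#include <optional>

/// Illustrative boundary tracker: 0 means "read until end of file",
/// any other value is an absolute right offset, and updates never shrink it.
class ReadUntilBoundary
{
public:
    /// Returns true when the underlying buffer would actually be re-adjusted.
    bool adjust(size_t right_offset)
    {
        if (right_offset == 0)                                   /// read to the end of file
        {
            if (last_right_offset && *last_right_offset == 0)
                return false;                                    /// already unbounded
            last_right_offset = 0;
            return true;
        }
        if (last_right_offset && right_offset <= *last_right_offset)
            return false;                                        /// smaller range: keep the old prefetch
        last_right_offset = right_offset;
        return true;
    }

private:
    std::optional<size_t> last_right_offset;
};

int main()
{
    ReadUntilBoundary boundary;
    assert(boundary.adjust(1000));    /// first range sets the limit
    assert(!boundary.adjust(500));    /// narrower range is ignored
    assert(boundary.adjust(2000));    /// wider range extends the limit
    assert(boundary.adjust(0));       /// switch to "until end of file"
    assert(!boundary.adjust(0));      /// already unbounded
}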

Some files were not shown because too many files have changed in this diff.