Merge branch 'master' into alternative-keeper-configs

This commit is contained in:
Antonio Andelic 2023-03-27 10:00:21 +02:00 committed by GitHub
commit 93f5920e02
388 changed files with 11692 additions and 3232 deletions

@ -1 +1 @@
Subproject commit d80af319f5f047067b956b2fe93a6c00038c1e0d
Subproject commit 4bfaeb31dd0ef13f025221f93c138974a3e0a22a

contrib/vectorscan vendored

@ -1 +1 @@
Subproject commit f6250ae3e5a3085000239313ad0689cc1e00cdc2
Subproject commit b4bba94b1a250603b0b198e0394946e32f6c3f30

View File

@ -3,7 +3,9 @@ set -ex
set -o pipefail
trap "exit" INT TERM
trap 'kill $(jobs -pr) ||:' EXIT
S3_URL=${S3_URL:="https://clickhouse-builds.s3.amazonaws.com"}
BUILD_NAME=${BUILD_NAME:-package_release}
export S3_URL BUILD_NAME
mkdir db0 ||:
mkdir left ||:
@ -28,8 +30,9 @@ function download
# Historically there were various paths for the performance test package.
# Test all of them.
declare -a urls_to_try=(
"https://s3.amazonaws.com/clickhouse-builds/$left_pr/$left_sha/$BUILD_NAME/performance.tar.zst"
"https://s3.amazonaws.com/clickhouse-builds/$left_pr/$left_sha/$BUILD_NAME/performance.tgz"
"$S3_URL/PRs/$left_pr/$left_sha/$BUILD_NAME/performance.tar.zst"
"$S3_URL/$left_pr/$left_sha/$BUILD_NAME/performance.tar.zst"
"$S3_URL/$left_pr/$left_sha/$BUILD_NAME/performance.tgz"
)
for path in "${urls_to_try[@]}"

View File

@ -6,11 +6,7 @@ export CHPC_CHECK_START_TIMESTAMP
S3_URL=${S3_URL:="https://clickhouse-builds.s3.amazonaws.com"}
BUILD_NAME=${BUILD_NAME:-package_release}
COMMON_BUILD_PREFIX="/clickhouse_build_check"
if [[ $S3_URL == *"s3.amazonaws.com"* ]]; then
COMMON_BUILD_PREFIX=""
fi
export S3_URL BUILD_NAME
# Sometimes AWS responds with a DNS error and it's impossible to retry it with
# current curl version options.
@ -66,8 +62,9 @@ function find_reference_sha
# test all of them.
unset found
declare -a urls_to_try=(
"https://s3.amazonaws.com/clickhouse-builds/0/$REF_SHA/$BUILD_NAME/performance.tar.zst"
"https://s3.amazonaws.com/clickhouse-builds/0/$REF_SHA/$BUILD_NAME/performance.tgz"
"$S3_URL/PRs/0/$REF_SHA/$BUILD_NAME/performance.tar.zst"
"$S3_URL/0/$REF_SHA/$BUILD_NAME/performance.tar.zst"
"$S3_URL/0/$REF_SHA/$BUILD_NAME/performance.tgz"
)
for path in "${urls_to_try[@]}"
do
@ -92,10 +89,15 @@ chmod 777 workspace output
cd workspace
# Download the package for the version we are going to test.
if curl_with_retry "$S3_URL/$PR_TO_TEST/$SHA_TO_TEST$COMMON_BUILD_PREFIX/$BUILD_NAME/performance.tar.zst"
then
right_path="$S3_URL/$PR_TO_TEST/$SHA_TO_TEST$COMMON_BUILD_PREFIX/$BUILD_NAME/performance.tar.zst"
fi
# A temporary solution for migrating into PRs directory
for prefix in "$S3_URL/PRs" "$S3_URL";
do
if curl_with_retry "$prefix/$PR_TO_TEST/$SHA_TO_TEST/$BUILD_NAME/performance.tar.zst"
then
right_path="$prefix/$PR_TO_TEST/$SHA_TO_TEST/$BUILD_NAME/performance.tar.zst"
break
fi
done
mkdir right
wget -nv -nd -c "$right_path" -O- | tar -C right --no-same-owner --strip-components=1 --zstd --extract --verbose

View File

@ -26,6 +26,7 @@ logging.basicConfig(
total_start_seconds = time.perf_counter()
stage_start_seconds = total_start_seconds
# Thread executor that does not hide exceptions that happen during function
# execution, and rethrows them after join()
class SafeThread(Thread):
@ -158,6 +159,7 @@ for e in subst_elems:
available_parameters[name] = values
# Takes parallel lists of templates, substitutes them with all combos of
# parameters. The set of parameters is determined based on the first list.
# Note: keep the order of queries -- sometimes we have DROP IF EXISTS

View File

@ -670,7 +670,6 @@ if args.report == "main":
)
elif args.report == "all-queries":
print((header_template.format()))
add_tested_commits()

View File

@ -10,31 +10,38 @@ import requests
import tempfile
DEFAULT_URL = 'https://clickhouse-datasets.s3.amazonaws.com'
DEFAULT_URL = "https://clickhouse-datasets.s3.amazonaws.com"
AVAILABLE_DATASETS = {
'hits': 'hits_v1.tar',
'visits': 'visits_v1.tar',
"hits": "hits_v1.tar",
"visits": "visits_v1.tar",
}
RETRIES_COUNT = 5
def _get_temp_file_name():
return os.path.join(tempfile._get_default_tempdir(), next(tempfile._get_candidate_names()))
return os.path.join(
tempfile._get_default_tempdir(), next(tempfile._get_candidate_names())
)
def build_url(base_url, dataset):
return os.path.join(base_url, dataset, 'partitions', AVAILABLE_DATASETS[dataset])
return os.path.join(base_url, dataset, "partitions", AVAILABLE_DATASETS[dataset])
def dowload_with_progress(url, path):
logging.info("Downloading from %s to temp path %s", url, path)
for i in range(RETRIES_COUNT):
try:
with open(path, 'wb') as f:
with open(path, "wb") as f:
response = requests.get(url, stream=True)
response.raise_for_status()
total_length = response.headers.get('content-length')
total_length = response.headers.get("content-length")
if total_length is None or int(total_length) == 0:
logging.info("No content-length, will download file without progress")
logging.info(
"No content-length, will download file without progress"
)
f.write(response.content)
else:
dl = 0
@ -46,7 +53,11 @@ def dowload_with_progress(url, path):
if sys.stdout.isatty():
done = int(50 * dl / total_length)
percent = int(100 * float(dl) / total_length)
sys.stdout.write("\r[{}{}] {}%".format('=' * done, ' ' * (50-done), percent))
sys.stdout.write(
"\r[{}{}] {}%".format(
"=" * done, " " * (50 - done), percent
)
)
sys.stdout.flush()
break
except Exception as ex:
@ -56,14 +67,21 @@ def dowload_with_progress(url, path):
if os.path.exists(path):
os.remove(path)
else:
raise Exception("Cannot download dataset from {}, all retries exceeded".format(url))
raise Exception(
"Cannot download dataset from {}, all retries exceeded".format(url)
)
sys.stdout.write("\n")
logging.info("Downloading finished")
def unpack_to_clickhouse_directory(tar_path, clickhouse_path):
logging.info("Will unpack data from temp path %s to clickhouse db %s", tar_path, clickhouse_path)
with tarfile.open(tar_path, 'r') as comp_file:
logging.info(
"Will unpack data from temp path %s to clickhouse db %s",
tar_path,
clickhouse_path,
)
with tarfile.open(tar_path, "r") as comp_file:
comp_file.extractall(path=clickhouse_path)
logging.info("Unpack finished")
@ -72,15 +90,21 @@ if __name__ == "__main__":
logging.basicConfig(level=logging.INFO)
parser = argparse.ArgumentParser(
description="Simple tool for dowloading datasets for clickhouse from S3")
description="Simple tool for dowloading datasets for clickhouse from S3"
)
parser.add_argument('--dataset-names', required=True, nargs='+', choices=list(AVAILABLE_DATASETS.keys()))
parser.add_argument('--url-prefix', default=DEFAULT_URL)
parser.add_argument('--clickhouse-data-path', default='/var/lib/clickhouse/')
parser.add_argument(
"--dataset-names",
required=True,
nargs="+",
choices=list(AVAILABLE_DATASETS.keys()),
)
parser.add_argument("--url-prefix", default=DEFAULT_URL)
parser.add_argument("--clickhouse-data-path", default="/var/lib/clickhouse/")
args = parser.parse_args()
datasets = args.dataset_names
logging.info("Will fetch following datasets: %s", ', '.join(datasets))
logging.info("Will fetch following datasets: %s", ", ".join(datasets))
for dataset in datasets:
logging.info("Processing %s", dataset)
temp_archive_path = _get_temp_file_name()
@ -92,10 +116,11 @@ if __name__ == "__main__":
logging.info("Some exception occured %s", str(ex))
raise
finally:
logging.info("Will remove downloaded file %s from filesystem if it exists", temp_archive_path)
logging.info(
"Will remove downloaded file %s from filesystem if it exists",
temp_archive_path,
)
if os.path.exists(temp_archive_path):
os.remove(temp_archive_path)
logging.info("Processing of %s finished", dataset)
logging.info("Fetch finished, enjoy your tables!")

View File

@ -170,6 +170,7 @@ if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]
fi
rg -Fa "<Fatal>" /var/log/clickhouse-server/clickhouse-server.log ||:
rg -A50 -Fa "============" /var/log/clickhouse-server/stderr.log ||:
zstd --threads=0 < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log.zst &
# Compress tables.

View File

@ -11,13 +11,14 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
aspell \
curl \
git \
file \
libxml2-utils \
moreutils \
python3-fuzzywuzzy \
python3-pip \
shellcheck \
yamllint \
&& pip3 install black==22.8.0 boto3 codespell==2.2.1 dohq-artifactory mypy PyGithub unidiff pylint==2.6.2 \
&& pip3 install black==23.1.0 boto3 codespell==2.2.1 dohq-artifactory mypy PyGithub unidiff pylint==2.6.2 \
&& apt-get clean \
&& rm -rf /root/.cache/pip

View File

@ -377,8 +377,9 @@ CREATE TABLE table_name
i32 Int32,
s String,
...
INDEX a (u64 * i32, s) TYPE minmax GRANULARITY 3,
INDEX b (u64 * length(s)) TYPE set(1000) GRANULARITY 4
INDEX idx1 u64 TYPE bloom_filter GRANULARITY 3,
INDEX idx2 u64 * i32 TYPE minmax GRANULARITY 3,
INDEX idx3 u64 * length(s) TYPE set(1000) GRANULARITY 4
) ENGINE = MergeTree()
...
```
@ -386,8 +387,25 @@ CREATE TABLE table_name
Indices from the example can be used by ClickHouse to reduce the amount of data to read from disk in the following queries:
``` sql
SELECT count() FROM table WHERE s < 'z'
SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234
SELECT count() FROM table WHERE u64 == 10;
SELECT count() FROM table WHERE u64 * i32 >= 1234
SELECT count() FROM table WHERE u64 * length(s) == 1234
```
Data skipping indexes can also be created on composite columns:
```sql
-- on columns of type Map:
INDEX map_key_index mapKeys(map_column) TYPE bloom_filter
INDEX map_value_index mapValues(map_column) TYPE bloom_filter
-- on columns of type Tuple:
INDEX tuple_1_index tuple_column.1 TYPE bloom_filter
INDEX tuple_2_index tuple_column.2 TYPE bloom_filter
-- on columns of type Nested:
INDEX nested_1_index col.nested_col1 TYPE bloom_filter
INDEX nested_2_index col.nested_col2 TYPE bloom_filter
```
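As a hedged, minimal sketch (the `kv_events` table and `attrs` column are illustrative, not part of the original example), such an index on a `Map` column is declared inside a table definition like this:

```sql
-- Illustrative only: a skipping index over the keys of a Map column.
CREATE TABLE kv_events
(
    ts DateTime,
    attrs Map(String, String),
    INDEX attrs_key_idx mapKeys(attrs) TYPE bloom_filter GRANULARITY 1
)
ENGINE = MergeTree
ORDER BY ts;
```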
### Available Types of Indices {#available-types-of-indices}
@ -432,20 +450,6 @@ Syntax: `tokenbf_v1(size_of_bloom_filter_in_bytes, number_of_hash_functions, ran
- An experimental index to support approximate nearest neighbor (ANN) search. See [here](annindexes.md) for details.
- An experimental inverted index to support full-text search. See [here](invertedindexes.md) for details.
## Example of index creation for Map data type
```
INDEX map_key_index mapKeys(map_column) TYPE bloom_filter GRANULARITY 1
INDEX map_key_index mapValues(map_column) TYPE bloom_filter GRANULARITY 1
```
``` sql
INDEX sample_index (u64 * length(s)) TYPE minmax GRANULARITY 4
INDEX sample_index2 (u64 * length(str), i32 + f64 * 100, date, str) TYPE set(100) GRANULARITY 4
INDEX sample_index3 (lower(str), str) TYPE ngrambf_v1(3, 256, 2, 0) GRANULARITY 4
```
### Functions Support {#functions-support}
Conditions in the `WHERE` clause contain calls of functions that operate on columns. If the column is a part of an index, ClickHouse tries to use this index when performing the functions. ClickHouse supports different subsets of functions for using indexes.
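For example, a hedged sketch reusing the hypothetical `kv_events` table from the earlier sketch: the `mapContains` call in the `WHERE` clause operates on the indexed `mapKeys(attrs)` expression, so ClickHouse may use the bloom filter index to skip granules.

```sql
-- The bloom_filter index on mapKeys(attrs) can help answer this key lookup.
SELECT count()
FROM kv_events
WHERE mapContains(attrs, 'user_id');
```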

View File

@ -1,7 +0,0 @@
position: 1
label: 'Example Datasets'
collapsible: true
collapsed: true
link:
type: doc
id: en/getting-started/example-datasets/

View File

@ -0,0 +1,265 @@
---
slug: /en/getting-started/example-datasets/covid19
sidebar_label: COVID-19 Open-Data
---
# COVID-19 Open-Data
COVID-19 Open-Data attempts to assemble the largest Covid-19 epidemiological database, in addition to a powerful set of expansive covariates. It includes open, publicly sourced, licensed data relating to demographics, economy, epidemiology, geography, health, hospitalizations, mobility, government response, weather, and more.
The details are in GitHub [here](https://github.com/GoogleCloudPlatform/covid-19-open-data).
It's easy to insert this data into ClickHouse...
:::note
The following commands were executed on a **Production** instance of [ClickHouse Cloud](https://clickhouse.cloud). You can easily run them on a local install as well.
:::
1. Let's see what the data looks like:
```sql
DESCRIBE url(
'https://storage.googleapis.com/covid19-open-data/v3/epidemiology.csv',
'CSVWithNames'
);
```
The CSV file has 10 columns:
```response
┌─name─────────────────┬─type─────────────┐
│ date │ Nullable(String) │
│ location_key │ Nullable(String) │
│ new_confirmed │ Nullable(Int64) │
│ new_deceased │ Nullable(Int64) │
│ new_recovered │ Nullable(Int64) │
│ new_tested │ Nullable(Int64) │
│ cumulative_confirmed │ Nullable(Int64) │
│ cumulative_deceased │ Nullable(Int64) │
│ cumulative_recovered │ Nullable(Int64) │
│ cumulative_tested │ Nullable(Int64) │
└──────────────────────┴──────────────────┘
10 rows in set. Elapsed: 0.745 sec.
```
2. Now let's view some of the rows:
```sql
SELECT *
FROM url('https://storage.googleapis.com/covid19-open-data/v3/epidemiology.csv')
LIMIT 100;
```
Notice the `url` function easily reads data from a CSV file:
```response
┌─c1─────────┬─c2───────────┬─c3────────────┬─c4───────────┬─c5────────────┬─c6─────────┬─c7───────────────────┬─c8──────────────────┬─c9───────────────────┬─c10───────────────┐
│ date │ location_key │ new_confirmed │ new_deceased │ new_recovered │ new_tested │ cumulative_confirmed │ cumulative_deceased │ cumulative_recovered │ cumulative_tested │
│ 2020-04-03 │ AD │ 24 │ 1 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 466 │ 17 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │
│ 2020-04-04 │ AD │ 57 │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 523 │ 17 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │
│ 2020-04-05 │ AD │ 17 │ 4 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 540 │ 21 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │
│ 2020-04-06 │ AD │ 11 │ 1 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 551 │ 22 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │
│ 2020-04-07 │ AD │ 15 │ 2 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 566 │ 24 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │
│ 2020-04-08 │ AD │ 23 │ 2 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 589 │ 26 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │
└────────────┴──────────────┴───────────────┴──────────────┴───────────────┴────────────┴──────────────────────┴─────────────────────┴──────────────────────┴───────────────────┘
```
3. Now that we know what the data looks like, let's create a table:
```sql
CREATE TABLE covid19 (
date Date,
location_key LowCardinality(String),
new_confirmed Int32,
new_deceased Int32,
new_recovered Int32,
new_tested Int32,
cumulative_confirmed Int32,
cumulative_deceased Int32,
cumulative_recovered Int32,
cumulative_tested Int32
)
ENGINE = MergeTree
ORDER BY (location_key, date);
```
4. The following command inserts the entire dataset into the `covid19` table:
```sql
INSERT INTO covid19
SELECT *
FROM
url(
'https://storage.googleapis.com/covid19-open-data/v3/epidemiology.csv',
CSVWithNames,
'date Date,
location_key LowCardinality(String),
new_confirmed Int32,
new_deceased Int32,
new_recovered Int32,
new_tested Int32,
cumulative_confirmed Int32,
cumulative_deceased Int32,
cumulative_recovered Int32,
cumulative_tested Int32'
);
```
5. It goes pretty quick - let's see how many rows were inserted:
```sql
SELECT formatReadableQuantity(count())
FROM covid19;
```
```response
┌─formatReadableQuantity(count())─┐
│ 12.53 million │
└─────────────────────────────────┘
```
6. Let's see how many total cases of Covid-19 were recorded:
```sql
SELECT formatReadableQuantity(sum(new_confirmed))
FROM covid19;
```
```response
┌─formatReadableQuantity(sum(new_confirmed))─┐
│ 1.39 billion │
└────────────────────────────────────────────┘
```
7. You will notice the data has a lot of 0 values for some dates - either weekends or days when numbers were not reported. We can use a window function to smooth out the daily averages of new cases:
```sql
SELECT
AVG(new_confirmed) OVER (PARTITION BY location_key ORDER BY date ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING) AS cases_smoothed,
new_confirmed,
location_key,
date
FROM covid19;
```
8. This query determines the latest values for each location. We can't use `max(date)` because not all countries reported every day, so we grab the last row using `ROW_NUMBER`:
```sql
WITH latest_deaths_data AS
( SELECT location_key,
date,
new_deceased,
new_confirmed,
ROW_NUMBER() OVER (PARTITION BY location_key ORDER BY date DESC) as rn
FROM covid19)
SELECT location_key,
date,
new_deceased,
new_confirmed,
rn
FROM latest_deaths_data
WHERE rn=1;
```
9. We can use `lagInFrame` to determine the `LAG` of new cases each day. In this query we filter by the `US_DC` location:
```sql
SELECT
new_confirmed - lagInFrame(new_confirmed,1) OVER (PARTITION BY location_key ORDER BY date) AS confirmed_cases_delta,
new_confirmed,
location_key,
date
FROM covid19
WHERE location_key = 'US_DC';
```
The response looks like:
```response
┌─confirmed_cases_delta─┬─new_confirmed─┬─location_key─┬───────date─┐
│ 0 │ 0 │ US_DC │ 2020-03-08 │
│ 2 │ 2 │ US_DC │ 2020-03-09 │
│ -2 │ 0 │ US_DC │ 2020-03-10 │
│ 6 │ 6 │ US_DC │ 2020-03-11 │
│ -6 │ 0 │ US_DC │ 2020-03-12 │
│ 0 │ 0 │ US_DC │ 2020-03-13 │
│ 6 │ 6 │ US_DC │ 2020-03-14 │
│ -5 │ 1 │ US_DC │ 2020-03-15 │
│ 4 │ 5 │ US_DC │ 2020-03-16 │
│ 4 │ 9 │ US_DC │ 2020-03-17 │
│ -1 │ 8 │ US_DC │ 2020-03-18 │
│ 24 │ 32 │ US_DC │ 2020-03-19 │
│ -26 │ 6 │ US_DC │ 2020-03-20 │
│ 15 │ 21 │ US_DC │ 2020-03-21 │
│ -3 │ 18 │ US_DC │ 2020-03-22 │
│ 3 │ 21 │ US_DC │ 2020-03-23 │
```
10. This query calculates the percentage of change in new cases each day, and includes a simple `increase` or `decrease` column in the result set:
```sql
WITH confirmed_lag AS (
SELECT
*,
lagInFrame(new_confirmed) OVER(
PARTITION BY location_key
ORDER BY date
) AS confirmed_previous_day
FROM covid19
),
confirmed_percent_change AS (
SELECT
*,
COALESCE(ROUND((new_confirmed - confirmed_previous_day) / confirmed_previous_day * 100), 0) AS percent_change
FROM confirmed_lag
)
SELECT
date,
new_confirmed,
percent_change,
CASE
WHEN percent_change > 0 THEN 'increase'
WHEN percent_change = 0 THEN 'no change'
ELSE 'decrease'
END AS trend
FROM confirmed_percent_change
WHERE location_key = 'US_DC';
```
The results look like:
```response
┌───────date─┬─new_confirmed─┬─percent_change─┬─trend─────┐
│ 2020-03-08 │ 0 │ nan │ decrease │
│ 2020-03-09 │ 2 │ inf │ increase │
│ 2020-03-10 │ 0 │ -100 │ decrease │
│ 2020-03-11 │ 6 │ inf │ increase │
│ 2020-03-12 │ 0 │ -100 │ decrease │
│ 2020-03-13 │ 0 │ nan │ decrease │
│ 2020-03-14 │ 6 │ inf │ increase │
│ 2020-03-15 │ 1 │ -83 │ decrease │
│ 2020-03-16 │ 5 │ 400 │ increase │
│ 2020-03-17 │ 9 │ 80 │ increase │
│ 2020-03-18 │ 8 │ -11 │ decrease │
│ 2020-03-19 │ 32 │ 300 │ increase │
│ 2020-03-20 │ 6 │ -81 │ decrease │
│ 2020-03-21 │ 21 │ 250 │ increase │
│ 2020-03-22 │ 18 │ -14 │ decrease │
│ 2020-03-23 │ 21 │ 17 │ increase │
│ 2020-03-24 │ 46 │ 119 │ increase │
│ 2020-03-25 │ 48 │ 4 │ increase │
│ 2020-03-26 │ 36 │ -25 │ decrease │
│ 2020-03-27 │ 37 │ 3 │ increase │
│ 2020-03-28 │ 38 │ 3 │ increase │
│ 2020-03-29 │ 59 │ 55 │ increase │
│ 2020-03-30 │ 94 │ 59 │ increase │
│ 2020-03-31 │ 91 │ -3 │ decrease │
│ 2020-04-01 │ 67 │ -26 │ decrease │
│ 2020-04-02 │ 104 │ 55 │ increase │
│ 2020-04-03 │ 145 │ 39 │ increase │
```
:::note
As mentioned in the [GitHub repo](https://github.com/GoogleCloudPlatform/covid-19-open-data), the dataset is no longer updated as of September 15, 2022.
:::

View File

@ -0,0 +1,219 @@
---
slug: /en/getting-started/example-datasets/youtube-dislikes
sidebar_label: YouTube Dislikes
description: A collection of dislikes of YouTube videos.
---
# YouTube dataset of dislikes
In November of 2021, YouTube removed the public ***dislike*** count from all of its videos. While creators can still see the number of dislikes, viewers can only see how many ***likes*** a video has received.
:::important
The dataset has over 4.55 billion records, so be careful just copying-and-pasting the commands below unless your resources can handle that type of volume. The commands below were executed on a **Production** instance of [ClickHouse Cloud](https://clickhouse.cloud).
:::
The data is in a JSON format and can be downloaded from [archive.org](https://archive.org/download/dislikes_youtube_2021_12_video_json_files). We have made this same data available in S3 so that it can be downloaded more efficiently into a ClickHouse Cloud instance.
Here are the steps to create a table in ClickHouse Cloud and insert the data.
:::note
The steps below will easily work on a local install of ClickHouse too. The only change would be to use the `s3` function instead of `s3cluster` (unless you have a cluster configured - in which case change `default` to the name of your cluster).
:::
## Step-by-step instructions
1. Let's see what the data looks like. The `s3cluster` table function returns a table, so we can `DESCRIBE` the result:
```sql
DESCRIBE s3Cluster(
'default',
'https://clickhouse-public-datasets.s3.amazonaws.com/youtube/original/files/*.zst',
'JSONLines'
);
```
ClickHouse infers the following schema from the JSON file:
```response
┌─name────────────────┬─type─────────────────────────────────┐
│ id │ Nullable(String) │
│ fetch_date │ Nullable(Int64) │
│ upload_date │ Nullable(String) │
│ title │ Nullable(String) │
│ uploader_id │ Nullable(String) │
│ uploader │ Nullable(String) │
│ uploader_sub_count │ Nullable(Int64) │
│ is_age_limit │ Nullable(Bool) │
│ view_count │ Nullable(Int64) │
│ like_count │ Nullable(Int64) │
│ dislike_count │ Nullable(Int64) │
│ is_crawlable │ Nullable(Bool) │
│ is_live_content │ Nullable(Bool) │
│ has_subtitles │ Nullable(Bool) │
│ is_ads_enabled │ Nullable(Bool) │
│ is_comments_enabled │ Nullable(Bool) │
│ description │ Nullable(String) │
│ rich_metadata │ Array(Map(String, Nullable(String))) │
│ super_titles │ Array(Map(String, Nullable(String))) │
│ uploader_badges │ Nullable(String) │
│ video_badges │ Nullable(String) │
└─────────────────────┴──────────────────────────────────────┘
```
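If you are running a single self-managed server, the same inspection can be done with the plain `s3` table function, as mentioned in the note above. A hedged sketch:

```sql
-- Same DESCRIBE as step 1, but without a cluster.
DESCRIBE s3(
    'https://clickhouse-public-datasets.s3.amazonaws.com/youtube/original/files/*.zst',
    'JSONLines'
);
```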
2. Based on the inferred schema, we cleaned up the data types and added a primary key. Define the following table:
```sql
CREATE TABLE youtube
(
`id` String,
`fetch_date` DateTime,
`upload_date` String,
`title` String,
`uploader_id` String,
`uploader` String,
`uploader_sub_count` Int64,
`is_age_limit` Bool,
`view_count` Int64,
`like_count` Int64,
`dislike_count` Int64,
`is_crawlable` Bool,
`has_subtitles` Bool,
`is_ads_enabled` Bool,
`is_comments_enabled` Bool,
`description` String,
`rich_metadata` Array(Map(String, String)),
`super_titles` Array(Map(String, String)),
`uploader_badges` String,
`video_badges` String
)
ENGINE = MergeTree
ORDER BY (upload_date, uploader);
```
3. The following command streams the records from the S3 files into the `youtube` table.
:::important
This inserts a lot of data - 4.65 billion rows. If you do not want the entire dataset, simply add a `LIMIT` clause with the desired number of rows.
:::
```sql
INSERT INTO youtube
SETTINGS input_format_null_as_default = 1
SELECT
id,
parseDateTimeBestEffortUS(toString(fetch_date)) AS fetch_date,
upload_date,
ifNull(title, '') AS title,
uploader_id,
ifNull(uploader, '') AS uploader,
uploader_sub_count,
is_age_limit,
view_count,
like_count,
dislike_count,
is_crawlable,
has_subtitles,
is_ads_enabled,
is_comments_enabled,
ifNull(description, '') AS description,
rich_metadata,
super_titles,
ifNull(uploader_badges, '') AS uploader_badges,
ifNull(video_badges, '') AS video_badges
FROM s3Cluster(
'default',
'https://clickhouse-public-datasets.s3.amazonaws.com/youtube/original/files/*.zst',
'JSONLines'
);
```
4. Open a new tab in the SQL Console of ClickHouse Cloud (or a new `clickhouse-client` window) and watch the count increase. It will take a while to insert 4.56B rows, depending on your server resources. (Without any tweaking of settings, it takes about 4.5 hours.)
```sql
SELECT formatReadableQuantity(count())
FROM youtube
```
```response
┌─formatReadableQuantity(count())─┐
│ 4.56 billion │
└─────────────────────────────────┘
```
5. Once the data is inserted, go ahead and count the number of dislikes of your favorite videos or channels. Let's see how many videos were uploaded by ClickHouse:
```sql
SELECT count()
FROM youtube
WHERE uploader = 'ClickHouse';
```
```response
┌─count()─┐
│ 84 │
└─────────┘
1 row in set. Elapsed: 0.570 sec. Processed 237.57 thousand rows, 5.77 MB (416.54 thousand rows/s., 10.12 MB/s.)
```
:::note
The query above runs so quickly because `uploader` is part of the primary key - so it only had to process 237k rows.
:::
6. Let's look at the likes and dislikes of ClickHouse videos:
```sql
SELECT
title,
like_count,
dislike_count
FROM youtube
WHERE uploader = 'ClickHouse'
ORDER BY dislike_count DESC;
```
The response looks like:
```response
┌─title────────────────────────────────────────────────────────────────────────────────────────────────┬─like_count─┬─dislike_count─┐
│ ClickHouse v21.11 Release Webinar │ 52 │ 3 │
│ ClickHouse Introduction │ 97 │ 3 │
│ Casa Modelo Algarve │ 180 │ 3 │
│ Профайлер запросов: трудный путь │ 33 │ 3 │
│ ClickHouse в Курсометре │ 4 │ 2 │
│ 10 Good Reasons to Use ClickHouse │ 27 │ 2 │
...
84 rows in set. Elapsed: 0.013 sec. Processed 155.65 thousand rows, 16.94 MB (11.96 million rows/s., 1.30 GB/s.)
```
7. Here is a search for videos with **ClickHouse** in the `title` or `description` fields:
```sql
SELECT
view_count,
like_count,
dislike_count,
concat('https://youtu.be/', id) AS url,
title
FROM youtube
WHERE (title ILIKE '%ClickHouse%') OR (description ILIKE '%ClickHouse%')
ORDER BY
like_count DESC,
view_count DESC
```
This query has to process every row, and also parse through two columns of strings. Even then, we get decent performance at 4.15M rows/second:
```response
1174 rows in set. Elapsed: 1099.368 sec. Processed 4.56 billion rows, 1.98 TB (4.15 million rows/s., 1.80 GB/s.)
```
The results look like:
```response
┌─view_count─┬─like_count─┬─dislike_count─┬─url──────────────────────────┬─title──────────────────────────────────────────────────────────────────────────────────────────────────┐
│ 1919 │ 63 │ 1 │ https://youtu.be/b9MeoOtAivQ │ ClickHouse v21.10 Release Webinar │
│ 8710 │ 62 │ 4 │ https://youtu.be/PeV1mC2z--M │ What is JDBC DriverManager? | JDBC │
│ 3534 │ 62 │ 1 │ https://youtu.be/8nWRhK9gw10 │ CLICKHOUSE - Arquitetura Modular │
```

View File

@ -154,7 +154,7 @@ Arrays are written as a list of comma-separated values in square brackets. Numbe
In input data, ENUM values can be represented as names or as ids. First, we try to match the input value to the ENUM name. If we fail and the input value is a number, we try to match this number to ENUM id.
If input data contains only ENUM ids, it's recommended to enable the setting [input_format_tsv_enum_as_number](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_enum_as_number) to optimize ENUM parsing.
Each element of [Nested](/docs/en/sql-reference/data-types/nested-data-structures/nested.md) structures is represented as an array.
Each element of [Nested](/docs/en/sql-reference/data-types/nested-data-structures/index.md) structures is represented as an array.
For example:
@ -1150,7 +1150,7 @@ Any set of bytes can be output in the strings. Use the `JSONEachRow` format if y
### Usage of Nested Structures {#jsoneachrow-nested}
If you have a table with [Nested](/docs/en/sql-reference/data-types/nested-data-structures/nested.md) data type columns, you can insert JSON data with the same structure. Enable this feature with the [input_format_import_nested_json](/docs/en/operations/settings/settings-formats.md/#input_format_import_nested_json) setting.
If you have a table with [Nested](/docs/en/sql-reference/data-types/nested-data-structures/index.md) data type columns, you can insert JSON data with the same structure. Enable this feature with the [input_format_import_nested_json](/docs/en/operations/settings/settings-formats.md/#input_format_import_nested_json) setting.
For example, consider the following table:
@ -1776,7 +1776,7 @@ message MessageType {
```
ClickHouse tries to find a column named `x.y.z` (or `x_y_z` or `X.y_Z` and so on).
Nested messages are suitable to input or output a [nested data structures](/docs/en/sql-reference/data-types/nested-data-structures/nested.md).
Nested messages are suitable to input or output a [nested data structures](/docs/en/sql-reference/data-types/nested-data-structures/index.md).
Default values defined in a protobuf schema like this
@ -1978,7 +1978,7 @@ To exchange data with Hadoop, you can use [HDFS table engine](/docs/en/engines/t
- [output_format_parquet_row_group_size](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_row_group_size) - row group size in rows while data output. Default value - `1000000`.
- [output_format_parquet_string_as_string](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_string_as_string) - use Parquet String type instead of Binary for String columns. Default value - `false`.
- [input_format_parquet_import_nested](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_import_nested) - allow inserting array of structs into [Nested](/docs/en/sql-reference/data-types/nested-data-structures/nested.md) table in Parquet input format. Default value - `false`.
- [input_format_parquet_import_nested](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_import_nested) - allow inserting array of structs into [Nested](/docs/en/sql-reference/data-types/nested-data-structures/index.md) table in Parquet input format. Default value - `false`.
- [input_format_parquet_case_insensitive_column_matching](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_case_insensitive_column_matching) - ignore case when matching Parquet columns with ClickHouse columns. Default value - `false`.
- [input_format_parquet_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_allow_missing_columns) - allow missing columns while reading Parquet data. Default value - `false`.
- [input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Parquet format. Default value - `false`.

View File

@ -6,7 +6,7 @@ keywords: [clickhouse, network, interfaces, http, tcp, grpc, command-line, clien
description: ClickHouse provides three network interfaces
---
# Interfaces
# Drivers and Interfaces
ClickHouse provides three network interfaces (they can be optionally wrapped in TLS for additional security):

View File

@ -331,7 +331,7 @@ It is also possible to `BACKUP`/`RESTORE` to S3 by configuring an S3 disk in the
<s3>
<volumes>
<main>
<disk>s3</disk>
<disk>s3_plain</disk>
</main>
</volumes>
</s3>

View File

@ -85,8 +85,8 @@ make the matching more natural, all query-level settings related to the query ca
If the query was aborted due to an exception or user cancellation, no entry is written into the query cache.
The size of the query cache, the maximum number of cache entries and the maximum size of cache entries (in bytes and in records) can
be configured using different [server configuration options](server-configuration-parameters/settings.md#server_configuration_parameters_query-cache).
The size of the query cache in bytes, the maximum number of cache entries and the maximum size of individual cache entries (in bytes and in
records) can be configured using different [server configuration options](server-configuration-parameters/settings.md#server_configuration_parameters_query-cache).
To define how long a query must run at least for its result to be cached, you can use the setting
[query_cache_min_query_duration](settings/settings.md#query-cache-min-query-duration). For example, the result of query

View File

@ -1361,7 +1361,7 @@ If the table does not exist, ClickHouse will create it. If the structure of the
The following settings are available:
- `size`: The maximum cache size in bytes. 0 means the query cache is disabled. Default value: `1073741824` (1 GiB).
- `max_size`: The maximum cache size in bytes. 0 means the query cache is disabled. Default value: `1073741824` (1 GiB).
- `max_entries`: The maximum number of `SELECT` query results stored in the cache. Default value: `1024`.
- `max_entry_size`: The maximum size in bytes `SELECT` query results may have to be saved in the cache. Default value: `1048576` (1 MiB).
- `max_entry_rows`: The maximum number of rows `SELECT` query results may have to be saved in the cache. Default value: `30000000` (30 mil).
@ -1369,7 +1369,7 @@ The following settings are available:
Changed settings take effect immediately.
:::warning
Data for the query cache is allocated in DRAM. If memory is scarce, make sure to set a small value for `size` or disable the query cache altogether.
Data for the query cache is allocated in DRAM. If memory is scarce, make sure to set a small value for `max_size` or disable the query cache altogether.
:::
**Example**
@ -1882,6 +1882,16 @@ The update is performed asynchronously, in a separate system thread.
Manage executing [distributed ddl queries](../../sql-reference/distributed-ddl.md) (CREATE, DROP, ALTER, RENAME) on cluster.
Works only if [ZooKeeper](#server-settings_zookeeper) is enabled.
The configurable settings within `<distributed_ddl>` include:
- **path**: the path in Keeper for the `task_queue` for DDL queries
- **profile**: the profile used to execute the DDL queries
- **pool_size**: how many `ON CLUSTER` queries can be run simultaneously
- **max_tasks_in_queue**: the maximum number of tasks that can be in the queue. Default is 1,000
- **task_max_lifetime**: delete node if its age is greater than this value. Default is `7 * 24 * 60 * 60` (a week in seconds)
- **cleanup_delay_period**: cleaning starts after new node event is received if the last cleaning wasn't made sooner than `cleanup_delay_period` seconds ago. Default is 60 seconds
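These settings govern DDL statements submitted with `ON CLUSTER`; a hedged sketch of such a statement (the `my_cluster` name and the table are illustrative):

```sql
-- Queued in the distributed_ddl task_queue and executed on every host of the cluster.
CREATE TABLE db.events ON CLUSTER my_cluster
(
    id UInt64,
    ts DateTime
)
ENGINE = MergeTree
ORDER BY id;
```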
**Example**
```xml

View File

@ -964,7 +964,7 @@ Default value: 1.
### input_format_arrow_import_nested {#input_format_arrow_import_nested}
Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns as an array of structs in [Arrow](../../interfaces/formats.md/#data_types-matching-arrow) input format.
Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/index.md) columns as an array of structs in [Arrow](../../interfaces/formats.md/#data_types-matching-arrow) input format.
Possible values:
@ -1024,7 +1024,7 @@ Default value: `none`.
### input_format_orc_import_nested {#input_format_orc_import_nested}
Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns as an array of structs in [ORC](../../interfaces/formats.md/#data-format-orc) input format.
Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/index.md) columns as an array of structs in [ORC](../../interfaces/formats.md/#data-format-orc) input format.
Possible values:
@ -1073,7 +1073,7 @@ Default value: `none`.
### input_format_parquet_import_nested {#input_format_parquet_import_nested}
Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns as an array of structs in [Parquet](../../interfaces/formats.md/#data-format-parquet) input format.
Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/index.md) columns as an array of structs in [Parquet](../../interfaces/formats.md/#data-format-parquet) input format.
Possible values:
@ -1538,6 +1538,6 @@ Default value: `1GiB`.
### input_format_native_allow_types_conversion {#input_format_native_allow_types_conversion}
Allow types conversion in Native input format between columns from input data and requested columns.
Allow types conversion in Native input format between columns from input data and requested columns.
Enabled by default.

View File

@ -3438,7 +3438,7 @@ Default value: `throw`.
## flatten_nested {#flatten-nested}
Sets the data format of a [nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns.
Sets the data format of a [nested](../../sql-reference/data-types/nested-data-structures/index.md) columns.
Possible values:
@ -4049,3 +4049,32 @@ Possible values:
- 1 - enabled
Default value: `0`.
## stop_reading_on_first_cancel {#stop_reading_on_first_cancel}
When set to `true` and the user wants to interrupt a query (for example using `Ctrl+C` on the client), then the query continues execution only on data that was already read from the table. Afterward, it will return a partial result of the query for the part of the table that was read. To fully stop the execution of a query without a partial result, the user should send 2 cancel requests.
**Example without setting on Ctrl+C**
```sql
SELECT sum(number) FROM numbers(10000000000)
Cancelling query.
Ok.
Query was cancelled.
0 rows in set. Elapsed: 1.334 sec. Processed 52.65 million rows, 421.23 MB (39.48 million rows/s., 315.85 MB/s.)
```
**Example with setting on Ctrl+C**
```sql
SELECT sum(number) FROM numbers(10000000000) SETTINGS stop_reading_on_first_cancel=true
┌──────sum(number)─┐
│ 1355411451286266 │
└──────────────────┘
1 row in set. Elapsed: 1.331 sec. Processed 52.13 million rows, 417.05 MB (39.17 million rows/s., 313.33 MB/s.)
```
Possible values: `true`, `false`
Default value: `false`

View File

@ -1,7 +1,7 @@
---
slug: /en/operations/utilities/
sidebar_position: 56
sidebar_label: Utilities
sidebar_label: List of tools and utilities
pagination_next: 'en/operations/utilities/clickhouse-copier'
---

View File

@ -1,13 +1,33 @@
---
slug: /en/sql-reference/data-types/
sidebar_label: Data Types
sidebar_label: List of data types
sidebar_position: 37
---
# Data Types
# ClickHouse Data Types
ClickHouse can store various kinds of data in table cells.
ClickHouse can store various kinds of data in table cells. This section describes the supported data types and special considerations for using and/or implementing them if any.
This section describes the supported data types and special considerations for using and/or implementing them if any.
:::note
You can check whether a data type name is case-sensitive in the [system.data_type_families](../../operations/system-tables/data_type_families.md#system_tables-data_type_families) table.
:::
You can check whether data type name is case-sensitive in the [system.data_type_families](../../operations/system-tables/data_type_families.md#system_tables-data_type_families) table.
ClickHouse data types include:
- **Integer types**: [signed and unsigned integers](./int-uint.md) (`UInt8`, `UInt16`, `UInt32`, `UInt64`, `UInt128`, `UInt256`, `Int8`, `Int16`, `Int32`, `Int64`, `Int128`, `Int256`)
- **Floating-point numbers**: [floats](./float.md) (`Float32` and `Float64`) and [`Decimal` values](./decimal.md)
- **Boolean**: ClickHouse has a [`Boolean` type](./boolean.md)
- **Strings**: [`String`](./string.md) and [`FixedString`](./fixedstring.md)
- **Dates**: use [`Date`](./date.md) and [`Date32`](./date32.md) for days, and [`DateTime`](./datetime.md) and [`DateTime64`](./datetime64.md) for instances in time
- **JSON**: the [`JSON` object](./json.md) stores a JSON document in a single column
- **UUID**: a performant option for storing [`UUID` values](./uuid.md)
- **Low cardinality types**: use an [`Enum`](./enum.md) when you have a handful of unique values, or use [`LowCardinality`](./lowcardinality.md) when you have up to 10,000 unique values of a column
- **Arrays**: any column can be defined as an [`Array` of values](./array.md)
- **Maps**: use [`Map`](./map.md) for storing key/value pairs
- **Aggregation function types**: use [`SimpleAggregateFunction`](./simpleaggregatefunction.md) and [`AggregateFunction`](./aggregatefunction.md) for storing the intermediate status of aggregate function results
- **Nested data structures**: A [`Nested` data structure](./nested-data-structures/index.md) is like a table inside a cell
- **Tuples**: A [`Tuple` of elements](./tuple.md), each having an individual type.
- **Nullable**: [`Nullable`](./nullable.md) allows you to store a value as `NULL` when a value is "missing" (instead of the column getting its default value for the data type)
- **IP addresses**: use [`IPv4`](./domains/ipv4.md) and [`IPv6`](./domains/ipv6.md) to efficiently store IP addresses
- **Geo types**: for [geographical data](./geo.md), including `Point`, `Ring`, `Polygon` and `MultiPolygon`
- **Special data types**: including [`Expression`](./special-data-types/expression.md), [`Set`](./special-data-types/set.md), [`Nothing`](./special-data-types/nothing.md) and [`Interval`](./special-data-types/interval.md)
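As a hedged, purely illustrative sketch (the table and column names are invented), several of the types listed above combined in a single table definition:

```sql
-- Illustrative only: combining a few of the data types from the list above.
CREATE TABLE datatype_demo
(
    id UUID,
    created DateTime64(3),
    name LowCardinality(String),
    price Decimal(18, 2),
    tags Array(String),
    attrs Map(String, String),
    comment Nullable(String),
    status Enum8('new' = 1, 'done' = 2)
)
ENGINE = MergeTree
ORDER BY (name, created);
```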

View File

@ -1,7 +1,105 @@
---
slug: /en/sql-reference/data-types/nested-data-structures/
sidebar_label: Nested Data Structures
sidebar_position: 54
slug: /en/sql-reference/data-types/nested-data-structures/nested
sidebar_position: 57
sidebar_label: Nested(Name1 Type1, Name2 Type2, ...)
---
# Nested Data Structures
# Nested
## Nested(name1 Type1, Name2 Type2, …)
A nested data structure is like a table inside a cell. The parameters of a nested data structure (the column names and types) are specified the same way as in a [CREATE TABLE](../../../sql-reference/statements/create/table.md) query. Each table row can correspond to any number of rows in a nested data structure.
Example:
``` sql
CREATE TABLE test.visits
(
CounterID UInt32,
StartDate Date,
Sign Int8,
IsNew UInt8,
VisitID UInt64,
UserID UInt64,
...
Goals Nested
(
ID UInt32,
Serial UInt32,
EventTime DateTime,
Price Int64,
OrderID String,
CurrencyID UInt32
),
...
) ENGINE = CollapsingMergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192, Sign)
```
This example declares the `Goals` nested data structure, which contains data about conversions (goals reached). Each row in the visits table can correspond to zero or any number of conversions.
When [flatten_nested](../../../operations/settings/settings.md#flatten-nested) is set to `0` (which is not the default), arbitrary levels of nesting are supported.
In most cases, when working with a nested data structure, its columns are specified with column names separated by a dot. These columns make up an array of matching types. All the column arrays of a single nested data structure have the same length.
Example:
``` sql
SELECT
Goals.ID,
Goals.EventTime
FROM test.visits
WHERE CounterID = 101500 AND length(Goals.ID) < 5
LIMIT 10
```
``` text
┌─Goals.ID───────────────────────┬─Goals.EventTime───────────────────────────────────────────────────────────────────────────┐
│ [1073752,591325,591325] │ ['2014-03-17 16:38:10','2014-03-17 16:38:48','2014-03-17 16:42:27'] │
│ [1073752] │ ['2014-03-17 00:28:25'] │
│ [1073752] │ ['2014-03-17 10:46:20'] │
│ [1073752,591325,591325,591325] │ ['2014-03-17 13:59:20','2014-03-17 22:17:55','2014-03-17 22:18:07','2014-03-17 22:18:51'] │
│ [] │ [] │
│ [1073752,591325,591325] │ ['2014-03-17 11:37:06','2014-03-17 14:07:47','2014-03-17 14:36:21'] │
│ [] │ [] │
│ [] │ [] │
│ [591325,1073752] │ ['2014-03-17 00:46:05','2014-03-17 00:46:05'] │
│ [1073752,591325,591325,591325] │ ['2014-03-17 13:28:33','2014-03-17 13:30:26','2014-03-17 18:51:21','2014-03-17 18:51:45'] │
└────────────────────────────────┴───────────────────────────────────────────────────────────────────────────────────────────┘
```
It is easiest to think of a nested data structure as a set of multiple column arrays of the same length.
The only place where a SELECT query can specify the name of an entire nested data structure instead of individual columns is the ARRAY JOIN clause. For more information, see “ARRAY JOIN clause”. Example:
``` sql
SELECT
Goal.ID,
Goal.EventTime
FROM test.visits
ARRAY JOIN Goals AS Goal
WHERE CounterID = 101500 AND length(Goals.ID) < 5
LIMIT 10
```
``` text
┌─Goal.ID─┬──────Goal.EventTime─┐
│ 1073752 │ 2014-03-17 16:38:10 │
│ 591325 │ 2014-03-17 16:38:48 │
│ 591325 │ 2014-03-17 16:42:27 │
│ 1073752 │ 2014-03-17 00:28:25 │
│ 1073752 │ 2014-03-17 10:46:20 │
│ 1073752 │ 2014-03-17 13:59:20 │
│ 591325 │ 2014-03-17 22:17:55 │
│ 591325 │ 2014-03-17 22:18:07 │
│ 591325 │ 2014-03-17 22:18:51 │
│ 1073752 │ 2014-03-17 11:37:06 │
└─────────┴─────────────────────┘
```
You can't perform SELECT for an entire nested data structure. You can only explicitly list individual columns that are part of it.
For an INSERT query, you should pass all the component column arrays of a nested data structure separately (as if they were individual column arrays). During insertion, the system checks that they have the same length.
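A hedged sketch of such an INSERT against the `test.visits` example above (all values are invented; note that every `Goals.*` array in a row has the same length):

```sql
-- Each component column of the Goals nested structure is passed as its own array;
-- during insertion the server checks that all of them have equal length (here: 2).
INSERT INTO test.visits
    (CounterID, StartDate, Sign, IsNew, VisitID, UserID,
     `Goals.ID`, `Goals.Serial`, `Goals.EventTime`,
     `Goals.Price`, `Goals.OrderID`, `Goals.CurrencyID`)
VALUES
    (101500, '2014-03-17', 1, 0, 1, 1,
     [1073752, 591325], [1, 2],
     ['2014-03-17 16:38:10', '2014-03-17 16:38:48'],
     [0, 0], ['', ''], [0, 0]);
```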
For a DESCRIBE query, the columns in a nested data structure are listed separately in the same way.
The ALTER query for elements in a nested data structure has limitations.

View File

@ -1,105 +0,0 @@
---
slug: /en/sql-reference/data-types/nested-data-structures/nested
sidebar_position: 57
sidebar_label: Nested(Name1 Type1, Name2 Type2, ...)
---
# Nested
## Nested(name1 Type1, Name2 Type2, …)
A nested data structure is like a table inside a cell. The parameters of a nested data structure (the column names and types) are specified the same way as in a [CREATE TABLE](../../../sql-reference/statements/create/table.md) query. Each table row can correspond to any number of rows in a nested data structure.
Example:
``` sql
CREATE TABLE test.visits
(
CounterID UInt32,
StartDate Date,
Sign Int8,
IsNew UInt8,
VisitID UInt64,
UserID UInt64,
...
Goals Nested
(
ID UInt32,
Serial UInt32,
EventTime DateTime,
Price Int64,
OrderID String,
CurrencyID UInt32
),
...
) ENGINE = CollapsingMergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192, Sign)
```
This example declares the `Goals` nested data structure, which contains data about conversions (goals reached). Each row in the visits table can correspond to zero or any number of conversions.
When [flatten_nested](../../../operations/settings/settings.md#flatten-nested) is set to `0` (which is not the default), arbitrary levels of nesting are supported.
In most cases, when working with a nested data structure, its columns are specified with column names separated by a dot. These columns make up an array of matching types. All the column arrays of a single nested data structure have the same length.
Example:
``` sql
SELECT
Goals.ID,
Goals.EventTime
FROM test.visits
WHERE CounterID = 101500 AND length(Goals.ID) < 5
LIMIT 10
```
``` text
┌─Goals.ID───────────────────────┬─Goals.EventTime───────────────────────────────────────────────────────────────────────────┐
│ [1073752,591325,591325] │ ['2014-03-17 16:38:10','2014-03-17 16:38:48','2014-03-17 16:42:27'] │
│ [1073752] │ ['2014-03-17 00:28:25'] │
│ [1073752] │ ['2014-03-17 10:46:20'] │
│ [1073752,591325,591325,591325] │ ['2014-03-17 13:59:20','2014-03-17 22:17:55','2014-03-17 22:18:07','2014-03-17 22:18:51'] │
│ [] │ [] │
│ [1073752,591325,591325] │ ['2014-03-17 11:37:06','2014-03-17 14:07:47','2014-03-17 14:36:21'] │
│ [] │ [] │
│ [] │ [] │
│ [591325,1073752] │ ['2014-03-17 00:46:05','2014-03-17 00:46:05'] │
│ [1073752,591325,591325,591325] │ ['2014-03-17 13:28:33','2014-03-17 13:30:26','2014-03-17 18:51:21','2014-03-17 18:51:45'] │
└────────────────────────────────┴───────────────────────────────────────────────────────────────────────────────────────────┘
```
It is easiest to think of a nested data structure as a set of multiple column arrays of the same length.
The only place where a SELECT query can specify the name of an entire nested data structure instead of individual columns is the ARRAY JOIN clause. For more information, see “ARRAY JOIN clause”. Example:
``` sql
SELECT
Goal.ID,
Goal.EventTime
FROM test.visits
ARRAY JOIN Goals AS Goal
WHERE CounterID = 101500 AND length(Goals.ID) < 5
LIMIT 10
```
``` text
┌─Goal.ID─┬──────Goal.EventTime─┐
│ 1073752 │ 2014-03-17 16:38:10 │
│ 591325 │ 2014-03-17 16:38:48 │
│ 591325 │ 2014-03-17 16:42:27 │
│ 1073752 │ 2014-03-17 00:28:25 │
│ 1073752 │ 2014-03-17 10:46:20 │
│ 1073752 │ 2014-03-17 13:59:20 │
│ 591325 │ 2014-03-17 22:17:55 │
│ 591325 │ 2014-03-17 22:18:07 │
│ 591325 │ 2014-03-17 22:18:51 │
│ 1073752 │ 2014-03-17 11:37:06 │
└─────────┴─────────────────────┘
```
You can't perform SELECT for an entire nested data structure. You can only explicitly list individual columns that are part of it.
For an INSERT query, you should pass all the component column arrays of a nested data structure separately (as if they were individual column arrays). During insertion, the system checks that they have the same length.
For a DESCRIBE query, the columns in a nested data structure are listed separately in the same way.
The ALTER query for elements in a nested data structure has limitations.

View File

@ -1232,12 +1232,14 @@ SELECT timeSlots(toDateTime64('1980-12-12 21:01:02.1234', 4, 'UTC'), toDecimal64
└───────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```
## formatDateTime
## formatDateTime {#date_time_functions-formatDateTime}
Formats a Time according to the given Format string. Format is a constant expression, so you cannot have multiple formats for a single result column.
formatDateTime uses MySQL datetime format style, refer to https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_date-format.
The opposite operation of this function is [parseDateTime](/docs/en/sql-reference/functions/type-conversion-functions.md#type_conversion_functions-parseDateTime).
Alias: `DATE_FORMAT`.
**Syntax**
@ -1257,7 +1259,7 @@ Using replacement fields, you can define a pattern for the resulting string. “
|----------|---------------------------------------------------------|------------|
| %a | abbreviated weekday name (Mon-Sun) | Mon |
| %b | abbreviated month name (Jan-Dec) | Jan |
| %c | month as a decimal number (01-12) | 01 |
| %c | month as an integer number (01-12) | 01 |
| %C | year divided by 100 and truncated to integer (00-99) | 20 |
| %d | day of the month, zero-padded (01-31) | 02 |
| %D | Short MM/DD/YY date, equivalent to %m/%d/%y | 01/02/18 |
@ -1273,7 +1275,7 @@ Using replacement fields, you can define a pattern for the resulting string. “
| %j | day of the year (001-366) | 002 |
| %k | hour in 24h format (00-23) | 22 |
| %l | hour in 12h format (01-12) | 09 |
| %m | month as a decimal number (01-12) | 01 |
| %m | month as an integer number (01-12) | 01 |
| %M | minute (00-59) | 33 |
| %n | new-line character () | |
| %p | AM or PM designation | PM |
@ -1286,7 +1288,7 @@ Using replacement fields, you can define a pattern for the resulting string. “
| %T | ISO 8601 time format (HH:MM:SS), equivalent to %H:%M:%S | 22:33:44 |
| %u | ISO 8601 weekday as number with Monday as 1 (1-7) | 2 |
| %V | ISO 8601 week number (01-53) | 01 |
| %w | weekday as a decimal number with Sunday as 0 (0-6) | 2 |
| %w | weekday as an integer number with Sunday as 0 (0-6) | 2 |
| %W | full weekday name (Monday-Sunday) | Monday |
| %y | Year, last two digits (00-99) | 18 |
| %Y | Year | 2018 |
@ -1328,10 +1330,11 @@ Result:
- [formatDateTimeInJodaSyntax](##formatDateTimeInJodaSyntax)
## formatDateTimeInJodaSyntax
## formatDateTimeInJodaSyntax {#date_time_functions-formatDateTimeInJodaSyntax}
Similar to formatDateTime, except that it formats datetime in Joda style instead of MySQL style. Refer to https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html.
The opposite operation of this function is [parseDateTimeInJodaSyntax](/docs/en/sql-reference/functions/type-conversion-functions.md#type_conversion_functions-parseDateTimeInJodaSyntax).
**Replacement fields**

View File

@ -1148,6 +1148,85 @@ Result:
└───────────────────────────┴──────────────────────────────┘
```
## parseDateTime {#type_conversion_functions-parseDateTime}
Converts a [String](/docs/en/sql-reference/data-types/string.md) to [DateTime](/docs/en/sql-reference/data-types/datetime.md) according to a [MySQL format string](https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_date-format).
This function is the opposite operation of function [formatDateTime](/docs/en/sql-reference/functions/date-time-functions.md#date_time_functions-formatDateTime).
**Syntax**
``` sql
parseDateTime(str, format[, timezone])
```
**Arguments**
- `str` — the String to be parsed
- `format` — the format string
- `timezone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). Optional.
**Returned value(s)**
Returns DateTime values parsed from input string according to a MySQL style format string.
**Supported format specifiers**
All format specifiers listed in [formatDateTime](/docs/en/sql-reference/functions/date-time-functions.md#date_time_functions-formatDateTime) except:
- %f: fractional second
- %Q: Quarter (1-4)
**Example**
``` sql
SELECT parseDateTime('2021-01-04+23:00:00', '%Y-%m-%d+%H:%i:%s')
┌─parseDateTime('2021-01-04+23:00:00', '%Y-%m-%d+%H:%i:%s')─┐
│ 2021-01-04 23:00:00 │
└───────────────────────────────────────────────────────────┘
```
Alias: `TO_TIMESTAMP`.
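As listed under **Arguments**, a time zone can be passed as the optional third parameter; a hedged sketch:

```sql
-- Parse the string as wall-clock time in the Europe/Minsk time zone.
SELECT parseDateTime('2021-01-04 23:00:00', '%Y-%m-%d %H:%i:%s', 'Europe/Minsk');
```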
## parseDateTimeInJodaSyntax {#type_conversion_functions-parseDateTimeInJodaSyntax}
Similar to [parseDateTime](#parsedatetime), except that the format string is in [Joda](https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html) instead of MySQL syntax.
This function is the opposite operation of function [formatDateTimeInJodaSyntax](/docs/en/sql-reference/functions/date-time-functions.md#date_time_functions-formatDateTimeInJodaSyntax).
**Syntax**
``` sql
parseDateTimeInJodaSyntax(str, format[, timezone])
```
**Arguments**
- `str` — the String to be parsed
- `format` — the format string
- `timezone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). Optional.
**Returned value(s)**
Returns DateTime values parsed from input string according to a Joda style format.
**Supported format specifiers**
All format specifiers listed in [formatDateTimeInJodaSyntax](/docs/en/sql-reference/functions/date-time-functions.md#date_time_functions-formatDateTimeInJodaSyntax) are supported, except:
- S: fraction of second
- z: time zone
- Z: time zone offset/id
**Example**
``` sql
SELECT parseDateTimeInJodaSyntax('2023-02-24 14:53:31', 'yyyy-MM-dd HH:mm:ss', 'Europe/Minsk')
┌─parseDateTimeInJodaSyntax('2023-02-24 14:53:31', 'yyyy-MM-dd HH:mm:ss', 'Europe/Minsk')─┐
│ 2023-02-24 14:53:31 │
└─────────────────────────────────────────────────────────────────────────────────────────┘
```
## parseDateTimeBestEffort
## parseDateTime32BestEffort
@ -1351,7 +1430,6 @@ Same as for [parseDateTime64BestEffort](#parsedatetime64besteffort), except that
Same as for [parseDateTime64BestEffort](#parsedatetime64besteffort), except that this function prefers US date format (`MM/DD/YYYY` etc.) in case of ambiguity and returns zero date or zero date time when it encounters a date format that cannot be processed.
## toLowCardinality
Converts input parameter to the [LowCardinality](/docs/en/sql-reference/data-types/lowcardinality.md) version of same data type.

View File

@ -36,7 +36,61 @@ There are multiple ways of user identification:
- `IDENTIFIED WITH kerberos` or `IDENTIFIED WITH kerberos REALM 'realm'`
- `IDENTIFIED WITH ssl_certificate CN 'mysite.com:user'`
For identification with sha256_hash using `SALT` - the hash must be calculated from the concatenation of 'password' and 'salt'.
## Examples
1. The following username is `name1` and does not require a password - which obviously doesn't provide much security:
```sql
CREATE USER name1 NOT IDENTIFIED
```
2. To specify a plaintext password:
```sql
CREATE USER name2 IDENTIFIED WITH plaintext_password BY 'my_password'
```
:::warning
The password is stored in a SQL text file in `/var/lib/clickhouse/access`, so it's not a good idea to use `plaintext_password`. Try `sha256_password` instead, as demonstrated next...
:::
3. The best option is to use a password that is hashed using SHA-256. ClickHouse will hash the password for you when you specify `IDENTIFIED WITH sha256_password`. For example:
```sql
CREATE USER name3 IDENTIFIED WITH sha256_password BY 'my_password'
```
Notice ClickHouse generates and runs the following command for you:
```response
CREATE USER name3
IDENTIFIED WITH sha256_hash
BY '8B3404953FCAA509540617F082DB13B3E0734F90FF6365C19300CC6A6EA818D6'
SALT 'D6489D8B5692D82FF944EA6415785A8A8A1AF33825456AFC554487725A74A609'
```
The `name3` user can now login using `my_password`, but the password is stored as the hashed value above. The following SQL file was created in `/var/lib/clickhouse/access` and gets executed at server startup:
```bash
/var/lib/clickhouse/access $ cat 3843f510-6ebd-a52d-72ac-e021686d8a93.sql
ATTACH USER name3 IDENTIFIED WITH sha256_hash BY '0C268556C1680BEF0640AAC1E7187566704208398DA31F03D18C74F5C5BE5053' SALT '4FB16307F5E10048196966DD7E6876AE53DE6A1D1F625488482C75F14A5097C7';
```
:::note
If you have already created a hash value and corresponding salt value for a username, then you can use `IDENTIFIED WITH sha256_hash BY 'hash'` or `IDENTIFIED WITH sha256_hash BY 'hash' SALT 'salt'`. For identification with `sha256_hash` using `SALT`, the hash must be calculated from the concatenation of 'password' and 'salt'.
:::
4. The `double_sha1_password` is not typically needed, but comes in handy when working with clients that require it (like the MySQL interface):
```sql
CREATE USER name4 IDENTIFIED WITH double_sha1_password BY 'my_password'
```
ClickHouse generates and runs the following query:
```response
CREATE USER name4 IDENTIFIED WITH double_sha1_hash BY 'CCD3A959D6A004B9C3807B728BC2E55B67E10518'
```
## User Host

View File

@ -24,9 +24,9 @@ The `DESCRIBE` statement returns a row for each table column with the following
- `ttl_expression` — A [TTL](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl) expression.
- `is_subcolumn` — A flag that equals `1` for internal subcolumns. It is included into the result only if subcolumn description is enabled by the [describe_include_subcolumns](../../operations/settings/settings.md#describe_include_subcolumns) setting.
All columns in [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) data structures are described separately. The name of each column is prefixed with a parent column name and a dot.
All columns in [Nested](../../sql-reference/data-types/nested-data-structures/index.md) data structures are described separately. The name of each column is prefixed with a parent column name and a dot.
To show internal subcolumns of other data types, use the [describe_include_subcolumns](../../operations/settings/settings.md#describe_include_subcolumns) setting.
To show internal subcolumns of other data types, use the [describe_include_subcolumns](../../operations/settings/settings.md#describe_include_subcolumns) setting.
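For instance, a minimal sketch (the table name `t` is hypothetical):
```sql
DESCRIBE TABLE t SETTINGS describe_include_subcolumns = 1
```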
**Example**

View File

@ -1,10 +1,10 @@
---
slug: /en/sql-reference/statements/
sidebar_position: 1
sidebar_label: Statements
sidebar_label: List of statements
---
# ClickHouse SQL Statements
# ClickHouse SQL Statements
Statements represent various kinds of actions you can perform using SQL queries. Each kind of statement has its own syntax and usage details that are described separately:

View File

@ -185,7 +185,7 @@ SETTINGS enable_unaligned_array_join = 1;
## ARRAY JOIN with Nested Data Structure
`ARRAY JOIN` also works with [nested data structures](../../../sql-reference/data-types/nested-data-structures/nested.md):
`ARRAY JOIN` also works with [nested data structures](../../../sql-reference/data-types/nested-data-structures/index.md):
``` sql
CREATE TABLE nested_test

View File

@ -4084,3 +4084,32 @@ ALTER TABLE test FREEZE SETTINGS alter_partition_verbose_result = 1;
Sets the character that is interpreted as a suffix after the result set of the [CustomSeparated](../../interfaces/formats.md#format-customseparated) format.
Default value: `''`.
## stop_reading_on_first_cancel {#stop_reading_on_first_cancel}
If set to `true` and the user wants to interrupt a query (for example, with `Ctrl+C` on the client), the query continues execution only for the data that has already been read from the table. After that, it returns a partial result of the query for the part of the table that has been read. To stop the query completely without a partial result, the user has to send 2 cancel requests.
**Example with the setting disabled, when pressing Ctrl+C**
```sql
SELECT sum(number) FROM numbers(10000000000)
Cancelling query.
Ok.
Query was cancelled.
0 rows in set. Elapsed: 1.334 sec. Processed 52.65 million rows, 421.23 MB (39.48 million rows/s., 315.85 MB/s.)
```
**Example with the setting enabled, when pressing Ctrl+C**
```sql
SELECT sum(number) FROM numbers(10000000000) SETTINGS stop_reading_on_first_cancel=true
┌──────sum(number)─┐
│ 1355411451286266 │
└──────────────────┘
1 row in set. Elapsed: 1.331 sec. Processed 52.13 million rows, 417.05 MB (39.17 million rows/s., 313.33 MB/s.)
```
Possible values: `true`, `false`
Default value: `false`

View File

@ -97,7 +97,7 @@ CREATE DATABASE mysql ENGINE = MaterializeMySQL('localhost:3306', 'db', 'user',
### DDL Queries {#ddl-queries}
MySQL DDL queries are converted into the corresponding ClickHouse DDL queries ([ALTER](../../sql-reference/statements/alter/index.md), [CREATE](../../sql-reference/statements/create/index.md), [DROP](../../sql-reference/statements/drop.md), [RENAME](../../sql-reference/statements/rename.md)). If ClickHouse cannot parse a DDL query, the query is ignored.
MySQL DDL queries are converted into the corresponding ClickHouse DDL queries ([ALTER](../../sql-reference/statements/alter/index.md), [CREATE](../../sql-reference/statements/create.md), [DROP](../../sql-reference/statements/drop.md), [RENAME](../../sql-reference/statements/rename.md)). If ClickHouse cannot parse a DDL query, the query is ignored.
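For illustration only (the database, table, and column names below are hypothetical): a DDL statement executed on the MySQL side, such as the following, is mirrored as the corresponding ClickHouse `ALTER`, while statements that cannot be parsed are skipped.
```sql
-- executed in MySQL; MaterializeMySQL applies the equivalent ALTER TABLE ... ADD COLUMN in ClickHouse
ALTER TABLE db.visits ADD COLUMN browser VARCHAR(32);
```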
### Data Replication {#data-replication}

View File

@ -109,7 +109,7 @@ MySQL中的Time 类型会被ClickHouse转换成微秒来存储
### DDL Queries {#ddl-queries}
MySQL DDL statements are converted into the corresponding ClickHouse DDL statements, for example: ([ALTER](../../sql-reference/statements/alter/index.md), [CREATE](../../sql-reference/statements/create/index.md), [DROP](../../sql-reference/statements/drop.md), [RENAME](../../sql-reference/statements/rename.md)). If ClickHouse cannot parse a DDL statement, the operation is skipped.
MySQL DDL statements are converted into the corresponding ClickHouse DDL statements, for example: ([ALTER](../../sql-reference/statements/alter/index.md), [CREATE](../../sql-reference/statements/create.md), [DROP](../../sql-reference/statements/drop.md), [RENAME](../../sql-reference/statements/rename.md)). If ClickHouse cannot parse a DDL statement, the operation is skipped.
### Data Replication {#data-replication}

View File

@ -1,5 +1,5 @@
---
slug: /zh/faq/general
slug: /zh/faq/general/overview
---
# Frequently Asked Questions {#chang-jian-wen-ti}

View File

@ -7,20 +7,20 @@ sidebar_position: 101
# What is a columnar database? {#what-is-a-columnar-database}
A columnar database stores the data of each column independently. This allows reading from disk only the data of the columns that are used in any given query. The cost is that operations affecting whole rows become proportionally more expensive. The synonym for a columnar database is a column-oriented database management system. ClickHouse is a typical example of such a system.
A columnar database stores the data of each column independently. This allows reading from disk only the data of the columns that are used in any given query. The cost is that operations affecting whole rows become proportionally more expensive. The synonym for a columnar database is a column-oriented database management system. ClickHouse is a typical example of such a system.
The main advantages of a columnar database are:
- Queries that use only a few columns out of many.
Aggregating queries over large volumes of data.
Per-column data compression.
- Aggregating queries over large volumes of data.
- Per-column data compression.
Here is the difference between a traditional row-oriented system and a columnar database when building reports:
**Traditional row-oriented**
!(Traditional row-oriented)(https://clickhouse.com/docs/en/images/row-oriented.gif)
![Traditional row-oriented](https://clickhouse.com/docs/assets/images/row-oriented-3e6fd5aa48e3075202d242b4799da8fa.gif)
**Columnar**
!(Columnar)(https://clickhouse.com/docs/en/images/column-oriented.gif)
![Columnar](https://clickhouse.com/docs/assets/images/column-oriented-d082e49b7743d4ded32c7952bfdb028f.gif)
A columnar database is the preferred choice for analytical applications because it allows having many columns in a table just in case, without paying the cost for unused columns at read query execution time. Column-oriented databases are designed for big data processing because, like data warehouses, they usually use distributed clusters of low-cost hardware to increase throughput. ClickHouse combines [distributed](../../engines/table-engines/special/distributed.md) and [replicated](../../engines/table-engines/mergetree-family/replication.md) tables.
A columnar database is the preferred choice for analytical applications because it allows having many columns in a table just in case, without paying the cost for unused columns at read query execution time. Column-oriented databases are designed for big data processing because, like data warehouses, they usually use distributed clusters of low-cost hardware to increase throughput. ClickHouse combines [distributed](../../engines/table-engines/special/distributed.md) and [replicated](../../engines/table-engines/mergetree-family/replication.md) tables.

View File

@ -21,8 +21,7 @@ sidebar_label: General
- [How do I contribute code to ClickHouse?](../../faq/general/how-do-i-contribute-code-to-clickhouse.md)
!!! info "Didn't find what you were looking for?"
Check the [other F.A.Q. categories](../../faq/) or browse the rest of the documentation in the left sidebar
Check the [other F.A.Q. categories](../../faq/index.md) or browse the rest of the documentation in the left sidebar
{## [Original article](https://clickhouse.com/docs/en/faq/general/) ##}

View File

@ -338,6 +338,12 @@ UserID.binURL.bin和EventTime.bin是<font face = "monospace">UserID</font>
:::note
- The last granule (granule 1082) holds fewer than 8192 rows.
- We mentioned in "DDL statement details" at the beginning of this guide that we disabled adaptive index granularity (in order to simplify the discussion in this guide and to make the diagrams and results reproducible).
Therefore all granules of the example table (except the last one) have the same size.
- For tables with adaptive index granularity (index granularity is adaptive by default), the size of some granules can be less than 8192 rows, depending on the row data sizes.
- We marked some column values from the primary key columns (<font face = "monospace">UserID</font>, <font face = "monospace">URL</font>) in orange.
These orange-marked column values are the minimum value of each primary key column within each granule. The exception is the last granule (granule 1082 in the diagram above), for which we marked the maximum values.

View File

@ -1,10 +0,0 @@
---
slug: /zh/sql-reference/functions/geo/
sidebar_label: Geo
sidebar_position: 62
title: "Geo Functions"
---
import Content from '@site/docs/en/sql-reference/functions/geo/index.md';
<Content />

View File

@ -1,5 +1,5 @@
---
slug: /zh/sql-reference/statements/alter/
slug: /zh/sql-reference/statements/alter/overview
sidebar_position: 35
sidebar_label: ALTER
---

View File

@ -1,11 +0,0 @@
---
slug: /zh/sql-reference/statements/create/
sidebar_label: CREATE
sidebar_position: 34
---
# CREATE Queries {#create-queries}
CREATE queries include the following subsets:
- [DATABASE](../../../sql-reference/statements/create/database.md)

View File

@ -10,7 +10,7 @@ sidebar_position: 31
- [SELECT](../../sql-reference/statements/select/index.md)
- [INSERT INTO](../../sql-reference/statements/insert-into.md)
- [CREATE](../../sql-reference/statements/create/index.md)
- [CREATE](../../sql-reference/statements/create.md)
- [ALTER](../../sql-reference/statements/alter/index.md)
- [SYSTEM](../../sql-reference/statements/system.md)
- [SHOW](../../sql-reference/statements/show.md)

View File

@ -222,6 +222,8 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
("pid-path", po::value<std::string>()->default_value("var/run/clickhouse-server"), "directory for pid file")
("user", po::value<std::string>()->default_value(DEFAULT_CLICKHOUSE_SERVER_USER), "clickhouse user to create")
("group", po::value<std::string>()->default_value(DEFAULT_CLICKHOUSE_SERVER_GROUP), "clickhouse group to create")
("noninteractive,y", "run non-interactively")
("link", "create symlink to the binary instead of copying to binary-path")
;
po::variables_map options;
@ -267,8 +269,6 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
/// Copy binary to the destination directory.
/// TODO An option to link instead of copy - useful for developers.
fs::path prefix = options["prefix"].as<std::string>();
fs::path bin_dir = prefix / options["binary-path"].as<std::string>();
@ -281,76 +281,129 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
bool old_binary_exists = fs::exists(main_bin_path);
bool already_installed = false;
/// Check if the binary is the same file (already installed).
if (old_binary_exists && binary_self_canonical_path == fs::canonical(main_bin_path))
if (options.count("link"))
{
already_installed = true;
fmt::print("ClickHouse binary is already located at {}\n", main_bin_path.string());
}
/// Check if binary has the same content.
else if (old_binary_exists && binary_size == fs::file_size(main_bin_path))
{
fmt::print("Found already existing ClickHouse binary at {} having the same size. Will check its contents.\n",
main_bin_path.string());
if (filesEqual(binary_self_path.string(), main_bin_path.string()))
if (old_binary_exists)
{
already_installed = true;
fmt::print("ClickHouse binary is already located at {} and it has the same content as {}\n",
main_bin_path.string(), binary_self_canonical_path.string());
}
}
bool is_symlink = FS::isSymlink(main_bin_path);
fs::path points_to;
if (is_symlink)
points_to = fs::weakly_canonical(FS::readSymlink(main_bin_path));
if (already_installed)
{
if (0 != chmod(main_bin_path.string().c_str(), S_IRUSR | S_IRGRP | S_IROTH | S_IXUSR | S_IXGRP | S_IXOTH))
throwFromErrno(fmt::format("Cannot chmod {}", main_bin_path.string()), ErrorCodes::SYSTEM_ERROR);
if (is_symlink && points_to == binary_self_canonical_path)
{
already_installed = true;
}
else
{
if (!is_symlink)
{
fmt::print("File {} already exists but it's not a symlink. Will rename to {}.\n",
main_bin_path.string(), main_bin_old_path.string());
fs::rename(main_bin_path, main_bin_old_path);
}
else if (points_to != main_bin_path)
{
fmt::print("Symlink {} already exists but it points to {}. Will replace the old symlink to {}.\n",
main_bin_path.string(), points_to.string(), binary_self_canonical_path.string());
fs::remove(main_bin_path);
}
}
}
if (!already_installed)
{
if (!fs::exists(bin_dir))
{
fmt::print("Creating binary directory {}.\n", bin_dir.string());
fs::create_directories(bin_dir);
}
fmt::print("Creating symlink {} to {}.\n", main_bin_path.string(), binary_self_canonical_path.string());
fs::create_symlink(binary_self_canonical_path, main_bin_path);
if (0 != chmod(binary_self_canonical_path.string().c_str(), S_IRUSR | S_IRGRP | S_IROTH | S_IXUSR | S_IXGRP | S_IXOTH))
throwFromErrno(fmt::format("Cannot chmod {}", binary_self_canonical_path.string()), ErrorCodes::SYSTEM_ERROR);
}
}
else
{
if (!fs::exists(bin_dir))
bool is_symlink = FS::isSymlink(main_bin_path);
if (!is_symlink)
{
fmt::print("Creating binary directory {}.\n", bin_dir.string());
fs::create_directories(bin_dir);
/// Check if the binary is the same file (already installed).
if (old_binary_exists && binary_self_canonical_path == fs::canonical(main_bin_path))
{
already_installed = true;
fmt::print("ClickHouse binary is already located at {}\n", main_bin_path.string());
}
/// Check if binary has the same content.
else if (old_binary_exists && binary_size == fs::file_size(main_bin_path))
{
fmt::print("Found already existing ClickHouse binary at {} having the same size. Will check its contents.\n",
main_bin_path.string());
if (filesEqual(binary_self_path.string(), main_bin_path.string()))
{
already_installed = true;
fmt::print("ClickHouse binary is already located at {} and it has the same content as {}\n",
main_bin_path.string(), binary_self_canonical_path.string());
}
}
}
size_t available_space = fs::space(bin_dir).available;
if (available_space < binary_size)
throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "Not enough space for clickhouse binary in {}, required {}, available {}.",
bin_dir.string(), ReadableSize(binary_size), ReadableSize(available_space));
fmt::print("Copying ClickHouse binary to {}\n", main_bin_tmp_path.string());
try
if (already_installed)
{
ReadBufferFromFile in(binary_self_path.string());
WriteBufferFromFile out(main_bin_tmp_path.string());
copyData(in, out);
out.sync();
if (0 != fchmod(out.getFD(), S_IRUSR | S_IRGRP | S_IROTH | S_IXUSR | S_IXGRP | S_IXOTH))
throwFromErrno(fmt::format("Cannot chmod {}", main_bin_tmp_path.string()), ErrorCodes::SYSTEM_ERROR);
out.finalize();
if (0 != chmod(main_bin_path.string().c_str(), S_IRUSR | S_IRGRP | S_IROTH | S_IXUSR | S_IXGRP | S_IXOTH))
throwFromErrno(fmt::format("Cannot chmod {}", main_bin_path.string()), ErrorCodes::SYSTEM_ERROR);
}
catch (const Exception & e)
else
{
if (e.code() == ErrorCodes::CANNOT_OPEN_FILE && geteuid() != 0)
std::cerr << "Install must be run as root: " << formatWithSudo("./clickhouse install") << '\n';
throw;
if (!fs::exists(bin_dir))
{
fmt::print("Creating binary directory {}.\n", bin_dir.string());
fs::create_directories(bin_dir);
}
size_t available_space = fs::space(bin_dir).available;
if (available_space < binary_size)
throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "Not enough space for clickhouse binary in {}, required {}, available {}.",
bin_dir.string(), ReadableSize(binary_size), ReadableSize(available_space));
fmt::print("Copying ClickHouse binary to {}\n", main_bin_tmp_path.string());
try
{
ReadBufferFromFile in(binary_self_path.string());
WriteBufferFromFile out(main_bin_tmp_path.string());
copyData(in, out);
out.sync();
if (0 != fchmod(out.getFD(), S_IRUSR | S_IRGRP | S_IROTH | S_IXUSR | S_IXGRP | S_IXOTH))
throwFromErrno(fmt::format("Cannot chmod {}", main_bin_tmp_path.string()), ErrorCodes::SYSTEM_ERROR);
out.finalize();
}
catch (const Exception & e)
{
if (e.code() == ErrorCodes::CANNOT_OPEN_FILE && geteuid() != 0)
std::cerr << "Install must be run as root: " << formatWithSudo("./clickhouse install") << '\n';
throw;
}
if (old_binary_exists)
{
fmt::print("{} already exists, will rename existing binary to {} and put the new binary in place\n",
main_bin_path.string(), main_bin_old_path.string());
/// There is file exchange operation in Linux but it's not portable.
fs::rename(main_bin_path, main_bin_old_path);
}
fmt::print("Renaming {} to {}.\n", main_bin_tmp_path.string(), main_bin_path.string());
fs::rename(main_bin_tmp_path, main_bin_path);
}
if (old_binary_exists)
{
fmt::print("{} already exists, will rename existing binary to {} and put the new binary in place\n",
main_bin_path.string(), main_bin_old_path.string());
/// There is file exchange operation in Linux but it's not portable.
fs::rename(main_bin_path, main_bin_old_path);
}
fmt::print("Renaming {} to {}.\n", main_bin_tmp_path.string(), main_bin_path.string());
fs::rename(main_bin_tmp_path, main_bin_path);
}
/// Create symlinks.
@ -384,7 +437,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
if (is_symlink)
points_to = fs::weakly_canonical(FS::readSymlink(symlink_path));
if (is_symlink && points_to == main_bin_path)
if (is_symlink && (points_to == main_bin_path || (options.count("link") && points_to == binary_self_canonical_path)))
{
need_to_create = false;
}
@ -709,7 +762,9 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
/// dpkg or apt installers can ask for non-interactive work explicitly.
const char * debian_frontend_var = getenv("DEBIAN_FRONTEND"); // NOLINT(concurrency-mt-unsafe)
bool noninteractive = debian_frontend_var && debian_frontend_var == std::string_view("noninteractive");
bool noninteractive = (debian_frontend_var && debian_frontend_var == std::string_view("noninteractive"))
|| options.count("noninteractive");
bool is_interactive = !noninteractive && stdin_is_a_tty && stdout_is_a_tty;

View File

@ -703,6 +703,9 @@
actions of previous constraint (defined in other profiles) for the same specific setting, including fields that are not set by new constraint.
It also enables 'changeable_in_readonly' constraint type -->
<settings_constraints_replace_previous>false</settings_constraints_replace_previous>
<!-- Number of seconds a role is kept in the Role Cache after it was last accessed -->
<role_cache_expiration_time_seconds>600</role_cache_expiration_time_seconds>
</access_control_improvements>
<!-- Default profile of settings. -->
@ -1514,7 +1517,7 @@
<!-- Configuration for the query cache -->
<!-- <query_cache> -->
<!-- <size>1073741824</size> -->
<!-- <max_size>1073741824</max_size> -->
<!-- <max_entries>1024</max_entries> -->
<!-- <max_entry_size>1048576</max_entry_size> -->
<!-- <max_entry_rows>30000000</max_entry_rows> -->

View File

@ -247,7 +247,7 @@ private:
AccessControl::AccessControl()
: MultipleAccessStorage("user directories"),
context_access_cache(std::make_unique<ContextAccessCache>(*this)),
role_cache(std::make_unique<RoleCache>(*this)),
role_cache(std::make_unique<RoleCache>(*this, 600)),
row_policy_cache(std::make_unique<RowPolicyCache>(*this)),
quota_cache(std::make_unique<QuotaCache>(*this)),
settings_profiles_cache(std::make_unique<SettingsProfilesCache>(*this)),
@ -282,6 +282,8 @@ void AccessControl::setUpFromMainConfig(const Poco::Util::AbstractConfiguration
setSettingsConstraintsReplacePrevious(config_.getBool("access_control_improvements.settings_constraints_replace_previous", false));
addStoragesFromMainConfig(config_, config_path_, get_zookeeper_function_);
role_cache = std::make_unique<RoleCache>(*this, config_.getInt("access_control_improvements.role_cache_expiration_time_seconds", 600));
}

View File

@ -674,18 +674,16 @@ void ReplicatedAccessStorage::backup(BackupEntriesCollector & backup_entries_col
backup_entries_collector.getContext()->getAccessControl());
auto backup_coordination = backup_entries_collector.getBackupCoordination();
String current_host_id = backup_entries_collector.getBackupSettings().host_id;
backup_coordination->addReplicatedAccessFilePath(zookeeper_path, type, current_host_id, backup_entry_with_path.first);
backup_coordination->addReplicatedAccessFilePath(zookeeper_path, type, backup_entry_with_path.first);
backup_entries_collector.addPostTask(
[backup_entry = backup_entry_with_path.second,
zookeeper_path = zookeeper_path,
type,
current_host_id,
&backup_entries_collector,
backup_coordination]
{
for (const String & path : backup_coordination->getReplicatedAccessFilePaths(zookeeper_path, type, current_host_id))
for (const String & path : backup_coordination->getReplicatedAccessFilePaths(zookeeper_path, type))
backup_entries_collector.addBackupEntry(path, backup_entry);
});
}

View File

@ -56,8 +56,8 @@ namespace
}
RoleCache::RoleCache(const AccessControl & access_control_)
: access_control(access_control_), cache(600000 /* 10 minutes */)
RoleCache::RoleCache(const AccessControl & access_control_, int expiration_time_seconds)
: access_control(access_control_), cache(expiration_time_seconds * 1000 /* 10 minutes by default */)
{
}

View File

@ -16,7 +16,7 @@ using RolePtr = std::shared_ptr<const Role>;
class RoleCache
{
public:
explicit RoleCache(const AccessControl & access_control_);
explicit RoleCache(const AccessControl & access_control_, int expiration_time_seconds);
~RoleCache();
std::shared_ptr<const EnabledRoles> getEnabledRoles(

View File

@ -49,7 +49,7 @@ QueryTreeNodePtr ArrayJoinNode::cloneImpl() const
return std::make_shared<ArrayJoinNode>(getTableExpression(), getJoinExpressionsNode(), is_left);
}
ASTPtr ArrayJoinNode::toASTImpl() const
ASTPtr ArrayJoinNode::toASTImpl(const ConvertToASTOptions & options) const
{
auto array_join_ast = std::make_shared<ASTArrayJoin>();
array_join_ast->kind = is_left ? ASTArrayJoin::Kind::Left : ASTArrayJoin::Kind::Inner;
@ -63,9 +63,9 @@ ASTPtr ArrayJoinNode::toASTImpl() const
auto * column_node = array_join_expression->as<ColumnNode>();
if (column_node && column_node->getExpression())
array_join_expression_ast = column_node->getExpression()->toAST();
array_join_expression_ast = column_node->getExpression()->toAST(options);
else
array_join_expression_ast = array_join_expression->toAST();
array_join_expression_ast = array_join_expression->toAST(options);
array_join_expression_ast->setAlias(array_join_expression->getAlias());
array_join_expressions_ast->children.push_back(std::move(array_join_expression_ast));
@ -75,7 +75,7 @@ ASTPtr ArrayJoinNode::toASTImpl() const
array_join_ast->expression_list = array_join_ast->children.back();
ASTPtr tables_in_select_query_ast = std::make_shared<ASTTablesInSelectQuery>();
addTableExpressionOrJoinIntoTablesInSelectQuery(tables_in_select_query_ast, children[table_expression_child_index]);
addTableExpressionOrJoinIntoTablesInSelectQuery(tables_in_select_query_ast, children[table_expression_child_index], options);
auto array_join_query_element_ast = std::make_shared<ASTTablesInSelectQueryElement>();
array_join_query_element_ast->children.push_back(std::move(array_join_ast));

View File

@ -99,7 +99,7 @@ protected:
QueryTreeNodePtr cloneImpl() const override;
ASTPtr toASTImpl() const override;
ASTPtr toASTImpl(const ConvertToASTOptions & options) const override;
private:
bool is_left = false;

View File

@ -91,12 +91,12 @@ QueryTreeNodePtr ColumnNode::cloneImpl() const
return std::make_shared<ColumnNode>(column, getSourceWeakPointer());
}
ASTPtr ColumnNode::toASTImpl() const
ASTPtr ColumnNode::toASTImpl(const ConvertToASTOptions & options) const
{
std::vector<std::string> column_identifier_parts;
auto column_source = getColumnSourceOrNull();
if (column_source)
if (column_source && options.fully_qualified_identifiers)
{
auto node_type = column_source->getNodeType();
if (node_type == QueryTreeNodeType::TABLE ||

View File

@ -132,7 +132,7 @@ protected:
QueryTreeNodePtr cloneImpl() const override;
ASTPtr toASTImpl() const override;
ASTPtr toASTImpl(const ConvertToASTOptions & options) const override;
private:
const QueryTreeNodeWeakPtr & getSourceWeakPointer() const

View File

@ -91,7 +91,7 @@ QueryTreeNodePtr ApplyColumnTransformerNode::cloneImpl() const
return std::make_shared<ApplyColumnTransformerNode>(getExpressionNode());
}
ASTPtr ApplyColumnTransformerNode::toASTImpl() const
ASTPtr ApplyColumnTransformerNode::toASTImpl(const ConvertToASTOptions & options) const
{
auto ast_apply_transformer = std::make_shared<ASTColumnsApplyTransformer>();
const auto & expression_node = getExpressionNode();
@ -100,14 +100,14 @@ ASTPtr ApplyColumnTransformerNode::toASTImpl() const
{
auto & function_expression = expression_node->as<FunctionNode &>();
ast_apply_transformer->func_name = function_expression.getFunctionName();
ast_apply_transformer->parameters = function_expression.getParametersNode()->toAST();
ast_apply_transformer->parameters = function_expression.getParametersNode()->toAST(options);
}
else
{
auto & lambda_expression = expression_node->as<LambdaNode &>();
if (!lambda_expression.getArgumentNames().empty())
ast_apply_transformer->lambda_arg = lambda_expression.getArgumentNames()[0];
ast_apply_transformer->lambda = lambda_expression.toAST();
ast_apply_transformer->lambda = lambda_expression.toAST(options);
}
return ast_apply_transformer;
@ -227,7 +227,7 @@ QueryTreeNodePtr ExceptColumnTransformerNode::cloneImpl() const
return std::make_shared<ExceptColumnTransformerNode>(except_column_names, is_strict);
}
ASTPtr ExceptColumnTransformerNode::toASTImpl() const
ASTPtr ExceptColumnTransformerNode::toASTImpl(const ConvertToASTOptions & /* options */) const
{
auto ast_except_transformer = std::make_shared<ASTColumnsExceptTransformer>();
@ -334,7 +334,7 @@ QueryTreeNodePtr ReplaceColumnTransformerNode::cloneImpl() const
return result_replace_transformer;
}
ASTPtr ReplaceColumnTransformerNode::toASTImpl() const
ASTPtr ReplaceColumnTransformerNode::toASTImpl(const ConvertToASTOptions & options) const
{
auto ast_replace_transformer = std::make_shared<ASTColumnsReplaceTransformer>();
@ -347,7 +347,7 @@ ASTPtr ReplaceColumnTransformerNode::toASTImpl() const
{
auto replacement_ast = std::make_shared<ASTColumnsReplaceTransformer::Replacement>();
replacement_ast->name = replacements_names[i];
replacement_ast->children.push_back(replacement_expressions_nodes[i]->toAST());
replacement_ast->children.push_back(replacement_expressions_nodes[i]->toAST(options));
ast_replace_transformer->children.push_back(std::move(replacement_ast));
}

View File

@ -141,7 +141,7 @@ protected:
QueryTreeNodePtr cloneImpl() const override;
ASTPtr toASTImpl() const override;
ASTPtr toASTImpl(const ConvertToASTOptions & options) const override;
private:
ApplyColumnTransformerType apply_transformer_type = ApplyColumnTransformerType::LAMBDA;
@ -220,7 +220,7 @@ protected:
QueryTreeNodePtr cloneImpl() const override;
ASTPtr toASTImpl() const override;
ASTPtr toASTImpl(const ConvertToASTOptions & options) const override;
private:
ExceptColumnTransformerType except_transformer_type;
@ -298,7 +298,7 @@ protected:
QueryTreeNodePtr cloneImpl() const override;
ASTPtr toASTImpl() const override;
ASTPtr toASTImpl(const ConvertToASTOptions & options) const override;
private:
ListNode & getReplacements()

View File

@ -75,11 +75,14 @@ QueryTreeNodePtr ConstantNode::cloneImpl() const
return std::make_shared<ConstantNode>(constant_value, source_expression);
}
ASTPtr ConstantNode::toASTImpl() const
ASTPtr ConstantNode::toASTImpl(const ConvertToASTOptions & options) const
{
const auto & constant_value_literal = constant_value->getValue();
auto constant_value_ast = std::make_shared<ASTLiteral>(constant_value_literal);
if (!options.add_cast_for_constants)
return constant_value_ast;
bool need_to_add_cast_function = false;
auto constant_value_literal_type = constant_value_literal.getType();
WhichDataType constant_value_type(constant_value->getType());

View File

@ -83,7 +83,7 @@ protected:
QueryTreeNodePtr cloneImpl() const override;
ASTPtr toASTImpl() const override;
ASTPtr toASTImpl(const ConvertToASTOptions & options) const override;
private:
ConstantValuePtr constant_value;

View File

@ -197,7 +197,7 @@ QueryTreeNodePtr FunctionNode::cloneImpl() const
return result_function;
}
ASTPtr FunctionNode::toASTImpl() const
ASTPtr FunctionNode::toASTImpl(const ConvertToASTOptions & options) const
{
auto function_ast = std::make_shared<ASTFunction>();
@ -212,12 +212,12 @@ ASTPtr FunctionNode::toASTImpl() const
const auto & parameters = getParameters();
if (!parameters.getNodes().empty())
{
function_ast->children.push_back(parameters.toAST());
function_ast->children.push_back(parameters.toAST(options));
function_ast->parameters = function_ast->children.back();
}
const auto & arguments = getArguments();
function_ast->children.push_back(arguments.toAST());
function_ast->children.push_back(arguments.toAST(options));
function_ast->arguments = function_ast->children.back();
auto window_node = getWindowNode();
@ -226,7 +226,7 @@ ASTPtr FunctionNode::toASTImpl() const
if (auto * identifier_node = window_node->as<IdentifierNode>())
function_ast->window_name = identifier_node->getIdentifier().getFullName();
else
function_ast->window_definition = window_node->toAST();
function_ast->window_definition = window_node->toAST(options);
}
return function_ast;

View File

@ -209,7 +209,7 @@ protected:
QueryTreeNodePtr cloneImpl() const override;
ASTPtr toASTImpl() const override;
ASTPtr toASTImpl(const ConvertToASTOptions & options) const override;
private:
String function_name;

View File

@ -331,9 +331,9 @@ QueryTreeNodePtr IQueryTreeNode::cloneAndReplace(const QueryTreeNodePtr & node_t
return cloneAndReplace(replacement_map);
}
ASTPtr IQueryTreeNode::toAST() const
ASTPtr IQueryTreeNode::toAST(const ConvertToASTOptions & options) const
{
auto converted_node = toASTImpl();
auto converted_node = toASTImpl(options);
if (auto * ast_with_alias = dynamic_cast<ASTWithAlias *>(converted_node.get()))
converted_node->setAlias(alias);

View File

@ -181,8 +181,17 @@ public:
*/
String formatOriginalASTForErrorMessage() const;
struct ConvertToASTOptions
{
/// Add _CAST if constant literal type is different from column type
bool add_cast_for_constants = true;
/// Identifiers are fully qualified (`database.table.column`), otherwise names are just column names (`column`)
bool fully_qualified_identifiers = true;
};
/// Convert query tree to AST
ASTPtr toAST() const;
ASTPtr toAST(const ConvertToASTOptions & options = { .add_cast_for_constants = true, .fully_qualified_identifiers = true }) const;
/// Convert query tree to AST and then format it for error message.
String formatConvertedASTForErrorMessage() const;
@ -258,7 +267,7 @@ protected:
virtual QueryTreeNodePtr cloneImpl() const = 0;
/// Subclass must convert its internal state and its children to AST
virtual ASTPtr toASTImpl() const = 0;
virtual ASTPtr toASTImpl(const ConvertToASTOptions & options) const = 0;
QueryTreeNodes children;
QueryTreeWeakNodes weak_pointers;

View File

@ -58,7 +58,7 @@ QueryTreeNodePtr IdentifierNode::cloneImpl() const
return std::make_shared<IdentifierNode>(identifier);
}
ASTPtr IdentifierNode::toASTImpl() const
ASTPtr IdentifierNode::toASTImpl(const ConvertToASTOptions & /* options */) const
{
auto identifier_parts = identifier.getParts();
return std::make_shared<ASTIdentifier>(std::move(identifier_parts));

View File

@ -59,7 +59,7 @@ protected:
QueryTreeNodePtr cloneImpl() const override;
ASTPtr toASTImpl() const override;
ASTPtr toASTImpl(const ConvertToASTOptions & options) const override;
private:
Identifier identifier;

View File

@ -44,11 +44,11 @@ QueryTreeNodePtr InterpolateNode::cloneImpl() const
return std::make_shared<InterpolateNode>(nullptr /*expression*/, nullptr /*interpolate_expression*/);
}
ASTPtr InterpolateNode::toASTImpl() const
ASTPtr InterpolateNode::toASTImpl(const ConvertToASTOptions & options) const
{
auto result = std::make_shared<ASTInterpolateElement>();
result->column = getExpression()->toAST()->getColumnName();
result->children.push_back(getInterpolateExpression()->toAST());
result->column = getExpression()->toAST(options)->getColumnName();
result->children.push_back(getInterpolateExpression()->toAST(options));
result->expr = result->children.back();
return result;

View File

@ -59,7 +59,7 @@ protected:
QueryTreeNodePtr cloneImpl() const override;
ASTPtr toASTImpl() const override;
ASTPtr toASTImpl(const ConvertToASTOptions & options) const override;
private:
static constexpr size_t expression_child_index = 0;

View File

@ -99,17 +99,17 @@ QueryTreeNodePtr JoinNode::cloneImpl() const
return std::make_shared<JoinNode>(getLeftTableExpression(), getRightTableExpression(), getJoinExpression(), locality, strictness, kind);
}
ASTPtr JoinNode::toASTImpl() const
ASTPtr JoinNode::toASTImpl(const ConvertToASTOptions & options) const
{
ASTPtr tables_in_select_query_ast = std::make_shared<ASTTablesInSelectQuery>();
addTableExpressionOrJoinIntoTablesInSelectQuery(tables_in_select_query_ast, children[left_table_expression_child_index]);
addTableExpressionOrJoinIntoTablesInSelectQuery(tables_in_select_query_ast, children[left_table_expression_child_index], options);
size_t join_table_index = tables_in_select_query_ast->children.size();
auto join_ast = toASTTableJoin();
addTableExpressionOrJoinIntoTablesInSelectQuery(tables_in_select_query_ast, children[right_table_expression_child_index]);
addTableExpressionOrJoinIntoTablesInSelectQuery(tables_in_select_query_ast, children[right_table_expression_child_index], options);
auto & table_element = tables_in_select_query_ast->children.at(join_table_index)->as<ASTTablesInSelectQueryElement &>();
table_element.children.push_back(std::move(join_ast));

View File

@ -148,7 +148,7 @@ protected:
QueryTreeNodePtr cloneImpl() const override;
ASTPtr toASTImpl() const override;
ASTPtr toASTImpl(const ConvertToASTOptions & options) const override;
private:
JoinLocality locality = JoinLocality::Unspecified;

View File

@ -65,17 +65,17 @@ QueryTreeNodePtr LambdaNode::cloneImpl() const
return std::make_shared<LambdaNode>(argument_names, getExpression());
}
ASTPtr LambdaNode::toASTImpl() const
ASTPtr LambdaNode::toASTImpl(const ConvertToASTOptions & options) const
{
auto lambda_function_arguments_ast = std::make_shared<ASTExpressionList>();
auto tuple_function = std::make_shared<ASTFunction>();
tuple_function->name = "tuple";
tuple_function->children.push_back(children[arguments_child_index]->toAST());
tuple_function->children.push_back(children[arguments_child_index]->toAST(options));
tuple_function->arguments = tuple_function->children.back();
lambda_function_arguments_ast->children.push_back(std::move(tuple_function));
lambda_function_arguments_ast->children.push_back(children[expression_child_index]->toAST());
lambda_function_arguments_ast->children.push_back(children[expression_child_index]->toAST(options));
auto lambda_function_ast = std::make_shared<ASTFunction>();
lambda_function_ast->name = "lambda";

View File

@ -98,7 +98,7 @@ protected:
QueryTreeNodePtr cloneImpl() const override;
ASTPtr toASTImpl() const override;
ASTPtr toASTImpl(const ConvertToASTOptions & options) const override;
private:
Names argument_names;

View File

@ -54,7 +54,7 @@ QueryTreeNodePtr ListNode::cloneImpl() const
return std::make_shared<ListNode>();
}
ASTPtr ListNode::toASTImpl() const
ASTPtr ListNode::toASTImpl(const ConvertToASTOptions & options) const
{
auto expression_list_ast = std::make_shared<ASTExpressionList>();
@ -62,7 +62,7 @@ ASTPtr ListNode::toASTImpl() const
expression_list_ast->children.resize(children_size);
for (size_t i = 0; i < children_size; ++i)
expression_list_ast->children[i] = children[i]->toAST();
expression_list_ast->children[i] = children[i]->toAST(options);
return expression_list_ast;
}

View File

@ -57,7 +57,7 @@ protected:
QueryTreeNodePtr cloneImpl() const override;
ASTPtr toASTImpl() const override;
ASTPtr toASTImpl(const ConvertToASTOptions & options) const override;
};
}

View File

@ -204,7 +204,7 @@ QueryTreeNodePtr MatcherNode::cloneImpl() const
return matcher_node;
}
ASTPtr MatcherNode::toASTImpl() const
ASTPtr MatcherNode::toASTImpl(const ConvertToASTOptions & options) const
{
ASTPtr result;
ASTPtr transformers;
@ -216,7 +216,7 @@ ASTPtr MatcherNode::toASTImpl() const
transformers = std::make_shared<ASTColumnsTransformerList>();
for (const auto & column_transformer : column_transformers)
transformers->children.push_back(column_transformer->toAST());
transformers->children.push_back(column_transformer->toAST(options));
}
if (matcher_type == MatcherNodeType::ASTERISK)

View File

@ -148,7 +148,7 @@ protected:
QueryTreeNodePtr cloneImpl() const override;
ASTPtr toASTImpl() const override;
ASTPtr toASTImpl(const ConvertToASTOptions & options) const override;
private:
explicit MatcherNode(MatcherNodeType matcher_type_,

View File

@ -111,7 +111,6 @@ namespace ErrorCodes
extern const int NOT_IMPLEMENTED;
extern const int ALIAS_REQUIRED;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ILLEGAL_PREWHERE;
extern const int UNKNOWN_TABLE;
}
@ -6856,13 +6855,7 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier
if (query_node_typed.isGroupByAll())
expandGroupByAll(query_node_typed);
if (query_node_typed.hasPrewhere())
assertNoFunctionNodes(query_node_typed.getPrewhere(),
"arrayJoin",
ErrorCodes::ILLEGAL_PREWHERE,
"ARRAY JOIN",
"in PREWHERE");
validateFilters(query_node);
validateAggregates(query_node, { .group_by_use_nulls = scope.group_by_use_nulls });
for (const auto & column : projection_columns)

View File

@ -259,7 +259,7 @@ QueryTreeNodePtr QueryNode::cloneImpl() const
return result_query_node;
}
ASTPtr QueryNode::toASTImpl() const
ASTPtr QueryNode::toASTImpl(const ConvertToASTOptions & options) const
{
auto select_query = std::make_shared<ASTSelectQuery>();
select_query->distinct = is_distinct;
@ -271,9 +271,9 @@ ASTPtr QueryNode::toASTImpl() const
select_query->group_by_all = is_group_by_all;
if (hasWith())
select_query->setExpression(ASTSelectQuery::Expression::WITH, getWith().toAST());
select_query->setExpression(ASTSelectQuery::Expression::WITH, getWith().toAST(options));
auto projection_ast = getProjection().toAST();
auto projection_ast = getProjection().toAST(options);
auto & projection_expression_list_ast = projection_ast->as<ASTExpressionList &>();
size_t projection_expression_list_ast_children_size = projection_expression_list_ast.children.size();
if (projection_expression_list_ast_children_size != getProjection().getNodes().size())
@ -293,44 +293,44 @@ ASTPtr QueryNode::toASTImpl() const
select_query->setExpression(ASTSelectQuery::Expression::SELECT, std::move(projection_ast));
ASTPtr tables_in_select_query_ast = std::make_shared<ASTTablesInSelectQuery>();
addTableExpressionOrJoinIntoTablesInSelectQuery(tables_in_select_query_ast, getJoinTree());
addTableExpressionOrJoinIntoTablesInSelectQuery(tables_in_select_query_ast, getJoinTree(), options);
select_query->setExpression(ASTSelectQuery::Expression::TABLES, std::move(tables_in_select_query_ast));
if (getPrewhere())
select_query->setExpression(ASTSelectQuery::Expression::PREWHERE, getPrewhere()->toAST());
select_query->setExpression(ASTSelectQuery::Expression::PREWHERE, getPrewhere()->toAST(options));
if (getWhere())
select_query->setExpression(ASTSelectQuery::Expression::WHERE, getWhere()->toAST());
select_query->setExpression(ASTSelectQuery::Expression::WHERE, getWhere()->toAST(options));
if (!is_group_by_all && hasGroupBy())
select_query->setExpression(ASTSelectQuery::Expression::GROUP_BY, getGroupBy().toAST());
select_query->setExpression(ASTSelectQuery::Expression::GROUP_BY, getGroupBy().toAST(options));
if (hasHaving())
select_query->setExpression(ASTSelectQuery::Expression::HAVING, getHaving()->toAST());
select_query->setExpression(ASTSelectQuery::Expression::HAVING, getHaving()->toAST(options));
if (hasWindow())
select_query->setExpression(ASTSelectQuery::Expression::WINDOW, getWindow().toAST());
select_query->setExpression(ASTSelectQuery::Expression::WINDOW, getWindow().toAST(options));
if (hasOrderBy())
select_query->setExpression(ASTSelectQuery::Expression::ORDER_BY, getOrderBy().toAST());
select_query->setExpression(ASTSelectQuery::Expression::ORDER_BY, getOrderBy().toAST(options));
if (hasInterpolate())
select_query->setExpression(ASTSelectQuery::Expression::INTERPOLATE, getInterpolate()->toAST());
select_query->setExpression(ASTSelectQuery::Expression::INTERPOLATE, getInterpolate()->toAST(options));
if (hasLimitByLimit())
select_query->setExpression(ASTSelectQuery::Expression::LIMIT_BY_LENGTH, getLimitByLimit()->toAST());
select_query->setExpression(ASTSelectQuery::Expression::LIMIT_BY_LENGTH, getLimitByLimit()->toAST(options));
if (hasLimitByOffset())
select_query->setExpression(ASTSelectQuery::Expression::LIMIT_BY_OFFSET, getLimitByOffset()->toAST());
select_query->setExpression(ASTSelectQuery::Expression::LIMIT_BY_OFFSET, getLimitByOffset()->toAST(options));
if (hasLimitBy())
select_query->setExpression(ASTSelectQuery::Expression::LIMIT_BY, getLimitBy().toAST());
select_query->setExpression(ASTSelectQuery::Expression::LIMIT_BY, getLimitBy().toAST(options));
if (hasLimit())
select_query->setExpression(ASTSelectQuery::Expression::LIMIT_LENGTH, getLimit()->toAST());
select_query->setExpression(ASTSelectQuery::Expression::LIMIT_LENGTH, getLimit()->toAST(options));
if (hasOffset())
select_query->setExpression(ASTSelectQuery::Expression::LIMIT_OFFSET, getOffset()->toAST());
select_query->setExpression(ASTSelectQuery::Expression::LIMIT_OFFSET, getOffset()->toAST(options));
if (hasSettingsChanges())
{

View File

@ -575,7 +575,7 @@ protected:
QueryTreeNodePtr cloneImpl() const override;
ASTPtr toASTImpl() const override;
ASTPtr toASTImpl(const ConvertToASTOptions & options) const override;
private:
bool is_subquery = false;

View File

@ -838,8 +838,14 @@ QueryTreeNodePtr QueryTreeBuilder::buildJoinTree(const ASTPtr & tables_in_select
const auto & function_arguments_list = table_function_expression.arguments->as<ASTExpressionList &>().children;
for (const auto & argument : function_arguments_list)
{
if (!node->getSettingsChanges().empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Table function '{}' has arguments after SETTINGS",
table_function_expression.formatForErrorMessage());
if (argument->as<ASTSelectQuery>() || argument->as<ASTSelectWithUnionQuery>() || argument->as<ASTSelectIntersectExceptQuery>())
node->getArguments().getNodes().push_back(buildSelectOrUnionExpression(argument, false /*is_subquery*/, {} /*cte_name*/, context));
else if (const auto * ast_set = argument->as<ASTSetQuery>())
node->setSettingsChanges(ast_set->changes);
else
node->getArguments().getNodes().push_back(buildExpression(argument, context));
}

View File

@ -109,7 +109,7 @@ QueryTreeNodePtr SortNode::cloneImpl() const
return std::make_shared<SortNode>(nullptr /*expression*/, sort_direction, nulls_sort_direction, collator, with_fill);
}
ASTPtr SortNode::toASTImpl() const
ASTPtr SortNode::toASTImpl(const ConvertToASTOptions & options) const
{
auto result = std::make_shared<ASTOrderByElement>();
result->direction = sort_direction == SortDirection::ASCENDING ? 1 : -1;
@ -120,10 +120,10 @@ ASTPtr SortNode::toASTImpl() const
result->nulls_direction_was_explicitly_specified = nulls_sort_direction.has_value();
result->with_fill = with_fill;
result->fill_from = hasFillFrom() ? getFillFrom()->toAST() : nullptr;
result->fill_to = hasFillTo() ? getFillTo()->toAST() : nullptr;
result->fill_step = hasFillStep() ? getFillStep()->toAST() : nullptr;
result->children.push_back(getExpression()->toAST());
result->fill_from = hasFillFrom() ? getFillFrom()->toAST(options) : nullptr;
result->fill_to = hasFillTo() ? getFillTo()->toAST(options) : nullptr;
result->fill_step = hasFillStep() ? getFillStep()->toAST(options) : nullptr;
result->children.push_back(getExpression()->toAST(options));
if (collator)
{

View File

@ -137,7 +137,7 @@ protected:
QueryTreeNodePtr cloneImpl() const override;
ASTPtr toASTImpl() const override;
ASTPtr toASTImpl(const ConvertToASTOptions & options) const override;
private:
static constexpr size_t sort_expression_child_index = 0;

View File

@ -7,6 +7,7 @@
#include <Storages/IStorage.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTSetQuery.h>
#include <Interpreters/Context.h>
@ -71,6 +72,13 @@ void TableFunctionNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_
buffer << '\n' << std::string(indent + 2, ' ') << "ARGUMENTS\n";
arguments.dumpTreeImpl(buffer, format_state, indent + 4);
}
if (!settings_changes.empty())
{
buffer << '\n' << std::string(indent + 2, ' ') << "SETTINGS";
for (const auto & change : settings_changes)
buffer << fmt::format(" {}={}", change.name, toString(change.value));
}
}
bool TableFunctionNode::isEqualImpl(const IQueryTreeNode & rhs) const
@ -82,6 +90,9 @@ bool TableFunctionNode::isEqualImpl(const IQueryTreeNode & rhs) const
if (storage && rhs_typed.storage)
return storage_id == rhs_typed.storage_id;
if (settings_changes != rhs_typed.settings_changes)
return false;
return table_expression_modifiers == rhs_typed.table_expression_modifiers;
}
@ -99,6 +110,17 @@ void TableFunctionNode::updateTreeHashImpl(HashState & state) const
if (table_expression_modifiers)
table_expression_modifiers->updateTreeHash(state);
state.update(settings_changes.size());
for (const auto & change : settings_changes)
{
state.update(change.name.size());
state.update(change.name);
const auto & value_dump = change.value.dump();
state.update(value_dump.size());
state.update(value_dump);
}
}
QueryTreeNodePtr TableFunctionNode::cloneImpl() const
@ -109,20 +131,29 @@ QueryTreeNodePtr TableFunctionNode::cloneImpl() const
result->storage_id = storage_id;
result->storage_snapshot = storage_snapshot;
result->table_expression_modifiers = table_expression_modifiers;
result->settings_changes = settings_changes;
return result;
}
ASTPtr TableFunctionNode::toASTImpl() const
ASTPtr TableFunctionNode::toASTImpl(const ConvertToASTOptions & options) const
{
auto table_function_ast = std::make_shared<ASTFunction>();
table_function_ast->name = table_function_name;
const auto & arguments = getArguments();
table_function_ast->children.push_back(arguments.toAST());
table_function_ast->children.push_back(arguments.toAST(options));
table_function_ast->arguments = table_function_ast->children.back();
if (!settings_changes.empty())
{
auto settings_ast = std::make_shared<ASTSetQuery>();
settings_ast->changes = settings_changes;
settings_ast->is_standalone = false;
table_function_ast->arguments->children.push_back(std::move(settings_ast));
}
return table_function_ast;
}

View File

@ -1,5 +1,7 @@
#pragma once
#include <Common/SettingsChanges.h>
#include <Storages/IStorage_fwd.h>
#include <Storages/TableLockHolder.h>
#include <Storages/StorageSnapshot.h>
@ -122,6 +124,18 @@ public:
return table_expression_modifiers;
}
/// Get settings changes passed to table function
const SettingsChanges & getSettingsChanges() const
{
return settings_changes;
}
/// Set settings changes passed as last argument to table function
void setSettingsChanges(SettingsChanges settings_changes_)
{
settings_changes = std::move(settings_changes_);
}
/// Set table expression modifiers
void setTableExpressionModifiers(TableExpressionModifiers table_expression_modifiers_value)
{
@ -142,7 +156,7 @@ protected:
QueryTreeNodePtr cloneImpl() const override;
ASTPtr toASTImpl() const override;
ASTPtr toASTImpl(const ConvertToASTOptions & options) const override;
private:
String table_function_name;
@ -151,6 +165,7 @@ private:
StorageID storage_id;
StorageSnapshotPtr storage_snapshot;
std::optional<TableExpressionModifiers> table_expression_modifiers;
SettingsChanges settings_changes;
static constexpr size_t arguments_child_index = 0;
static constexpr size_t children_size = arguments_child_index + 1;

View File

@ -86,7 +86,7 @@ QueryTreeNodePtr TableNode::cloneImpl() const
return result_table_node;
}
ASTPtr TableNode::toASTImpl() const
ASTPtr TableNode::toASTImpl(const ConvertToASTOptions & /* options */) const
{
if (!temporary_table_name.empty())
return std::make_shared<ASTTableIdentifier>(temporary_table_name);

View File

@ -106,7 +106,7 @@ protected:
QueryTreeNodePtr cloneImpl() const override;
ASTPtr toASTImpl() const override;
ASTPtr toASTImpl(const ConvertToASTOptions & options) const override;
private:
StoragePtr storage;

View File

@ -140,12 +140,12 @@ QueryTreeNodePtr UnionNode::cloneImpl() const
return result_union_node;
}
ASTPtr UnionNode::toASTImpl() const
ASTPtr UnionNode::toASTImpl(const ConvertToASTOptions & options) const
{
auto select_with_union_query = std::make_shared<ASTSelectWithUnionQuery>();
select_with_union_query->union_mode = union_mode;
select_with_union_query->is_normalized = true;
select_with_union_query->children.push_back(getQueriesNode()->toAST());
select_with_union_query->children.push_back(getQueriesNode()->toAST(options));
select_with_union_query->list_of_selects = select_with_union_query->children.back();
if (is_subquery)

View File

@ -143,7 +143,7 @@ protected:
QueryTreeNodePtr cloneImpl() const override;
ASTPtr toASTImpl() const override;
ASTPtr toASTImpl(const ConvertToASTOptions & options) const override;
private:
bool is_subquery = false;

View File

@ -268,7 +268,7 @@ static ASTPtr convertIntoTableExpressionAST(const QueryTreeNodePtr & table_expre
return result_table_expression;
}
void addTableExpressionOrJoinIntoTablesInSelectQuery(ASTPtr & tables_in_select_query_ast, const QueryTreeNodePtr & table_expression)
void addTableExpressionOrJoinIntoTablesInSelectQuery(ASTPtr & tables_in_select_query_ast, const QueryTreeNodePtr & table_expression, const IQueryTreeNode::ConvertToASTOptions & convert_to_ast_options)
{
auto table_expression_node_type = table_expression->getNodeType();
@ -297,7 +297,7 @@ void addTableExpressionOrJoinIntoTablesInSelectQuery(ASTPtr & tables_in_select_q
[[fallthrough]];
case QueryTreeNodeType::JOIN:
{
auto table_expression_tables_in_select_query_ast = table_expression->toAST();
auto table_expression_tables_in_select_query_ast = table_expression->toAST(convert_to_ast_options);
tables_in_select_query_ast->children.reserve(table_expression_tables_in_select_query_ast->children.size());
for (auto && table_element_ast : table_expression_tables_in_select_query_ast->children)
tables_in_select_query_ast->children.push_back(std::move(table_element_ast));

View File

@ -40,7 +40,7 @@ std::optional<bool> tryExtractConstantFromConditionNode(const QueryTreeNodePtr &
/** Add table expression in tables in select query children.
* If the table expression node is not an identifier, table, query, table function, join, or array join node, a logical error exception is thrown.
*/
void addTableExpressionOrJoinIntoTablesInSelectQuery(ASTPtr & tables_in_select_query_ast, const QueryTreeNodePtr & table_expression);
void addTableExpressionOrJoinIntoTablesInSelectQuery(ASTPtr & tables_in_select_query_ast, const QueryTreeNodePtr & table_expression, const IQueryTreeNode::ConvertToASTOptions & convert_to_ast_options);
/// Extract table, table function, query, union from join tree
QueryTreeNodes extractTableExpressions(const QueryTreeNodePtr & join_tree_node);

View File

@ -17,8 +17,50 @@ namespace ErrorCodes
extern const int NOT_AN_AGGREGATE;
extern const int NOT_IMPLEMENTED;
extern const int BAD_ARGUMENTS;
extern const int ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER;
extern const int ILLEGAL_PREWHERE;
}
namespace
{
void validateFilter(const QueryTreeNodePtr & filter_node, std::string_view exception_place_message, const QueryTreeNodePtr & query_node)
{
auto filter_node_result_type = filter_node->getResultType();
if (!filter_node_result_type->canBeUsedInBooleanContext())
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER,
"Invalid type for filter in {}: {}. In query {}",
exception_place_message,
filter_node_result_type->getName(),
query_node->formatASTForErrorMessage());
}
}
void validateFilters(const QueryTreeNodePtr & query_node)
{
const auto & query_node_typed = query_node->as<QueryNode &>();
if (query_node_typed.hasPrewhere())
{
validateFilter(query_node_typed.getPrewhere(), "PREWHERE", query_node);
assertNoFunctionNodes(query_node_typed.getPrewhere(),
"arrayJoin",
ErrorCodes::ILLEGAL_PREWHERE,
"ARRAY JOIN",
"in PREWHERE");
}
if (query_node_typed.hasWhere())
validateFilter(query_node_typed.getWhere(), "WHERE", query_node);
if (query_node_typed.hasHaving())
validateFilter(query_node_typed.getHaving(), "HAVING", query_node);
}
namespace
{
class ValidateGroupByColumnsVisitor : public ConstInDepthQueryTreeVisitor<ValidateGroupByColumnsVisitor>
{
public:
@ -106,7 +148,9 @@ private:
const QueryTreeNodePtr & query_node;
};
void validateAggregates(const QueryTreeNodePtr & query_node, ValidationParams params)
}
void validateAggregates(const QueryTreeNodePtr & query_node, AggregatesValidationParams params)
{
const auto & query_node_typed = query_node->as<QueryNode &>();
auto join_tree_node_type = query_node_typed.getJoinTree()->getNodeType();

View File

@ -5,7 +5,10 @@
namespace DB
{
struct ValidationParams
/// Validate PREWHERE, WHERE, HAVING in query node
void validateFilters(const QueryTreeNodePtr & query_node);
struct AggregatesValidationParams
{
bool group_by_use_nulls = false;
};
@ -20,7 +23,7 @@ struct ValidationParams
* PROJECTION.
* 5. Throws exception if there is GROUPING SETS or ROLLUP or CUBE or WITH TOTALS without aggregation.
*/
void validateAggregates(const QueryTreeNodePtr & query_node, ValidationParams params);
void validateAggregates(const QueryTreeNodePtr & query_node, AggregatesValidationParams params);
/** Assert that there are no function nodes with specified function name in node children.
* Do not visit subqueries.

View File

@ -107,7 +107,7 @@ QueryTreeNodePtr WindowNode::cloneImpl() const
return window_node;
}
ASTPtr WindowNode::toASTImpl() const
ASTPtr WindowNode::toASTImpl(const ConvertToASTOptions & options) const
{
auto window_definition = std::make_shared<ASTWindowDefinition>();
@ -115,13 +115,13 @@ ASTPtr WindowNode::toASTImpl() const
if (hasPartitionBy())
{
window_definition->children.push_back(getPartitionByNode()->toAST());
window_definition->children.push_back(getPartitionByNode()->toAST(options));
window_definition->partition_by = window_definition->children.back();
}
if (hasOrderBy())
{
window_definition->children.push_back(getOrderByNode()->toAST());
window_definition->children.push_back(getOrderByNode()->toAST(options));
window_definition->order_by = window_definition->children.back();
}
@ -132,7 +132,7 @@ ASTPtr WindowNode::toASTImpl() const
if (hasFrameBeginOffset())
{
window_definition->children.push_back(getFrameBeginOffsetNode()->toAST());
window_definition->children.push_back(getFrameBeginOffsetNode()->toAST(options));
window_definition->frame_begin_offset = window_definition->children.back();
}
@ -140,7 +140,7 @@ ASTPtr WindowNode::toASTImpl() const
window_definition->frame_end_preceding = window_frame.end_preceding;
if (hasFrameEndOffset())
{
window_definition->children.push_back(getFrameEndOffsetNode()->toAST());
window_definition->children.push_back(getFrameEndOffsetNode()->toAST(options));
window_definition->frame_end_offset = window_definition->children.back();
}

View File

@ -175,7 +175,7 @@ protected:
QueryTreeNodePtr cloneImpl() const override;
ASTPtr toASTImpl() const override;
ASTPtr toASTImpl(const ConvertToASTOptions & options) const override;
private:
static constexpr size_t order_by_child_index = 0;

View File

@ -36,7 +36,7 @@ public:
return std::make_shared<SourceNode>();
}
ASTPtr toASTImpl() const override
ASTPtr toASTImpl(const ConvertToASTOptions & /* options */) const override
{
return nullptr;
}

View File

@ -13,20 +13,20 @@ using FileInfo = IBackupCoordination::FileInfo;
BackupCoordinationLocal::BackupCoordinationLocal() = default;
BackupCoordinationLocal::~BackupCoordinationLocal() = default;
void BackupCoordinationLocal::setStage(const String &, const String &, const String &)
void BackupCoordinationLocal::setStage(const String &, const String &)
{
}
void BackupCoordinationLocal::setError(const String &, const Exception &)
void BackupCoordinationLocal::setError(const Exception &)
{
}
Strings BackupCoordinationLocal::waitForStage(const Strings &, const String &)
Strings BackupCoordinationLocal::waitForStage(const String &)
{
return {};
}
Strings BackupCoordinationLocal::waitForStage(const Strings &, const String &, std::chrono::milliseconds)
Strings BackupCoordinationLocal::waitForStage(const String &, std::chrono::milliseconds)
{
return {};
}
@ -70,29 +70,29 @@ Strings BackupCoordinationLocal::getReplicatedDataPaths(const String & table_sha
}
void BackupCoordinationLocal::addReplicatedAccessFilePath(const String & access_zk_path, AccessEntityType access_entity_type, const String & host_id, const String & file_path)
void BackupCoordinationLocal::addReplicatedAccessFilePath(const String & access_zk_path, AccessEntityType access_entity_type, const String & file_path)
{
std::lock_guard lock{mutex};
replicated_access.addFilePath(access_zk_path, access_entity_type, host_id, file_path);
replicated_access.addFilePath(access_zk_path, access_entity_type, "", file_path);
}
Strings BackupCoordinationLocal::getReplicatedAccessFilePaths(const String & access_zk_path, AccessEntityType access_entity_type, const String & host_id) const
Strings BackupCoordinationLocal::getReplicatedAccessFilePaths(const String & access_zk_path, AccessEntityType access_entity_type) const
{
std::lock_guard lock{mutex};
return replicated_access.getFilePaths(access_zk_path, access_entity_type, host_id);
return replicated_access.getFilePaths(access_zk_path, access_entity_type, "");
}
void BackupCoordinationLocal::addReplicatedSQLObjectsDir(const String & loader_zk_path, UserDefinedSQLObjectType object_type, const String & host_id, const String & dir_path)
void BackupCoordinationLocal::addReplicatedSQLObjectsDir(const String & loader_zk_path, UserDefinedSQLObjectType object_type, const String & dir_path)
{
std::lock_guard lock{mutex};
replicated_sql_objects.addDirectory(loader_zk_path, object_type, host_id, dir_path);
replicated_sql_objects.addDirectory(loader_zk_path, object_type, "", dir_path);
}
Strings BackupCoordinationLocal::getReplicatedSQLObjectsDirs(const String & loader_zk_path, UserDefinedSQLObjectType object_type, const String & host_id) const
Strings BackupCoordinationLocal::getReplicatedSQLObjectsDirs(const String & loader_zk_path, UserDefinedSQLObjectType object_type) const
{
std::lock_guard lock{mutex};
return replicated_sql_objects.getDirectories(loader_zk_path, object_type, host_id);
return replicated_sql_objects.getDirectories(loader_zk_path, object_type, "");
}


@@ -21,10 +21,10 @@ public:
BackupCoordinationLocal();
~BackupCoordinationLocal() override;
void setStage(const String & current_host, const String & new_stage, const String & message) override;
void setError(const String & current_host, const Exception & exception) override;
Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait) override;
Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait, std::chrono::milliseconds timeout) override;
void setStage(const String & new_stage, const String & message) override;
void setError(const Exception & exception) override;
Strings waitForStage(const String & stage_to_wait) override;
Strings waitForStage(const String & stage_to_wait, std::chrono::milliseconds timeout) override;
void addReplicatedPartNames(const String & table_shared_id, const String & table_name_for_logs, const String & replica_name,
const std::vector<PartNameAndChecksum> & part_names_and_checksums) override;
@@ -37,11 +37,11 @@ public:
void addReplicatedDataPath(const String & table_shared_id, const String & data_path) override;
Strings getReplicatedDataPaths(const String & table_shared_id) const override;
void addReplicatedAccessFilePath(const String & access_zk_path, AccessEntityType access_entity_type, const String & host_id, const String & file_path) override;
Strings getReplicatedAccessFilePaths(const String & access_zk_path, AccessEntityType access_entity_type, const String & host_id) const override;
void addReplicatedAccessFilePath(const String & access_zk_path, AccessEntityType access_entity_type, const String & file_path) override;
Strings getReplicatedAccessFilePaths(const String & access_zk_path, AccessEntityType access_entity_type) const override;
void addReplicatedSQLObjectsDir(const String & loader_zk_path, UserDefinedSQLObjectType object_type, const String & host_id, const String & dir_path) override;
Strings getReplicatedSQLObjectsDirs(const String & loader_zk_path, UserDefinedSQLObjectType object_type, const String & host_id) const override;
void addReplicatedSQLObjectsDir(const String & loader_zk_path, UserDefinedSQLObjectType object_type, const String & dir_path) override;
Strings getReplicatedSQLObjectsDirs(const String & loader_zk_path, UserDefinedSQLObjectType object_type) const override;
void addFileInfo(const FileInfo & file_info, bool & is_data_file_required) override;
void updateFileInfo(const FileInfo & file_info) override;


@@ -166,17 +166,30 @@ namespace
}
}
size_t BackupCoordinationRemote::findCurrentHostIndex(const Strings & all_hosts, const String & current_host)
{
auto it = std::find(all_hosts.begin(), all_hosts.end(), current_host);
if (it == all_hosts.end())
return 0;
return it - all_hosts.begin();
}
BackupCoordinationRemote::BackupCoordinationRemote(
const BackupKeeperSettings & keeper_settings_,
const String & root_zookeeper_path_,
const String & backup_uuid_,
zkutil::GetZooKeeper get_zookeeper_,
const String & root_zookeeper_path_,
const BackupKeeperSettings & keeper_settings_,
const String & backup_uuid_,
const Strings & all_hosts_,
const String & current_host_,
bool is_internal_)
: keeper_settings(keeper_settings_)
: get_zookeeper(get_zookeeper_)
, root_zookeeper_path(root_zookeeper_path_)
, zookeeper_path(root_zookeeper_path_ + "/backup-" + backup_uuid_)
, keeper_settings(keeper_settings_)
, backup_uuid(backup_uuid_)
, get_zookeeper(get_zookeeper_)
, all_hosts(all_hosts_)
, current_host(current_host_)
, current_host_index(findCurrentHostIndex(all_hosts, current_host))
, is_internal(is_internal_)
{
zookeeper_retries_info = ZooKeeperRetriesInfo(
@@ -251,22 +251,22 @@ void BackupCoordinationRemote::removeAllNodes()
}
void BackupCoordinationRemote::setStage(const String & current_host, const String & new_stage, const String & message)
void BackupCoordinationRemote::setStage(const String & new_stage, const String & message)
{
stage_sync->set(current_host, new_stage, message);
}
void BackupCoordinationRemote::setError(const String & current_host, const Exception & exception)
void BackupCoordinationRemote::setError(const Exception & exception)
{
stage_sync->setError(current_host, exception);
}
Strings BackupCoordinationRemote::waitForStage(const Strings & all_hosts, const String & stage_to_wait)
Strings BackupCoordinationRemote::waitForStage(const String & stage_to_wait)
{
return stage_sync->wait(all_hosts, stage_to_wait);
}
Strings BackupCoordinationRemote::waitForStage(const Strings & all_hosts, const String & stage_to_wait, std::chrono::milliseconds timeout)
Strings BackupCoordinationRemote::waitForStage(const String & stage_to_wait, std::chrono::milliseconds timeout)
{
return stage_sync->waitFor(all_hosts, stage_to_wait, timeout);
}
@@ -403,7 +416,7 @@ void BackupCoordinationRemote::prepareReplicatedTables() const
}
void BackupCoordinationRemote::addReplicatedAccessFilePath(const String & access_zk_path, AccessEntityType access_entity_type, const String & host_id, const String & file_path)
void BackupCoordinationRemote::addReplicatedAccessFilePath(const String & access_zk_path, AccessEntityType access_entity_type, const String & file_path)
{
{
std::lock_guard lock{mutex};
@@ -416,15 +429,15 @@ void BackupCoordinationRemote::addReplicatedAccessFilePath(const String & access
zk->createIfNotExists(path, "");
path += "/" + AccessEntityTypeInfo::get(access_entity_type).name;
zk->createIfNotExists(path, "");
path += "/" + host_id;
path += "/" + current_host;
zk->createIfNotExists(path, file_path);
}
Strings BackupCoordinationRemote::getReplicatedAccessFilePaths(const String & access_zk_path, AccessEntityType access_entity_type, const String & host_id) const
Strings BackupCoordinationRemote::getReplicatedAccessFilePaths(const String & access_zk_path, AccessEntityType access_entity_type) const
{
std::lock_guard lock{mutex};
prepareReplicatedAccess();
return replicated_access->getFilePaths(access_zk_path, access_entity_type, host_id);
return replicated_access->getFilePaths(access_zk_path, access_entity_type, current_host);
}
void BackupCoordinationRemote::prepareReplicatedAccess() const
@@ -453,7 +466,7 @@ void BackupCoordinationRemote::prepareReplicatedAccess() const
}
}
void BackupCoordinationRemote::addReplicatedSQLObjectsDir(const String & loader_zk_path, UserDefinedSQLObjectType object_type, const String & host_id, const String & dir_path)
void BackupCoordinationRemote::addReplicatedSQLObjectsDir(const String & loader_zk_path, UserDefinedSQLObjectType object_type, const String & dir_path)
{
{
std::lock_guard lock{mutex};
@@ -474,15 +487,15 @@ void BackupCoordinationRemote::addReplicatedSQLObjectsDir(const String & loader_
}
zk->createIfNotExists(path, "");
path += "/" + host_id;
path += "/" + current_host;
zk->createIfNotExists(path, dir_path);
}
Strings BackupCoordinationRemote::getReplicatedSQLObjectsDirs(const String & loader_zk_path, UserDefinedSQLObjectType object_type, const String & host_id) const
Strings BackupCoordinationRemote::getReplicatedSQLObjectsDirs(const String & loader_zk_path, UserDefinedSQLObjectType object_type) const
{
std::lock_guard lock{mutex};
prepareReplicatedSQLObjects();
return replicated_sql_objects->getDirectories(loader_zk_path, object_type, host_id);
return replicated_sql_objects->getDirectories(loader_zk_path, object_type, current_host);
}
void BackupCoordinationRemote::prepareReplicatedSQLObjects() const
@@ -810,12 +823,9 @@ bool BackupCoordinationRemote::hasConcurrentBackups(const std::atomic<size_t> &)
if (existing_backup_uuid == toString(backup_uuid))
continue;
String status;
if (zk->tryGet(root_zookeeper_path + "/" + existing_backup_path + "/stage", status))
{
if (status != Stage::COMPLETED)
return true;
}
const auto status = zk->get(root_zookeeper_path + "/" + existing_backup_path + "/stage");
if (status != Stage::COMPLETED)
return true;
}
zk->createIfNotExists(backup_stage_path, "");
@@ -830,5 +840,4 @@ bool BackupCoordinationRemote::hasConcurrentBackups(const std::atomic<size_t> &)
return false;
}
}
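As a quick illustration of the new findCurrentHostIndex() helper shown above, here is how it behaves for a made-up host list (the host names are purely illustrative):

    Strings all_hosts = {"node1:9000", "node2:9000", "node3:9000"};
    size_t idx = BackupCoordinationRemote::findCurrentHostIndex(all_hosts, "node2:9000");   /// == 1
    size_t other = BackupCoordinationRemote::findCurrentHostIndex(all_hosts, "unknown");    /// == 0, an unknown host falls back to index 0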


@@ -27,17 +27,20 @@ public:
};
BackupCoordinationRemote(
const BackupKeeperSettings & keeper_settings_,
const String & root_zookeeper_path_,
const String & backup_uuid_,
zkutil::GetZooKeeper get_zookeeper_,
const String & root_zookeeper_path_,
const BackupKeeperSettings & keeper_settings_,
const String & backup_uuid_,
const Strings & all_hosts_,
const String & current_host_,
bool is_internal_);
~BackupCoordinationRemote() override;
void setStage(const String & current_host, const String & new_stage, const String & message) override;
void setError(const String & current_host, const Exception & exception) override;
Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait) override;
Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait, std::chrono::milliseconds timeout) override;
void setStage(const String & new_stage, const String & message) override;
void setError(const Exception & exception) override;
Strings waitForStage(const String & stage_to_wait) override;
Strings waitForStage(const String & stage_to_wait, std::chrono::milliseconds timeout) override;
void addReplicatedPartNames(
const String & table_shared_id,
@@ -58,11 +61,11 @@ public:
void addReplicatedDataPath(const String & table_shared_id, const String & data_path) override;
Strings getReplicatedDataPaths(const String & table_shared_id) const override;
void addReplicatedAccessFilePath(const String & access_zk_path, AccessEntityType access_entity_type, const String & host_id, const String & file_path) override;
Strings getReplicatedAccessFilePaths(const String & access_zk_path, AccessEntityType access_entity_type, const String & host_id) const override;
void addReplicatedAccessFilePath(const String & access_zk_path, AccessEntityType access_entity_type, const String & file_path) override;
Strings getReplicatedAccessFilePaths(const String & access_zk_path, AccessEntityType access_entity_type) const override;
void addReplicatedSQLObjectsDir(const String & loader_zk_path, UserDefinedSQLObjectType object_type, const String & host_id, const String & dir_path) override;
Strings getReplicatedSQLObjectsDirs(const String & loader_zk_path, UserDefinedSQLObjectType object_type, const String & host_id) const override;
void addReplicatedSQLObjectsDir(const String & loader_zk_path, UserDefinedSQLObjectType object_type, const String & dir_path) override;
Strings getReplicatedSQLObjectsDirs(const String & loader_zk_path, UserDefinedSQLObjectType object_type) const override;
void addFileInfo(const FileInfo & file_info, bool & is_data_file_required) override;
void updateFileInfo(const FileInfo & file_info) override;
@@ -78,6 +81,8 @@ public:
bool hasConcurrentBackups(const std::atomic<size_t> & num_active_backups) const override;
static size_t findCurrentHostIndex(const Strings & all_hosts, const String & current_host);
private:
zkutil::ZooKeeperPtr getZooKeeper() const;
zkutil::ZooKeeperPtr getZooKeeperNoLock() const;
@@ -91,11 +96,14 @@ private:
void prepareReplicatedAccess() const;
void prepareReplicatedSQLObjects() const;
const BackupKeeperSettings keeper_settings;
const zkutil::GetZooKeeper get_zookeeper;
const String root_zookeeper_path;
const String zookeeper_path;
const BackupKeeperSettings keeper_settings;
const String backup_uuid;
const zkutil::GetZooKeeper get_zookeeper;
const Strings all_hosts;
const String current_host;
const size_t current_host_index;
const bool is_internal;
mutable ZooKeeperRetriesInfo zookeeper_retries_info;


@@ -133,22 +133,22 @@ Strings BackupEntriesCollector::setStage(const String & new_stage, const String
LOG_TRACE(log, fmt::runtime(toUpperFirst(new_stage)));
current_stage = new_stage;
backup_coordination->setStage(backup_settings.host_id, new_stage, message);
backup_coordination->setStage(new_stage, message);
if (new_stage == Stage::formatGatheringMetadata(1))
{
return backup_coordination->waitForStage(all_hosts, new_stage, on_cluster_first_sync_timeout);
return backup_coordination->waitForStage(new_stage, on_cluster_first_sync_timeout);
}
else if (new_stage.starts_with(Stage::GATHERING_METADATA))
{
auto current_time = std::chrono::steady_clock::now();
auto end_of_timeout = std::max(current_time, consistent_metadata_snapshot_end_time);
return backup_coordination->waitForStage(
all_hosts, new_stage, std::chrono::duration_cast<std::chrono::milliseconds>(end_of_timeout - current_time));
new_stage, std::chrono::duration_cast<std::chrono::milliseconds>(end_of_timeout - current_time));
}
else
{
return backup_coordination->waitForStage(all_hosts, new_stage);
return backup_coordination->waitForStage(new_stage);
}
}


@@ -38,14 +38,33 @@ namespace Stage = BackupCoordinationStage;
namespace
{
std::shared_ptr<IBackupCoordination> makeBackupCoordination(std::optional<BackupCoordinationRemote::BackupKeeperSettings> keeper_settings, String & root_zk_path, const String & backup_uuid, const ContextPtr & context, bool is_internal_backup)
std::shared_ptr<IBackupCoordination> makeBackupCoordination(const ContextPtr & context, const BackupSettings & backup_settings, bool remote)
{
if (!root_zk_path.empty())
if (remote)
{
if (!keeper_settings.has_value())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Parameter keeper_settings is empty while root_zk_path is not. This is bug");
String root_zk_path = context->getConfigRef().getString("backups.zookeeper_path", "/clickhouse/backups");
auto get_zookeeper = [global_context = context->getGlobalContext()] { return global_context->getZooKeeper(); };
return std::make_shared<BackupCoordinationRemote>(*keeper_settings, root_zk_path, backup_uuid, get_zookeeper, is_internal_backup);
BackupCoordinationRemote::BackupKeeperSettings keeper_settings
{
.keeper_max_retries = context->getSettingsRef().backup_keeper_max_retries,
.keeper_retry_initial_backoff_ms = context->getSettingsRef().backup_keeper_retry_initial_backoff_ms,
.keeper_retry_max_backoff_ms = context->getSettingsRef().backup_keeper_retry_max_backoff_ms,
.batch_size_for_keeper_multiread = context->getSettingsRef().backup_batch_size_for_keeper_multiread,
};
auto all_hosts = BackupSettings::Util::filterHostIDs(
backup_settings.cluster_host_ids, backup_settings.shard_num, backup_settings.replica_num);
return std::make_shared<BackupCoordinationRemote>(
get_zookeeper,
root_zk_path,
keeper_settings,
toString(*backup_settings.backup_uuid),
all_hosts,
backup_settings.host_id,
backup_settings.internal);
}
else
{
@@ -53,12 +72,19 @@ namespace
}
}
std::shared_ptr<IRestoreCoordination> makeRestoreCoordination(const String & root_zk_path, const String & restore_uuid, const ContextPtr & context, bool is_internal_backup)
std::shared_ptr<IRestoreCoordination>
makeRestoreCoordination(const ContextPtr & context, const RestoreSettings & restore_settings, bool remote)
{
if (!root_zk_path.empty())
if (remote)
{
String root_zk_path = context->getConfigRef().getString("backups.zookeeper_path", "/clickhouse/backups");
auto get_zookeeper = [global_context = context->getGlobalContext()] { return global_context->getZooKeeper(); };
return std::make_shared<RestoreCoordinationRemote>(root_zk_path, restore_uuid, get_zookeeper, is_internal_backup);
auto all_hosts = BackupSettings::Util::filterHostIDs(
restore_settings.cluster_host_ids, restore_settings.shard_num, restore_settings.replica_num);
return std::make_shared<RestoreCoordinationRemote>(get_zookeeper, root_zk_path, toString(*restore_settings.restore_uuid), all_hosts, restore_settings.host_id, restore_settings.internal);
}
else
{
@@ -68,12 +94,12 @@ namespace
/// Sends information about an exception to IBackupCoordination or IRestoreCoordination.
template <typename CoordinationType>
void sendExceptionToCoordination(std::shared_ptr<CoordinationType> coordination, const String & current_host, const Exception & exception)
void sendExceptionToCoordination(std::shared_ptr<CoordinationType> coordination, const Exception & exception)
{
try
{
if (coordination)
coordination->setError(current_host, exception);
coordination->setError(exception);
}
catch (...)
{
@@ -82,7 +108,7 @@ namespace
/// Sends information about the current exception to IBackupCoordination or IRestoreCoordination.
template <typename CoordinationType>
void sendCurrentExceptionToCoordination(std::shared_ptr<CoordinationType> coordination, const String & current_host)
void sendCurrentExceptionToCoordination(std::shared_ptr<CoordinationType> coordination)
{
try
{
@@ -90,12 +116,12 @@ namespace
}
catch (const Exception & e)
{
sendExceptionToCoordination(coordination, current_host, e);
sendExceptionToCoordination(coordination, e);
}
catch (...)
{
if (coordination)
coordination->setError(current_host, Exception(getCurrentExceptionMessageAndPattern(true, true), getCurrentExceptionCode()));
coordination->setError(Exception(getCurrentExceptionMessageAndPattern(true, true), getCurrentExceptionCode()));
}
}
@@ -162,24 +188,13 @@ OperationID BackupsWorker::startMakingBackup(const ASTPtr & query, const Context
else
backup_id = toString(*backup_settings.backup_uuid);
String root_zk_path;
std::shared_ptr<IBackupCoordination> backup_coordination;
if (backup_settings.internal)
{
/// The following call of makeBackupCoordination() is not essential because doBackup() will create a backup coordination later
/// if it is not created here. However, creating the coordination here makes error handling better: this way,
/// if an exception is thrown in startMakingBackup(), the other hosts will find out about it.
root_zk_path = context->getConfigRef().getString("backups.zookeeper_path", "/clickhouse/backups");
BackupCoordinationRemote::BackupKeeperSettings keeper_settings
{
.keeper_max_retries = context->getSettingsRef().backup_keeper_max_retries,
.keeper_retry_initial_backoff_ms = context->getSettingsRef().backup_keeper_retry_initial_backoff_ms,
.keeper_retry_max_backoff_ms = context->getSettingsRef().backup_keeper_retry_max_backoff_ms,
.batch_size_for_keeper_multiread = context->getSettingsRef().backup_batch_size_for_keeper_multiread,
};
backup_coordination = makeBackupCoordination(keeper_settings, root_zk_path, toString(*backup_settings.backup_uuid), context, backup_settings.internal);
backup_coordination = makeBackupCoordination(context, backup_settings, /* remote= */ true);
}
auto backup_info = BackupInfo::fromAST(*backup_query->backup_name);
@@ -238,7 +253,7 @@ OperationID BackupsWorker::startMakingBackup(const ASTPtr & query, const Context
tryLogCurrentException(log, fmt::format("Failed to start {} {}", (backup_settings.internal ? "internal backup" : "backup"), backup_name_for_logging));
/// Something bad happened, the backup has not been built.
setStatusSafe(backup_id, BackupStatus::BACKUP_FAILED);
sendCurrentExceptionToCoordination(backup_coordination, backup_settings.host_id);
sendCurrentExceptionToCoordination(backup_coordination);
throw;
}
}
@@ -274,19 +289,9 @@ void BackupsWorker::doBackup(
if (!on_cluster)
context->checkAccess(required_access);
String root_zk_path;
std::optional<BackupCoordinationRemote::BackupKeeperSettings> keeper_settings;
ClusterPtr cluster;
if (on_cluster)
{
keeper_settings = BackupCoordinationRemote::BackupKeeperSettings
{
.keeper_max_retries = context->getSettingsRef().backup_keeper_max_retries,
.keeper_retry_initial_backoff_ms = context->getSettingsRef().backup_keeper_retry_initial_backoff_ms,
.keeper_retry_max_backoff_ms = context->getSettingsRef().backup_keeper_retry_max_backoff_ms,
.batch_size_for_keeper_multiread = context->getSettingsRef().backup_batch_size_for_keeper_multiread,
};
root_zk_path = context->getConfigRef().getString("backups.zookeeper_path", "/clickhouse/backups");
backup_query->cluster = context->getMacros()->expand(backup_query->cluster);
cluster = context->getCluster(backup_query->cluster);
backup_settings.cluster_host_ids = cluster->getHostIDs();
@@ -294,7 +299,7 @@ void BackupsWorker::doBackup(
/// Make a backup coordination.
if (!backup_coordination)
backup_coordination = makeBackupCoordination(keeper_settings, root_zk_path, toString(*backup_settings.backup_uuid), context, backup_settings.internal);
backup_coordination = makeBackupCoordination(context, backup_settings, /* remote= */ on_cluster);
if (!allow_concurrent_backups && backup_coordination->hasConcurrentBackups(std::ref(num_active_backups)))
throw Exception(ErrorCodes::CONCURRENT_ACCESS_NOT_SUPPORTED, "Concurrent backups not supported, turn on setting 'allow_concurrent_backups'");
@@ -330,9 +335,7 @@ void BackupsWorker::doBackup(
executeDDLQueryOnCluster(backup_query, mutable_context, params);
/// Wait until all the hosts have written their backup entries.
auto all_hosts = BackupSettings::Util::filterHostIDs(
backup_settings.cluster_host_ids, backup_settings.shard_num, backup_settings.replica_num);
backup_coordination->waitForStage(all_hosts, Stage::COMPLETED);
backup_coordination->waitForStage(Stage::COMPLETED);
}
else
{
@@ -349,7 +352,7 @@ void BackupsWorker::doBackup(
writeBackupEntries(backup, std::move(backup_entries), backups_thread_pool);
/// We have written our backup entries, we need to tell other hosts (they could be waiting for it).
backup_coordination->setStage(backup_settings.host_id, Stage::COMPLETED, "");
backup_coordination->setStage(Stage::COMPLETED, "");
}
size_t num_files = 0;
@@ -383,7 +386,7 @@ void BackupsWorker::doBackup(
{
tryLogCurrentException(log, fmt::format("Failed to make {} {}", (backup_settings.internal ? "internal backup" : "backup"), backup_name_for_logging));
setStatusSafe(backup_id, BackupStatus::BACKUP_FAILED);
sendCurrentExceptionToCoordination(backup_coordination, backup_settings.host_id);
sendCurrentExceptionToCoordination(backup_coordination);
}
else
{
@@ -417,8 +420,7 @@ OperationID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePt
/// The following call of makeRestoreCoordination() is not essential because doRestore() will create a restore coordination later
/// if it is not created here. However, creating the coordination here makes error handling better: this way,
/// if an exception is thrown in startRestoring(), the other hosts will find out about it.
auto root_zk_path = context->getConfigRef().getString("backups.zookeeper_path", "/clickhouse/backups");
restore_coordination = makeRestoreCoordination(root_zk_path, toString(*restore_settings.restore_uuid), context, restore_settings.internal);
restore_coordination = makeRestoreCoordination(context, restore_settings, /* remote= */ true);
}
try
@@ -474,7 +476,7 @@ OperationID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePt
{
/// Something bad happened, the restore has not been started.
setStatusSafe(restore_id, BackupStatus::RESTORE_FAILED);
sendCurrentExceptionToCoordination(restore_coordination, restore_settings.host_id);
sendCurrentExceptionToCoordination(restore_coordination);
throw;
}
}
@@ -509,14 +511,12 @@ void BackupsWorker::doRestore(
BackupPtr backup = BackupFactory::instance().createBackup(backup_open_params);
String current_database = context->getCurrentDatabase();
String root_zk_path;
/// Checks access rights if this is an ON CLUSTER query.
/// (If this isn't an ON CLUSTER query, RestorerFromBackup will check access rights later.)
ClusterPtr cluster;
bool on_cluster = !restore_query->cluster.empty();
if (on_cluster)
{
root_zk_path = context->getConfigRef().getString("backups.zookeeper_path", "/clickhouse/backups");
restore_query->cluster = context->getMacros()->expand(restore_query->cluster);
cluster = context->getCluster(restore_query->cluster);
restore_settings.cluster_host_ids = cluster->getHostIDs();
@@ -539,7 +539,7 @@ void BackupsWorker::doRestore(
/// Make a restore coordination.
if (!restore_coordination)
restore_coordination = makeRestoreCoordination(root_zk_path, toString(*restore_settings.restore_uuid), context, restore_settings.internal);
restore_coordination = makeRestoreCoordination(context, restore_settings, /* remote= */ on_cluster);
if (!allow_concurrent_restores && restore_coordination->hasConcurrentRestores(std::ref(num_active_restores)))
throw Exception(ErrorCodes::CONCURRENT_ACCESS_NOT_SUPPORTED, "Concurrent restores not supported, turn on setting 'allow_concurrent_restores'");
@@ -561,9 +561,7 @@ void BackupsWorker::doRestore(
executeDDLQueryOnCluster(restore_query, context, params);
/// Wait until all the hosts have written their backup entries.
auto all_hosts = BackupSettings::Util::filterHostIDs(
restore_settings.cluster_host_ids, restore_settings.shard_num, restore_settings.replica_num);
restore_coordination->waitForStage(all_hosts, Stage::COMPLETED);
restore_coordination->waitForStage(Stage::COMPLETED);
}
else
{
@@ -581,7 +579,7 @@ void BackupsWorker::doRestore(
restoreTablesData(std::move(data_restore_tasks), restores_thread_pool);
/// We have restored everything, we need to tell other hosts (they could be waiting for it).
restore_coordination->setStage(restore_settings.host_id, Stage::COMPLETED, "");
restore_coordination->setStage(Stage::COMPLETED, "");
}
LOG_INFO(log, "Restored from {} {} successfully", (restore_settings.internal ? "internal backup" : "backup"), backup_name_for_logging);
@@ -603,7 +601,7 @@ void BackupsWorker::doRestore(
{
tryLogCurrentException(log, fmt::format("Failed to restore from {} {}", (restore_settings.internal ? "internal backup" : "backup"), backup_name_for_logging));
setStatusSafe(restore_id, BackupStatus::RESTORE_FAILED);
sendCurrentExceptionToCoordination(restore_coordination, restore_settings.host_id);
sendCurrentExceptionToCoordination(restore_coordination);
}
else
{

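Condensed, the post-change flow in doBackup() around the coordination object looks roughly like this (a sketch assembled from the hunks above; variables such as on_cluster, backup_query, params and backup_entries are assumed to exist as in the original function):

    backup_coordination = makeBackupCoordination(context, backup_settings, /* remote= */ on_cluster);

    if (on_cluster)
    {
        executeDDLQueryOnCluster(backup_query, mutable_context, params);
        /// The list of hosts to wait for now lives inside the coordination object.
        backup_coordination->waitForStage(Stage::COMPLETED);
    }
    else
    {
        writeBackupEntries(backup, std::move(backup_entries), backups_thread_pool);
        /// Tell the other hosts (if any) that this host has finished.
        backup_coordination->setStage(Stage::COMPLETED, "");
    }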

@@ -22,10 +22,10 @@ public:
virtual ~IBackupCoordination() = default;
/// Sets the current stage and waits for other hosts to come to this stage too.
virtual void setStage(const String & current_host, const String & new_stage, const String & message) = 0;
virtual void setError(const String & current_host, const Exception & exception) = 0;
virtual Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait) = 0;
virtual Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait, std::chrono::milliseconds timeout) = 0;
virtual void setStage(const String & new_stage, const String & message) = 0;
virtual void setError(const Exception & exception) = 0;
virtual Strings waitForStage(const String & stage_to_wait) = 0;
virtual Strings waitForStage(const String & stage_to_wait, std::chrono::milliseconds timeout) = 0;
struct PartNameAndChecksum
{
@@ -66,12 +66,12 @@ public:
virtual Strings getReplicatedDataPaths(const String & table_shared_id) const = 0;
/// Adds a path to the access.txt file that keeps the access entities of a ReplicatedAccessStorage.
virtual void addReplicatedAccessFilePath(const String & access_zk_path, AccessEntityType access_entity_type, const String & host_id, const String & file_path) = 0;
virtual Strings getReplicatedAccessFilePaths(const String & access_zk_path, AccessEntityType access_entity_type, const String & host_id) const = 0;
virtual void addReplicatedAccessFilePath(const String & access_zk_path, AccessEntityType access_entity_type, const String & file_path) = 0;
virtual Strings getReplicatedAccessFilePaths(const String & access_zk_path, AccessEntityType access_entity_type) const = 0;
/// Adds a path to a directory with user-defined SQL objects inside the backup.
virtual void addReplicatedSQLObjectsDir(const String & loader_zk_path, UserDefinedSQLObjectType object_type, const String & host_id, const String & dir_path) = 0;
virtual Strings getReplicatedSQLObjectsDirs(const String & loader_zk_path, UserDefinedSQLObjectType object_type, const String & host_id) const = 0;
virtual void addReplicatedSQLObjectsDir(const String & loader_zk_path, UserDefinedSQLObjectType object_type, const String & dir_path) = 0;
virtual Strings getReplicatedSQLObjectsDirs(const String & loader_zk_path, UserDefinedSQLObjectType object_type) const = 0;
struct FileInfo
{

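For comparison with the old host-aware signatures, a minimal caller-side sketch of the slimmed-down stage API (the stage name and timeout are illustrative, and coordination is assumed to be a std::shared_ptr<IBackupCoordination> whose implementation already knows all_hosts and current_host):

    coordination->setStage("gathering metadata", "");
    Strings reached = coordination->waitForStage("gathering metadata", std::chrono::milliseconds(180000));
    /// On failure a host now reports only the exception, without naming itself:
    /// coordination->setError(exception);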

@@ -18,10 +18,10 @@ public:
virtual ~IRestoreCoordination() = default;
/// Sets the current stage and waits for other hosts to come to this stage too.
virtual void setStage(const String & current_host, const String & new_stage, const String & message) = 0;
virtual void setError(const String & current_host, const Exception & exception) = 0;
virtual Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait) = 0;
virtual Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait, std::chrono::milliseconds timeout) = 0;
virtual void setStage(const String & new_stage, const String & message) = 0;
virtual void setError(const Exception & exception) = 0;
virtual Strings waitForStage(const String & stage_to_wait) = 0;
virtual Strings waitForStage(const String & stage_to_wait, std::chrono::milliseconds timeout) = 0;
static constexpr const char * kErrorStatus = "error";


@@ -7,20 +7,20 @@ namespace DB
RestoreCoordinationLocal::RestoreCoordinationLocal() = default;
RestoreCoordinationLocal::~RestoreCoordinationLocal() = default;
void RestoreCoordinationLocal::setStage(const String &, const String &, const String &)
void RestoreCoordinationLocal::setStage(const String &, const String &)
{
}
void RestoreCoordinationLocal::setError(const String &, const Exception &)
void RestoreCoordinationLocal::setError(const Exception &)
{
}
Strings RestoreCoordinationLocal::waitForStage(const Strings &, const String &)
Strings RestoreCoordinationLocal::waitForStage(const String &)
{
return {};
}
Strings RestoreCoordinationLocal::waitForStage(const Strings &, const String &, std::chrono::milliseconds)
Strings RestoreCoordinationLocal::waitForStage(const String &, std::chrono::milliseconds)
{
return {};
}


@@ -19,10 +19,10 @@ public:
~RestoreCoordinationLocal() override;
/// Sets the current stage and waits for other hosts to come to this stage too.
void setStage(const String & current_host, const String & new_stage, const String & message) override;
void setError(const String & current_host, const Exception & exception) override;
Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait) override;
Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait, std::chrono::milliseconds timeout) override;
void setStage(const String & new_stage, const String & message) override;
void setError(const Exception & exception) override;
Strings waitForStage(const String & stage_to_wait) override;
Strings waitForStage(const String & stage_to_wait, std::chrono::milliseconds timeout) override;
/// Starts creating a table in a replicated database. Returns false if there is another host which is already creating this table.
bool acquireCreatingTableInReplicatedDatabase(const String & database_zk_path, const String & table_name) override;

Some files were not shown because too many files have changed in this diff Show More