Merge branch 'master' into zk_inject_timeout

Commit ff209db129
@@ -180,7 +180,6 @@ if (NOT CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE")

    # Can be lld or ld-lld or lld-13 or /path/to/lld.
    if (LINKER_NAME MATCHES "lld" AND OS_LINUX)
        set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--gdb-index")
        set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--gdb-index")
        message (STATUS "Adding .gdb-index via --gdb-index linker option.")
    endif ()
endif()

@@ -19,8 +19,8 @@ endif()
if (NOT "$ENV{CFLAGS}" STREQUAL ""
    OR NOT "$ENV{CXXFLAGS}" STREQUAL ""
    OR NOT "$ENV{LDFLAGS}" STREQUAL ""
    OR CMAKE_C_FLAGS OR CMAKE_CXX_FLAGS OR CMAKE_EXE_LINKER_FLAGS OR CMAKE_SHARED_LINKER_FLAGS OR CMAKE_MODULE_LINKER_FLAGS
    OR CMAKE_C_FLAGS_INIT OR CMAKE_CXX_FLAGS_INIT OR CMAKE_EXE_LINKER_FLAGS_INIT OR CMAKE_SHARED_LINKER_FLAGS_INIT OR CMAKE_MODULE_LINKER_FLAGS_INIT)
    OR CMAKE_C_FLAGS OR CMAKE_CXX_FLAGS OR CMAKE_EXE_LINKER_FLAGS OR CMAKE_MODULE_LINKER_FLAGS
    OR CMAKE_C_FLAGS_INIT OR CMAKE_CXX_FLAGS_INIT OR CMAKE_EXE_LINKER_FLAGS_INIT OR CMAKE_MODULE_LINKER_FLAGS_INIT)

    # if $ENV
    message("CFLAGS: $ENV{CFLAGS}")
@@ -36,7 +36,6 @@ if (NOT "$ENV{CFLAGS}" STREQUAL ""
    message("CMAKE_C_FLAGS_INIT: ${CMAKE_C_FLAGS_INIT}")
    message("CMAKE_CXX_FLAGS_INIT: ${CMAKE_CXX_FLAGS_INIT}")
    message("CMAKE_EXE_LINKER_FLAGS_INIT: ${CMAKE_EXE_LINKER_FLAGS_INIT}")
    message("CMAKE_SHARED_LINKER_FLAGS_INIT: ${CMAKE_SHARED_LINKER_FLAGS_INIT}")
    message("CMAKE_MODULE_LINKER_FLAGS_INIT: ${CMAKE_MODULE_LINKER_FLAGS_INIT}")

    message(FATAL_ERROR "

@@ -732,9 +732,10 @@ public:
        if (std::numeric_limits<T>::is_signed && (is_negative(lhs) != is_negative(rhs)))
            return is_negative(rhs);

        integer<Bits, Signed> t = rhs;
        for (unsigned i = 0; i < item_count; ++i)
        {
            base_type rhs_item = get_item(rhs, big(i));
            base_type rhs_item = get_item(t, big(i));

            if (lhs.items[big(i)] != rhs_item)
                return lhs.items[big(i)] > rhs_item;
@@ -757,9 +758,10 @@ public:
        if (std::numeric_limits<T>::is_signed && (is_negative(lhs) != is_negative(rhs)))
            return is_negative(lhs);

        integer<Bits, Signed> t = rhs;
        for (unsigned i = 0; i < item_count; ++i)
        {
            base_type rhs_item = get_item(rhs, big(i));
            base_type rhs_item = get_item(t, big(i));

            if (lhs.items[big(i)] != rhs_item)
                return lhs.items[big(i)] < rhs_item;
@@ -779,9 +781,10 @@ public:
    {
        if constexpr (should_keep_size<T>())
        {
            integer<Bits, Signed> t = rhs;
            for (unsigned i = 0; i < item_count; ++i)
            {
                base_type rhs_item = get_item(rhs, any(i));
                base_type rhs_item = get_item(t, any(i));

                if (lhs.items[any(i)] != rhs_item)
                    return false;

@@ -22,7 +22,6 @@ set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")
set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")

set (CMAKE_EXE_LINKER_FLAGS_INIT "-fuse-ld=bfd")
set (CMAKE_SHARED_LINKER_FLAGS_INIT "-fuse-ld=bfd")

# Currently, lld does not work with the error:
# ld.lld: error: section size decrease is too large

@@ -30,7 +30,6 @@ set (CMAKE_SYSROOT "${TOOLCHAIN_PATH}/x86_64-linux-gnu/libc")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")
set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")
set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")
set (CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")

@@ -95,10 +95,8 @@ if (LINKER_NAME)
        configure_file ("${CMAKE_CURRENT_SOURCE_DIR}/cmake/ld.lld.in" "${LLD_WRAPPER}" @ONLY)

        set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --ld-path=${LLD_WRAPPER}")
        set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} --ld-path=${LLD_WRAPPER}")
    else ()
        set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fuse-ld=${LINKER_NAME}")
        set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -fuse-ld=${LINKER_NAME}")
    endif ()

endif ()

@@ -3,7 +3,9 @@ set -ex
set -o pipefail
trap "exit" INT TERM
trap 'kill $(jobs -pr) ||:' EXIT
S3_URL=${S3_URL:="https://clickhouse-builds.s3.amazonaws.com"}
BUILD_NAME=${BUILD_NAME:-package_release}
export S3_URL BUILD_NAME

mkdir db0 ||:
mkdir left ||:
@@ -28,8 +30,9 @@ function download
    # Historically there were various paths for the performance test package.
    # Test all of them.
    declare -a urls_to_try=(
        "https://s3.amazonaws.com/clickhouse-builds/$left_pr/$left_sha/$BUILD_NAME/performance.tar.zst"
        "https://s3.amazonaws.com/clickhouse-builds/$left_pr/$left_sha/$BUILD_NAME/performance.tgz"
        "$S3_URL/PRs/$left_pr/$left_sha/$BUILD_NAME/performance.tar.zst"
        "$S3_URL/$left_pr/$left_sha/$BUILD_NAME/performance.tar.zst"
        "$S3_URL/$left_pr/$left_sha/$BUILD_NAME/performance.tgz"
    )

    for path in "${urls_to_try[@]}"

@@ -6,11 +6,7 @@ export CHPC_CHECK_START_TIMESTAMP

S3_URL=${S3_URL:="https://clickhouse-builds.s3.amazonaws.com"}
BUILD_NAME=${BUILD_NAME:-package_release}

COMMON_BUILD_PREFIX="/clickhouse_build_check"
if [[ $S3_URL == *"s3.amazonaws.com"* ]]; then
    COMMON_BUILD_PREFIX=""
fi
export S3_URL BUILD_NAME

# Sometimes AWS responde with DNS error and it's impossible to retry it with
# current curl version options.
@@ -66,8 +62,9 @@ function find_reference_sha
    # test all of them.
    unset found
    declare -a urls_to_try=(
        "https://s3.amazonaws.com/clickhouse-builds/0/$REF_SHA/$BUILD_NAME/performance.tar.zst"
        "https://s3.amazonaws.com/clickhouse-builds/0/$REF_SHA/$BUILD_NAME/performance.tgz"
        "$S3_URL/PRs/0/$REF_SHA/$BUILD_NAME/performance.tar.zst"
        "$S3_URL/0/$REF_SHA/$BUILD_NAME/performance.tar.zst"
        "$S3_URL/0/$REF_SHA/$BUILD_NAME/performance.tgz"
    )
    for path in "${urls_to_try[@]}"
    do
@@ -92,10 +89,15 @@ chmod 777 workspace output
cd workspace

# Download the package for the version we are going to test.
if curl_with_retry "$S3_URL/$PR_TO_TEST/$SHA_TO_TEST$COMMON_BUILD_PREFIX/$BUILD_NAME/performance.tar.zst"
then
    right_path="$S3_URL/$PR_TO_TEST/$SHA_TO_TEST$COMMON_BUILD_PREFIX/$BUILD_NAME/performance.tar.zst"
fi
# A temporary solution for migrating into PRs directory
for prefix in "$S3_URL/PRs" "$S3_URL";
do
    if curl_with_retry "$prefix/$PR_TO_TEST/$SHA_TO_TEST/$BUILD_NAME/performance.tar.zst"
    then
        right_path="$prefix/$PR_TO_TEST/$SHA_TO_TEST/$BUILD_NAME/performance.tar.zst"
        break
    fi
done

mkdir right
wget -nv -nd -c "$right_path" -O- | tar -C right --no-same-owner --strip-components=1 --zstd --extract --verbose

@@ -26,6 +26,7 @@ logging.basicConfig(
total_start_seconds = time.perf_counter()
stage_start_seconds = total_start_seconds


# Thread executor that does not hides exception that happens during function
# execution, and rethrows it after join()
class SafeThread(Thread):
@@ -158,6 +159,7 @@ for e in subst_elems:

    available_parameters[name] = values


# Takes parallel lists of templates, substitutes them with all combos of
# parameters. The set of parameters is determined based on the first list.
# Note: keep the order of queries -- sometimes we have DROP IF EXISTS

@@ -670,7 +670,6 @@ if args.report == "main":
    )

elif args.report == "all-queries":

    print((header_template.format()))

    add_tested_commits()

@@ -10,31 +10,38 @@ import requests
import tempfile


DEFAULT_URL = 'https://clickhouse-datasets.s3.amazonaws.com'
DEFAULT_URL = "https://clickhouse-datasets.s3.amazonaws.com"

AVAILABLE_DATASETS = {
    'hits': 'hits_v1.tar',
    'visits': 'visits_v1.tar',
    "hits": "hits_v1.tar",
    "visits": "visits_v1.tar",
}

RETRIES_COUNT = 5


def _get_temp_file_name():
    return os.path.join(tempfile._get_default_tempdir(), next(tempfile._get_candidate_names()))
    return os.path.join(
        tempfile._get_default_tempdir(), next(tempfile._get_candidate_names())
    )


def build_url(base_url, dataset):
    return os.path.join(base_url, dataset, 'partitions', AVAILABLE_DATASETS[dataset])
    return os.path.join(base_url, dataset, "partitions", AVAILABLE_DATASETS[dataset])


def dowload_with_progress(url, path):
    logging.info("Downloading from %s to temp path %s", url, path)
    for i in range(RETRIES_COUNT):
        try:
            with open(path, 'wb') as f:
            with open(path, "wb") as f:
                response = requests.get(url, stream=True)
                response.raise_for_status()
                total_length = response.headers.get('content-length')
                total_length = response.headers.get("content-length")
                if total_length is None or int(total_length) == 0:
                    logging.info("No content-length, will download file without progress")
                    logging.info(
                        "No content-length, will download file without progress"
                    )
                    f.write(response.content)
                else:
                    dl = 0
@@ -46,7 +53,11 @@ def dowload_with_progress(url, path):
                        if sys.stdout.isatty():
                            done = int(50 * dl / total_length)
                            percent = int(100 * float(dl) / total_length)
                            sys.stdout.write("\r[{}{}] {}%".format('=' * done, ' ' * (50-done), percent))
                            sys.stdout.write(
                                "\r[{}{}] {}%".format(
                                    "=" * done, " " * (50 - done), percent
                                )
                            )
                            sys.stdout.flush()
            break
        except Exception as ex:
@@ -56,14 +67,21 @@ def dowload_with_progress(url, path):
            if os.path.exists(path):
                os.remove(path)
    else:
        raise Exception("Cannot download dataset from {}, all retries exceeded".format(url))
        raise Exception(
            "Cannot download dataset from {}, all retries exceeded".format(url)
        )

    sys.stdout.write("\n")
    logging.info("Downloading finished")


def unpack_to_clickhouse_directory(tar_path, clickhouse_path):
    logging.info("Will unpack data from temp path %s to clickhouse db %s", tar_path, clickhouse_path)
    with tarfile.open(tar_path, 'r') as comp_file:
    logging.info(
        "Will unpack data from temp path %s to clickhouse db %s",
        tar_path,
        clickhouse_path,
    )
    with tarfile.open(tar_path, "r") as comp_file:
        comp_file.extractall(path=clickhouse_path)
    logging.info("Unpack finished")


@@ -72,15 +90,21 @@ if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    parser = argparse.ArgumentParser(
        description="Simple tool for dowloading datasets for clickhouse from S3")
        description="Simple tool for dowloading datasets for clickhouse from S3"
    )

    parser.add_argument('--dataset-names', required=True, nargs='+', choices=list(AVAILABLE_DATASETS.keys()))
    parser.add_argument('--url-prefix', default=DEFAULT_URL)
    parser.add_argument('--clickhouse-data-path', default='/var/lib/clickhouse/')
    parser.add_argument(
        "--dataset-names",
        required=True,
        nargs="+",
        choices=list(AVAILABLE_DATASETS.keys()),
    )
    parser.add_argument("--url-prefix", default=DEFAULT_URL)
    parser.add_argument("--clickhouse-data-path", default="/var/lib/clickhouse/")

    args = parser.parse_args()
    datasets = args.dataset_names
    logging.info("Will fetch following datasets: %s", ', '.join(datasets))
    logging.info("Will fetch following datasets: %s", ", ".join(datasets))
    for dataset in datasets:
        logging.info("Processing %s", dataset)
        temp_archive_path = _get_temp_file_name()
@@ -92,10 +116,11 @@ if __name__ == "__main__":
            logging.info("Some exception occured %s", str(ex))
            raise
        finally:
            logging.info("Will remove downloaded file %s from filesystem if it exists", temp_archive_path)
            logging.info(
                "Will remove downloaded file %s from filesystem if it exists",
                temp_archive_path,
            )
            if os.path.exists(temp_archive_path):
                os.remove(temp_archive_path)
        logging.info("Processing of %s finished", dataset)
    logging.info("Fetch finished, enjoy your tables!")

@@ -11,13 +11,14 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
    aspell \
    curl \
    git \
    file \
    libxml2-utils \
    moreutils \
    python3-fuzzywuzzy \
    python3-pip \
    shellcheck \
    yamllint \
    && pip3 install black==22.8.0 boto3 codespell==2.2.1 dohq-artifactory mypy PyGithub unidiff pylint==2.6.2 \
    && pip3 install black==23.1.0 boto3 codespell==2.2.1 dohq-artifactory mypy PyGithub unidiff pylint==2.6.2 \
    && apt-get clean \
    && rm -rf /root/.cache/pip

docs/en/getting-started/example-datasets/covid19.md (new file, 265 lines)
@@ -0,0 +1,265 @@
---
slug: /en/getting-started/example-datasets/covid19
sidebar_label: COVID-19 Open-Data
---

# COVID-19 Open-Data

COVID-19 Open-Data attempts to assemble the largest Covid-19 epidemiological database, in addition to a powerful set of expansive covariates. It includes open, publicly sourced, licensed data relating to demographics, economy, epidemiology, geography, health, hospitalizations, mobility, government response, weather, and more.

The details are in GitHub [here](https://github.com/GoogleCloudPlatform/covid-19-open-data).

It's easy to insert this data into ClickHouse...

:::note
The following commands were executed on a **Production** instance of [ClickHouse Cloud](https://clickhouse.cloud). You can easily run them on a local install as well.
:::

1. Let's see what the data looks like:

```sql
DESCRIBE url(
    'https://storage.googleapis.com/covid19-open-data/v3/epidemiology.csv',
    'CSVWithNames'
);
```

The CSV file has 10 columns:

```response
┌─name─────────────────┬─type─────────────┐
│ date │ Nullable(String) │
│ location_key │ Nullable(String) │
│ new_confirmed │ Nullable(Int64) │
│ new_deceased │ Nullable(Int64) │
│ new_recovered │ Nullable(Int64) │
│ new_tested │ Nullable(Int64) │
│ cumulative_confirmed │ Nullable(Int64) │
│ cumulative_deceased │ Nullable(Int64) │
│ cumulative_recovered │ Nullable(Int64) │
│ cumulative_tested │ Nullable(Int64) │
└──────────────────────┴──────────────────┘

10 rows in set. Elapsed: 0.745 sec.
```

2. Now let's view some of the rows:

```sql
SELECT *
FROM url('https://storage.googleapis.com/covid19-open-data/v3/epidemiology.csv')
LIMIT 100;
```

Notice the `url` function easily reads data from a CSV file:

```response
┌─c1─────────┬─c2───────────┬─c3────────────┬─c4───────────┬─c5────────────┬─c6─────────┬─c7───────────────────┬─c8──────────────────┬─c9───────────────────┬─c10───────────────┐
│ date │ location_key │ new_confirmed │ new_deceased │ new_recovered │ new_tested │ cumulative_confirmed │ cumulative_deceased │ cumulative_recovered │ cumulative_tested │
│ 2020-04-03 │ AD │ 24 │ 1 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 466 │ 17 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │
│ 2020-04-04 │ AD │ 57 │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 523 │ 17 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │
│ 2020-04-05 │ AD │ 17 │ 4 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 540 │ 21 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │
│ 2020-04-06 │ AD │ 11 │ 1 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 551 │ 22 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │
│ 2020-04-07 │ AD │ 15 │ 2 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 566 │ 24 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │
│ 2020-04-08 │ AD │ 23 │ 2 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 589 │ 26 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │
└────────────┴──────────────┴───────────────┴──────────────┴───────────────┴────────────┴──────────────────────┴─────────────────────┴──────────────────────┴───────────────────┘
```

3. We will create a table now that we know what the data looks like:

```sql
CREATE TABLE covid19 (
    date Date,
    location_key LowCardinality(String),
    new_confirmed Int32,
    new_deceased Int32,
    new_recovered Int32,
    new_tested Int32,
    cumulative_confirmed Int32,
    cumulative_deceased Int32,
    cumulative_recovered Int32,
    cumulative_tested Int32
)
ENGINE = MergeTree
ORDER BY (location_key, date);
```

4. The following command inserts the entire dataset into the `covid19` table:

```sql
INSERT INTO covid19
    SELECT *
    FROM
        url(
            'https://storage.googleapis.com/covid19-open-data/v3/epidemiology.csv',
            CSVWithNames,
            'date Date,
            location_key LowCardinality(String),
            new_confirmed Int32,
            new_deceased Int32,
            new_recovered Int32,
            new_tested Int32,
            cumulative_confirmed Int32,
            cumulative_deceased Int32,
            cumulative_recovered Int32,
            cumulative_tested Int32'
        );
```

5. It goes pretty quick - let's see how many rows were inserted:

```sql
SELECT formatReadableQuantity(count())
FROM covid19;
```

```response
┌─formatReadableQuantity(count())─┐
│ 12.53 million │
└─────────────────────────────────┘
```

6. Let's see how many total cases of Covid-19 were recorded:

```sql
SELECT formatReadableQuantity(sum(new_confirmed))
FROM covid19;
```

```response
┌─formatReadableQuantity(sum(new_confirmed))─┐
│ 1.39 billion │
└────────────────────────────────────────────┘
```

7. You will notice the data has a lot of 0's for dates - either weekends or days when numbers were not reported. We can use a window function to smooth out the daily averages of new cases:

```sql
SELECT
    AVG(new_confirmed) OVER (PARTITION BY location_key ORDER BY date ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING) AS cases_smoothed,
    new_confirmed,
    location_key,
    date
FROM covid19;
```

8. This query determines the latest values for each location. We can't use `max(date)` because not all countries reported every day, so we grab the last row using `ROW_NUMBER`:

```sql
WITH latest_deaths_data AS
    ( SELECT location_key,
             date,
             new_deceased,
             new_confirmed,
             ROW_NUMBER() OVER (PARTITION BY location_key ORDER BY date DESC) as rn
      FROM covid19)
SELECT location_key,
       date,
       new_deceased,
       new_confirmed,
       rn
FROM latest_deaths_data
WHERE rn=1;
```

9. We can use `lagInFrame` to determine the `LAG` of new cases each day. In this query we filter by the `US_DC` location:

```sql
SELECT
    new_confirmed - lagInFrame(new_confirmed,1) OVER (PARTITION BY location_key ORDER BY date) AS confirmed_cases_delta,
    new_confirmed,
    location_key,
    date
FROM covid19
WHERE location_key = 'US_DC';
```

The response looks like:

```response
┌─confirmed_cases_delta─┬─new_confirmed─┬─location_key─┬───────date─┐
│ 0 │ 0 │ US_DC │ 2020-03-08 │
│ 2 │ 2 │ US_DC │ 2020-03-09 │
│ -2 │ 0 │ US_DC │ 2020-03-10 │
│ 6 │ 6 │ US_DC │ 2020-03-11 │
│ -6 │ 0 │ US_DC │ 2020-03-12 │
│ 0 │ 0 │ US_DC │ 2020-03-13 │
│ 6 │ 6 │ US_DC │ 2020-03-14 │
│ -5 │ 1 │ US_DC │ 2020-03-15 │
│ 4 │ 5 │ US_DC │ 2020-03-16 │
│ 4 │ 9 │ US_DC │ 2020-03-17 │
│ -1 │ 8 │ US_DC │ 2020-03-18 │
│ 24 │ 32 │ US_DC │ 2020-03-19 │
│ -26 │ 6 │ US_DC │ 2020-03-20 │
│ 15 │ 21 │ US_DC │ 2020-03-21 │
│ -3 │ 18 │ US_DC │ 2020-03-22 │
│ 3 │ 21 │ US_DC │ 2020-03-23 │
```

10. This query calculates the percentage of change in new cases each day, and includes a simple `increase` or `decrease` column in the result set:

```sql
WITH confirmed_lag AS (
  SELECT
    *,
    lagInFrame(new_confirmed) OVER(
      PARTITION BY location_key
      ORDER BY date
    ) AS confirmed_previous_day
  FROM covid19
),
confirmed_percent_change AS (
  SELECT
    *,
    COALESCE(ROUND((new_confirmed - confirmed_previous_day) / confirmed_previous_day * 100), 0) AS percent_change
  FROM confirmed_lag
)
SELECT
  date,
  new_confirmed,
  percent_change,
  CASE
    WHEN percent_change > 0 THEN 'increase'
    WHEN percent_change = 0 THEN 'no change'
    ELSE 'decrease'
  END AS trend
FROM confirmed_percent_change
WHERE location_key = 'US_DC';
```

The results look like:

```response
┌───────date─┬─new_confirmed─┬─percent_change─┬─trend─────┐
│ 2020-03-08 │ 0 │ nan │ decrease │
│ 2020-03-09 │ 2 │ inf │ increase │
│ 2020-03-10 │ 0 │ -100 │ decrease │
│ 2020-03-11 │ 6 │ inf │ increase │
│ 2020-03-12 │ 0 │ -100 │ decrease │
│ 2020-03-13 │ 0 │ nan │ decrease │
│ 2020-03-14 │ 6 │ inf │ increase │
│ 2020-03-15 │ 1 │ -83 │ decrease │
│ 2020-03-16 │ 5 │ 400 │ increase │
│ 2020-03-17 │ 9 │ 80 │ increase │
│ 2020-03-18 │ 8 │ -11 │ decrease │
│ 2020-03-19 │ 32 │ 300 │ increase │
│ 2020-03-20 │ 6 │ -81 │ decrease │
│ 2020-03-21 │ 21 │ 250 │ increase │
│ 2020-03-22 │ 18 │ -14 │ decrease │
│ 2020-03-23 │ 21 │ 17 │ increase │
│ 2020-03-24 │ 46 │ 119 │ increase │
│ 2020-03-25 │ 48 │ 4 │ increase │
│ 2020-03-26 │ 36 │ -25 │ decrease │
│ 2020-03-27 │ 37 │ 3 │ increase │
│ 2020-03-28 │ 38 │ 3 │ increase │
│ 2020-03-29 │ 59 │ 55 │ increase │
│ 2020-03-30 │ 94 │ 59 │ increase │
│ 2020-03-31 │ 91 │ -3 │ decrease │
│ 2020-04-01 │ 67 │ -26 │ decrease │
│ 2020-04-02 │ 104 │ 55 │ increase │
│ 2020-04-03 │ 145 │ 39 │ increase │
```

:::note
As mentioned in the [GitHub repo](https://github.com/GoogleCloudPlatform/covid-19-open-data), the dataset is no longer updated as of September 15, 2022.
:::

docs/en/getting-started/example-datasets/youtube-dislikes.md (new file, 219 lines)
@@ -0,0 +1,219 @@
---
slug: /en/getting-started/example-datasets/youtube-dislikes
sidebar_label: YouTube Dislikes
description: A collection of dislikes of YouTube videos.
---

# YouTube dataset of dislikes

In November of 2021, YouTube removed the public ***dislike*** count from all of its videos. While creators can still see the number of dislikes, viewers can only see how many ***likes*** a video has received.

:::important
The dataset has over 4.55 billion records, so be careful just copying-and-pasting the commands below unless your resources can handle that type of volume. The commands below were executed on a **Production** instance of [ClickHouse Cloud](https://clickhouse.cloud).
:::

The data is in a JSON format and can be downloaded from [archive.org](https://archive.org/download/dislikes_youtube_2021_12_video_json_files). We have made this same data available in S3 so that it can be downloaded more efficiently into a ClickHouse Cloud instance.

Here are the steps to create a table in ClickHouse Cloud and insert the data.

:::note
The steps below will easily work on a local install of ClickHouse too. The only change would be to use the `s3` function instead of `s3cluster` (unless you have a cluster configured - in which case change `default` to the name of your cluster).
:::

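For a local, single-server run, the `s3Cluster('default', ...)` calls used in the steps below would simply become `s3(...)` calls. A minimal sketch of that substitution (same public bucket, no cluster assumed):

```sql
DESCRIBE s3(
    'https://clickhouse-public-datasets.s3.amazonaws.com/youtube/original/files/*.zst',
    'JSONLines'
);
```
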
## Step-by-step instructions

1. Let's see what the data looks like. The `s3cluster` table function returns a table, so we can `DESCRIBE` the result:

```sql
DESCRIBE s3Cluster(
    'default',
    'https://clickhouse-public-datasets.s3.amazonaws.com/youtube/original/files/*.zst',
    'JSONLines'
);
```

ClickHouse infers the following schema from the JSON file:

```response
┌─name────────────────┬─type─────────────────────────────────┐
│ id │ Nullable(String) │
│ fetch_date │ Nullable(Int64) │
│ upload_date │ Nullable(String) │
│ title │ Nullable(String) │
│ uploader_id │ Nullable(String) │
│ uploader │ Nullable(String) │
│ uploader_sub_count │ Nullable(Int64) │
│ is_age_limit │ Nullable(Bool) │
│ view_count │ Nullable(Int64) │
│ like_count │ Nullable(Int64) │
│ dislike_count │ Nullable(Int64) │
│ is_crawlable │ Nullable(Bool) │
│ is_live_content │ Nullable(Bool) │
│ has_subtitles │ Nullable(Bool) │
│ is_ads_enabled │ Nullable(Bool) │
│ is_comments_enabled │ Nullable(Bool) │
│ description │ Nullable(String) │
│ rich_metadata │ Array(Map(String, Nullable(String))) │
│ super_titles │ Array(Map(String, Nullable(String))) │
│ uploader_badges │ Nullable(String) │
│ video_badges │ Nullable(String) │
└─────────────────────┴──────────────────────────────────────┘
```

2. Based on the inferred schema, we cleaned up the data types and added a primary key. Define the following table:

```sql
CREATE TABLE youtube
(
    `id` String,
    `fetch_date` DateTime,
    `upload_date` String,
    `title` String,
    `uploader_id` String,
    `uploader` String,
    `uploader_sub_count` Int64,
    `is_age_limit` Bool,
    `view_count` Int64,
    `like_count` Int64,
    `dislike_count` Int64,
    `is_crawlable` Bool,
    `has_subtitles` Bool,
    `is_ads_enabled` Bool,
    `is_comments_enabled` Bool,
    `description` String,
    `rich_metadata` Array(Map(String, String)),
    `super_titles` Array(Map(String, String)),
    `uploader_badges` String,
    `video_badges` String
)
ENGINE = MergeTree
ORDER BY (upload_date, uploader);
```

3. The following command streams the records from the S3 files into the `youtube` table.

:::important
This inserts a lot of data - 4.65 billion rows. If you do not want the entire dataset, simply add a `LIMIT` clause with the desired number of rows.
:::

```sql
INSERT INTO youtube
SETTINGS input_format_null_as_default = 1
SELECT
    id,
    parseDateTimeBestEffortUS(toString(fetch_date)) AS fetch_date,
    upload_date,
    ifNull(title, '') AS title,
    uploader_id,
    ifNull(uploader, '') AS uploader,
    uploader_sub_count,
    is_age_limit,
    view_count,
    like_count,
    dislike_count,
    is_crawlable,
    has_subtitles,
    is_ads_enabled,
    is_comments_enabled,
    ifNull(description, '') AS description,
    rich_metadata,
    super_titles,
    ifNull(uploader_badges, '') AS uploader_badges,
    ifNull(video_badges, '') AS video_badges
FROM s3Cluster(
    'default',
    'https://clickhouse-public-datasets.s3.amazonaws.com/youtube/original/files/*.zst',
    'JSONLines'
);
```

4. Open a new tab in the SQL Console of ClickHouse Cloud (or a new `clickhouse-client` window) and watch the count increase. It will take a while to insert 4.56B rows, depending on your server resources. (Without any tweaking of settings, it takes about 4.5 hours.)

```sql
SELECT formatReadableQuantity(count())
FROM youtube
```

```response
┌─formatReadableQuantity(count())─┐
│ 4.56 billion │
└─────────────────────────────────┘
```

5. Once the data is inserted, go ahead and count the number of dislikes of your favorite videos or channels. Let's see how many videos were uploaded by ClickHouse:

```sql
SELECT count()
FROM youtube
WHERE uploader = 'ClickHouse';
```

```response
┌─count()─┐
│ 84 │
└─────────┘

1 row in set. Elapsed: 0.570 sec. Processed 237.57 thousand rows, 5.77 MB (416.54 thousand rows/s., 10.12 MB/s.)
```

:::note
The query above runs so quickly because we chose `uploader` as the first column of the primary key - so it only had to process 237k rows.
:::

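One way to see that primary-key pruning for yourself (a sketch, not part of the original walkthrough) is to ask ClickHouse for its index analysis:

```sql
-- Shows which primary-key granule ranges are selected for the predicate on `uploader`
EXPLAIN indexes = 1
SELECT count()
FROM youtube
WHERE uploader = 'ClickHouse';
```
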
6. Let's look at likes and dislikes of ClickHouse videos:

```sql
SELECT
    title,
    like_count,
    dislike_count
FROM youtube
WHERE uploader = 'ClickHouse'
ORDER BY dislike_count DESC;
```

The response looks like:

```response
┌─title────────────────────────────────────────────────────────────────────────────────────────────────┬─like_count─┬─dislike_count─┐
│ ClickHouse v21.11 Release Webinar │ 52 │ 3 │
│ ClickHouse Introduction │ 97 │ 3 │
│ Casa Modelo Algarve │ 180 │ 3 │
│ Профайлер запросов: трудный путь │ 33 │ 3 │
│ ClickHouse в Курсометре │ 4 │ 2 │
│ 10 Good Reasons to Use ClickHouse │ 27 │ 2 │
...

84 rows in set. Elapsed: 0.013 sec. Processed 155.65 thousand rows, 16.94 MB (11.96 million rows/s., 1.30 GB/s.)
```

7. Here is a search for videos with **ClickHouse** in the `title` or `description` fields:

```sql
SELECT
    view_count,
    like_count,
    dislike_count,
    concat('https://youtu.be/', id) AS url,
    title
FROM youtube
WHERE (title ILIKE '%ClickHouse%') OR (description ILIKE '%ClickHouse%')
ORDER BY
    like_count DESC,
    view_count DESC
```

This query has to process every row, and also parse through two columns of strings. Even then, we get decent performance at 4.15M rows/second:

```response
1174 rows in set. Elapsed: 1099.368 sec. Processed 4.56 billion rows, 1.98 TB (4.15 million rows/s., 1.80 GB/s.)
```

The results look like:

```response
┌─view_count─┬─like_count─┬─dislike_count─┬─url──────────────────────────┬─title──────────────────────────────────────────────────────────────────────────────────────────────────┐
│ 1919 │ 63 │ 1 │ https://youtu.be/b9MeoOtAivQ │ ClickHouse v21.10 Release Webinar │
│ 8710 │ 62 │ 4 │ https://youtu.be/PeV1mC2z--M │ What is JDBC DriverManager? | JDBC │
│ 3534 │ 62 │ 1 │ https://youtu.be/8nWRhK9gw10 │ CLICKHOUSE - Arquitetura Modular │
```

@@ -120,7 +120,7 @@ Query:
CREATE USER my_user IDENTIFIED WITH ldap SERVER 'my_ldap_server';
```

## LDAP Exernal User Directory {#ldap-external-user-directory}
## LDAP External User Directory {#ldap-external-user-directory}

In addition to the locally defined users, a remote LDAP server can be used as a source of user definitions. To achieve this, specify previously defined LDAP server name (see [LDAP Server Definition](#ldap-server-definition)) in the `ldap` section inside the `users_directories` section of the `config.xml` file.

@@ -85,8 +85,8 @@ make the matching more natural, all query-level settings related to the query ca

If the query was aborted due to an exception or user cancellation, no entry is written into the query cache.

The size of the query cache, the maximum number of cache entries and the maximum size of cache entries (in bytes and in records) can
be configured using different [server configuration options](server-configuration-parameters/settings.md#server_configuration_parameters_query-cache).
The size of the query cache in bytes, the maximum number of cache entries and the maximum size of individual cache entries (in bytes and in
records) can be configured using different [server configuration options](server-configuration-parameters/settings.md#server_configuration_parameters_query-cache).
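As a usage sketch (assuming the query-level `use_query_cache` setting available in recent releases), an individual query opts into the cache like this:

```sql
SELECT count()
FROM system.tables
SETTINGS use_query_cache = true;
```
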

To define how long a query must run at least such that its result can be cached, you can use setting
[query_cache_min_query_duration](settings/settings.md#query-cache-min-query-duration). For example, the result of query

@@ -1361,7 +1361,7 @@ If the table does not exist, ClickHouse will create it. If the structure of the

The following settings are available:

- `size`: The maximum cache size in bytes. 0 means the query cache is disabled. Default value: `1073741824` (1 GiB).
- `max_size`: The maximum cache size in bytes. 0 means the query cache is disabled. Default value: `1073741824` (1 GiB).
- `max_entries`: The maximum number of `SELECT` query results stored in the cache. Default value: `1024`.
- `max_entry_size`: The maximum size in bytes `SELECT` query results may have to be saved in the cache. Default value: `1048576` (1 MiB).
- `max_entry_rows`: The maximum number of rows `SELECT` query results may have to be saved in the cache. Default value: `30000000` (30 mil).
@@ -1369,7 +1369,7 @@ The following settings are available:
Changed settings take effect immediately.

:::warning
Data for the query cache is allocated in DRAM. If memory is scarce, make sure to set a small value for `size` or disable the query cache altogether.
Data for the query cache is allocated in DRAM. If memory is scarce, make sure to set a small value for `max_size` or disable the query cache altogether.
:::

**Example**
@@ -1882,6 +1882,16 @@ The update is performed asynchronously, in a separate system thread.
Manage executing [distributed ddl queries](../../sql-reference/distributed-ddl.md) (CREATE, DROP, ALTER, RENAME) on cluster.
Works only if [ZooKeeper](#server-settings_zookeeper) is enabled.
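For context, these settings govern DDL statements of the following shape (a hedged sketch; the cluster, database, and table names here are hypothetical):

```sql
CREATE TABLE default.events ON CLUSTER my_cluster
(
    id UInt64,
    ts DateTime
)
ENGINE = MergeTree
ORDER BY id;
```
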
The configurable settings within `<distributed_ddl>` include:

- **path**: the path in Keeper for the `task_queue` for DDL queries
- **profile**: the profile used to execute the DDL queries
- **pool_size**: how many `ON CLUSTER` queries can be run simultaneously
- **max_tasks_in_queue**: the maximum number of tasks that can be in the queue. Default is 1,000
- **task_max_lifetime**: delete node if its age is greater than this value. Default is `7 * 24 * 60 * 60` (a week in seconds)
- **cleanup_delay_period**: cleaning starts after new node event is received if the last cleaning wasn't made sooner than `cleanup_delay_period` seconds ago. Default is 60 seconds

**Example**

```xml
docs/en/operations/system-tables/dropped_tables.md (new file, 37 lines)
@@ -0,0 +1,37 @@
---
slug: /en/operations/system-tables/dropped_tables
---
# dropped_tables

Contains information about tables for which `DROP TABLE` has been executed but the data cleanup has not yet been performed.

Columns:

- `index` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Index in marked_dropped_tables queue.
- `database` ([String](../../sql-reference/data-types/string.md)) — Database.
- `table` ([String](../../sql-reference/data-types/string.md)) — Table name.
- `uuid` ([UUID](../../sql-reference/data-types/uuid.md)) — Table uuid.
- `engine` ([String](../../sql-reference/data-types/string.md)) — Table engine name.
- `metadata_dropped_path` ([String](../../sql-reference/data-types/string.md)) — Path of table's metadata file in the metadata_dropped directory.
- `table_dropped_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The time when the next attempt to remove table's data is scheduled. Usually it's the time when the table was dropped plus `database_atomic_delay_before_drop_table_sec`

**Example**

The following example shows how to get information about dropped_tables.

``` sql
SELECT *
FROM system.dropped_tables\G
```

``` text
Row 1:
──────
index: 0
database: default
table: test
uuid: 03141bb2-e97a-4d7c-a172-95cc066bb3bd
engine: MergeTree
metadata_dropped_path: /data/ClickHouse/build/programs/data/metadata_dropped/default.test.03141bb2-e97a-4d7c-a172-95cc066bb3bd.sql
table_dropped_time: 2023-03-16 23:43:31
```
@@ -1238,7 +1238,7 @@ Formats a Time according to the given Format string. Format is a constant expres

formatDateTime uses MySQL datetime format style, refer to https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_date-format.

The opposite operation of this function is [formatDateTime](/docs/en/sql-reference/functions/type-conversion-functions.md#formatdatetime).
The opposite operation of this function is [parseDateTime](/docs/en/sql-reference/functions/type-conversion-functions.md#type_conversion_functions-parseDateTime).

Alias: `DATE_FORMAT`.
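A quick usage sketch of the MySQL-style format (not part of this diff):

```sql
SELECT formatDateTime(now(), '%Y-%m-%d %H:%i:%s') AS formatted;
```
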
@@ -1334,7 +1334,7 @@ Result:

Similar to formatDateTime, except that it formats datetime in Joda style instead of MySQL style. Refer to https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html.

The opposite operation of this function is [formatDateTimeInJodaSyntax](/docs/en/sql-reference/functions/type-conversion-functions.md#formatdatetimeinjodasyntax).
The opposite operation of this function is [parseDateTimeInJodaSyntax](/docs/en/sql-reference/functions/type-conversion-functions.md#type_conversion_functions-parseDateTimeInJodaSyntax).

**Replacement fields**

@@ -1148,9 +1148,10 @@ Result:
└───────────────────────────┴──────────────────────────────┘
```

## parseDateTime
## parseDateTime {#type_conversion_functions-parseDateTime}

Converts a [String](/docs/en/sql-reference/data-types/string.md) to [DateTime](/docs/en/sql-reference/data-types/datetime.md) according to a [MySQL format string](https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_date-format).

This function is the opposite operation of function [formatDateTime](/docs/en/sql-reference/functions/date-time-functions.md#date_time_functions-formatDateTime).

**Syntax**
@@ -1163,6 +1164,7 @@ parseDateTime(str, format[, timezone])

- `str` — the String to be parsed
- `format` — the format string
- `timezone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). Optional.

**Returned value(s)**

@@ -1186,9 +1188,10 @@ SELECT parseDateTime('2021-01-04+23:00:00', '%Y-%m-%d+%H:%i:%s')

Alias: `TO_TIMESTAMP`.

## parseDateTimeInJodaSyntax
## parseDateTimeInJodaSyntax {#type_conversion_functions-parseDateTimeInJodaSyntax}

Similar to [parseDateTime](#parsedatetime), except that the format string is in [Joda](https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html) instead of MySQL syntax.

This function is the opposite operation of function [formatDateTimeInJodaSyntax](/docs/en/sql-reference/functions/date-time-functions.md#date_time_functions-formatDateTimeInJodaSyntax).

**Syntax**
@@ -1201,6 +1204,7 @@ parseDateTimeInJodaSyntax(str, format[, timezone])

- `str` — the String to be parsed
- `format` — the format string
- `timezone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). Optional.

**Returned value(s)**

@@ -128,7 +128,7 @@ SELECT
    user_agent,
    sum(pages_visited)
FROM visits
GROUP BY user_id
GROUP BY user_agent
```

As mentioned before, we could review the `system.query_log` table. On the `projections` field we have the name of the projection used or empty if none has been used:
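A hedged sketch of such a check against `system.query_log` (the exact filter is up to you):

```sql
SELECT query, projections
FROM system.query_log
WHERE type = 'QueryFinish'
ORDER BY event_time DESC
LIMIT 10;
```
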
@@ -36,7 +36,61 @@ There are multiple ways of user identification:

- `IDENTIFIED WITH kerberos` or `IDENTIFIED WITH kerberos REALM 'realm'`
- `IDENTIFIED WITH ssl_certificate CN 'mysite.com:user'`

For identification with sha256_hash using `SALT` - hash must be calculated from concatination of 'password' and 'salt'.
## Examples

1. The following username is `name1` and does not require a password - which obviously doesn't provide much security:

```sql
CREATE USER name1 NOT IDENTIFIED
```

2. To specify a plaintext password:

```sql
CREATE USER name2 IDENTIFIED WITH plaintext_password BY 'my_password'
```

:::warning
The password is stored in a SQL text file in `/var/lib/clickhouse/access`, so it's not a good idea to use `plaintext_password`. Try `sha256_password` instead, as demonstrated next...
:::

3. The best option is to use a password that is hashed using SHA-256. ClickHouse will hash the password for you when you specify `IDENTIFIED WITH sha256_password`. For example:

```sql
CREATE USER name3 IDENTIFIED WITH sha256_password BY 'my_password'
```

Notice ClickHouse generates and runs the following command for you:

```response
CREATE USER name3
IDENTIFIED WITH sha256_hash
BY '8B3404953FCAA509540617F082DB13B3E0734F90FF6365C19300CC6A6EA818D6'
SALT 'D6489D8B5692D82FF944EA6415785A8A8A1AF33825456AFC554487725A74A609'
```

The `name3` user can now login using `my_password`, but the password is stored as the hashed value above. The following SQL file was created in `/var/lib/clickhouse/access` and gets executed at server startup:

```bash
/var/lib/clickhouse/access $ cat 3843f510-6ebd-a52d-72ac-e021686d8a93.sql
ATTACH USER name3 IDENTIFIED WITH sha256_hash BY '0C268556C1680BEF0640AAC1E7187566704208398DA31F03D18C74F5C5BE5053' SALT '4FB16307F5E10048196966DD7E6876AE53DE6A1D1F625488482C75F14A5097C7';
```

:::note
If you have already created a hash value and corresponding salt value for a username, then you can use `IDENTIFIED WITH sha256_hash BY 'hash'` or `IDENTIFIED WITH sha256_hash BY 'hash' SALT 'salt'`. For identification with `sha256_hash` using `SALT` - hash must be calculated from concatenation of 'password' and 'salt'.
:::

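As a sketch of that calculation (hypothetical password and salt values; it is not meant to reproduce the exact hash shown above), the hex-encoded SHA-256 of the concatenation can be computed directly in SQL:

```sql
-- hash = hex(SHA256(password || salt)), per the note above
SELECT hex(SHA256(concat('my_password', 'my_salt'))) AS sha256_hash_value;
```
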
4. The `double_sha1_password` is not typically needed, but comes in handy when working with clients that require it (like the MySQL interface):

```sql
CREATE USER name4 IDENTIFIED WITH double_sha1_password BY 'my_password'
```

ClickHouse generates and runs the following query:

```response
CREATE USER name4 IDENTIFIED WITH double_sha1_hash BY 'CCD3A959D6A004B9C3807B728BC2E55B67E10518'
```

## User Host

@@ -21,13 +21,13 @@ Note that if you use this statement to get `CREATE` query of system tables, you
Prints a list of all databases.

```sql
SHOW DATABASES [LIKE | ILIKE | NOT LIKE '<pattern>'] [LIMIT <N>] [INTO OUTFILE filename] [FORMAT format]
SHOW DATABASES [[NOT] LIKE | ILIKE '<pattern>'] [LIMIT <N>] [INTO OUTFILE filename] [FORMAT format]
```

This statement is identical to the query:

```sql
SELECT name FROM system.databases [WHERE name LIKE | ILIKE | NOT LIKE '<pattern>'] [LIMIT <N>] [INTO OUTFILE filename] [FORMAT format]
SELECT name FROM system.databases [WHERE name [NOT] LIKE | ILIKE '<pattern>'] [LIMIT <N>] [INTO OUTFILE filename] [FORMAT format]
```

### Examples
@@ -117,7 +117,7 @@ $ watch -n1 "clickhouse-client --query='SHOW PROCESSLIST'"
Displays a list of tables.

```sql
SHOW [TEMPORARY] TABLES [{FROM | IN} <db>] [LIKE | ILIKE | NOT LIKE '<pattern>'] [LIMIT <N>] [INTO OUTFILE <filename>] [FORMAT <format>]
SHOW [FULL] [TEMPORARY] TABLES [{FROM | IN} <db>] [[NOT] LIKE | ILIKE '<pattern>'] [LIMIT <N>] [INTO OUTFILE <filename>] [FORMAT <format>]
```

If the `FROM` clause is not specified, the query returns the list of tables from the current database.
@@ -125,7 +125,7 @@ If the `FROM` clause is not specified, the query returns the list of tables from
This statement is identical to the query:

```sql
SELECT name FROM system.tables [WHERE name LIKE | ILIKE | NOT LIKE '<pattern>'] [LIMIT <N>] [INTO OUTFILE <filename>] [FORMAT <format>]
SELECT name FROM system.tables [WHERE name [NOT] LIKE | ILIKE '<pattern>'] [LIMIT <N>] [INTO OUTFILE <filename>] [FORMAT <format>]
```

### Examples
@@ -370,7 +370,7 @@ Returns a list of clusters. All available clusters are listed in the [system.clu

``` sql
SHOW CLUSTER '<name>'
SHOW CLUSTERS [LIKE|NOT LIKE '<pattern>'] [LIMIT <N>]
SHOW CLUSTERS [[NOT] LIKE|ILIKE '<pattern>'] [LIMIT <N>]
```
### Examples

@@ -7,20 +7,20 @@ sidebar_position: 101

# What is a columnar database? {#what-is-a-columnar-database}

A columnar database stores the data of each column independently. This makes it possible to read from disk only the columns used by a given query. The price is that operations affecting whole rows become proportionally more expensive. The synonym for a columnar database is a column-oriented database management system. ClickHouse is a typical example of such a system.

The key advantages of a columnar database are:

- Queries that use only a few columns out of many.
— Aggregating queries against large volumes of data.
— Compression by column.
- Aggregating queries against large volumes of data.
- Compression by column.

Here is the difference between a traditional row-oriented system and a columnar database when building a report:

**Traditional row-oriented storage**
!(Traditional row-oriented storage)(https://clickhouse.com/docs/en/images/row-oriented.gif)
![Traditional row-oriented storage](https://clickhouse.com/docs/assets/images/row-oriented-3e6fd5aa48e3075202d242b4799da8fa.gif)

**Columnar storage**
!(Columnar storage)(https://clickhouse.com/docs/en/images/column-oriented.gif)
![Columnar storage](https://clickhouse.com/docs/assets/images/column-oriented-d082e49b7743d4ded32c7952bfdb028f.gif)

Columnar databases are the preferred choice for analytical applications because they allow a table to contain many columns just in case, without paying the cost for unused columns when read queries are executed. Column-oriented databases are designed for big-data processing because, like data warehouses, they usually scale throughput with distributed clusters of low-cost hardware. ClickHouse combines [distributed](../../engines/table-engines/special/distributed.md) and [replicated](../../engines/table-engines/mergetree-family/replication.md) tables.

@@ -1517,7 +1517,7 @@

    <!-- Configuration for the query cache -->
    <!-- <query_cache> -->
    <!--     <size>1073741824</size> -->
    <!--     <max_size>1073741824</max_size> -->
    <!--     <max_entries>1024</max_entries> -->
    <!--     <max_entry_size>1048576</max_entry_size> -->
    <!--     <max_entry_rows>30000000</max_entry_rows> -->

@@ -674,18 +674,16 @@ void ReplicatedAccessStorage::backup(BackupEntriesCollector & backup_entries_col
        backup_entries_collector.getContext()->getAccessControl());

    auto backup_coordination = backup_entries_collector.getBackupCoordination();
    String current_host_id = backup_entries_collector.getBackupSettings().host_id;
    backup_coordination->addReplicatedAccessFilePath(zookeeper_path, type, current_host_id, backup_entry_with_path.first);
    backup_coordination->addReplicatedAccessFilePath(zookeeper_path, type, backup_entry_with_path.first);

    backup_entries_collector.addPostTask(
        [backup_entry = backup_entry_with_path.second,
         zookeeper_path = zookeeper_path,
         type,
         current_host_id,
         &backup_entries_collector,
         backup_coordination]
        {
            for (const String & path : backup_coordination->getReplicatedAccessFilePaths(zookeeper_path, type, current_host_id))
            for (const String & path : backup_coordination->getReplicatedAccessFilePaths(zookeeper_path, type))
                backup_entries_collector.addBackupEntry(path, backup_entry);
        });
}

@ -49,7 +49,7 @@ QueryTreeNodePtr ArrayJoinNode::cloneImpl() const
|
||||
return std::make_shared<ArrayJoinNode>(getTableExpression(), getJoinExpressionsNode(), is_left);
|
||||
}
|
||||
|
||||
ASTPtr ArrayJoinNode::toASTImpl() const
|
||||
ASTPtr ArrayJoinNode::toASTImpl(const ConvertToASTOptions & options) const
|
||||
{
|
||||
auto array_join_ast = std::make_shared<ASTArrayJoin>();
|
||||
array_join_ast->kind = is_left ? ASTArrayJoin::Kind::Left : ASTArrayJoin::Kind::Inner;
|
||||
@ -63,9 +63,9 @@ ASTPtr ArrayJoinNode::toASTImpl() const
|
||||
|
||||
auto * column_node = array_join_expression->as<ColumnNode>();
|
||||
if (column_node && column_node->getExpression())
|
||||
array_join_expression_ast = column_node->getExpression()->toAST();
|
||||
array_join_expression_ast = column_node->getExpression()->toAST(options);
|
||||
else
|
||||
array_join_expression_ast = array_join_expression->toAST();
|
||||
array_join_expression_ast = array_join_expression->toAST(options);
|
||||
|
||||
array_join_expression_ast->setAlias(array_join_expression->getAlias());
|
||||
array_join_expressions_ast->children.push_back(std::move(array_join_expression_ast));
|
||||
@ -75,7 +75,7 @@ ASTPtr ArrayJoinNode::toASTImpl() const
|
||||
array_join_ast->expression_list = array_join_ast->children.back();
|
||||
|
||||
ASTPtr tables_in_select_query_ast = std::make_shared<ASTTablesInSelectQuery>();
|
||||
addTableExpressionOrJoinIntoTablesInSelectQuery(tables_in_select_query_ast, children[table_expression_child_index]);
|
||||
addTableExpressionOrJoinIntoTablesInSelectQuery(tables_in_select_query_ast, children[table_expression_child_index], options);
|
||||
|
||||
auto array_join_query_element_ast = std::make_shared<ASTTablesInSelectQueryElement>();
|
||||
array_join_query_element_ast->children.push_back(std::move(array_join_ast));
|
||||
|
@ -99,7 +99,7 @@ protected:
|
||||
|
||||
QueryTreeNodePtr cloneImpl() const override;
|
||||
|
||||
ASTPtr toASTImpl() const override;
|
||||
ASTPtr toASTImpl(const ConvertToASTOptions & options) const override;
|
||||
|
||||
private:
|
||||
bool is_left = false;
|
||||
|
@ -91,12 +91,12 @@ QueryTreeNodePtr ColumnNode::cloneImpl() const
|
||||
return std::make_shared<ColumnNode>(column, getSourceWeakPointer());
|
||||
}
|
||||
|
||||
ASTPtr ColumnNode::toASTImpl() const
|
||||
ASTPtr ColumnNode::toASTImpl(const ConvertToASTOptions & options) const
|
||||
{
|
||||
std::vector<std::string> column_identifier_parts;
|
||||
|
||||
auto column_source = getColumnSourceOrNull();
|
||||
if (column_source)
|
||||
if (column_source && options.fully_qualified_identifiers)
|
||||
{
|
||||
auto node_type = column_source->getNodeType();
|
||||
if (node_type == QueryTreeNodeType::TABLE ||
|
||||
|
@ -132,7 +132,7 @@ protected:
|
||||
|
||||
QueryTreeNodePtr cloneImpl() const override;
|
||||
|
||||
ASTPtr toASTImpl() const override;
|
||||
ASTPtr toASTImpl(const ConvertToASTOptions & options) const override;
|
||||
|
||||
private:
|
||||
const QueryTreeNodeWeakPtr & getSourceWeakPointer() const
|
||||
|
@ -91,7 +91,7 @@ QueryTreeNodePtr ApplyColumnTransformerNode::cloneImpl() const
|
||||
return std::make_shared<ApplyColumnTransformerNode>(getExpressionNode());
|
||||
}
|
||||
|
||||
ASTPtr ApplyColumnTransformerNode::toASTImpl() const
|
||||
ASTPtr ApplyColumnTransformerNode::toASTImpl(const ConvertToASTOptions & options) const
|
||||
{
|
||||
auto ast_apply_transformer = std::make_shared<ASTColumnsApplyTransformer>();
|
||||
const auto & expression_node = getExpressionNode();
|
||||
@ -100,14 +100,14 @@ ASTPtr ApplyColumnTransformerNode::toASTImpl() const
|
||||
{
|
||||
auto & function_expression = expression_node->as<FunctionNode &>();
|
||||
ast_apply_transformer->func_name = function_expression.getFunctionName();
|
||||
ast_apply_transformer->parameters = function_expression.getParametersNode()->toAST();
|
||||
ast_apply_transformer->parameters = function_expression.getParametersNode()->toAST(options);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto & lambda_expression = expression_node->as<LambdaNode &>();
|
||||
if (!lambda_expression.getArgumentNames().empty())
|
||||
ast_apply_transformer->lambda_arg = lambda_expression.getArgumentNames()[0];
|
||||
ast_apply_transformer->lambda = lambda_expression.toAST();
|
||||
ast_apply_transformer->lambda = lambda_expression.toAST(options);
|
||||
}
|
||||
|
||||
return ast_apply_transformer;
|
||||
@ -227,7 +227,7 @@ QueryTreeNodePtr ExceptColumnTransformerNode::cloneImpl() const
|
||||
return std::make_shared<ExceptColumnTransformerNode>(except_column_names, is_strict);
|
||||
}
|
||||
|
||||
ASTPtr ExceptColumnTransformerNode::toASTImpl() const
|
||||
ASTPtr ExceptColumnTransformerNode::toASTImpl(const ConvertToASTOptions & /* options */) const
|
||||
{
|
||||
auto ast_except_transformer = std::make_shared<ASTColumnsExceptTransformer>();
|
||||
|
||||
@ -334,7 +334,7 @@ QueryTreeNodePtr ReplaceColumnTransformerNode::cloneImpl() const
|
||||
return result_replace_transformer;
|
||||
}
|
||||
|
||||
ASTPtr ReplaceColumnTransformerNode::toASTImpl() const
|
||||
ASTPtr ReplaceColumnTransformerNode::toASTImpl(const ConvertToASTOptions & options) const
|
||||
{
|
||||
auto ast_replace_transformer = std::make_shared<ASTColumnsReplaceTransformer>();
|
||||
|
||||
@ -347,7 +347,7 @@ ASTPtr ReplaceColumnTransformerNode::toASTImpl() const
|
||||
{
|
||||
auto replacement_ast = std::make_shared<ASTColumnsReplaceTransformer::Replacement>();
|
||||
replacement_ast->name = replacements_names[i];
|
||||
replacement_ast->children.push_back(replacement_expressions_nodes[i]->toAST());
|
||||
replacement_ast->children.push_back(replacement_expressions_nodes[i]->toAST(options));
|
||||
ast_replace_transformer->children.push_back(std::move(replacement_ast));
|
||||
}
|
||||
|
||||
|
@ -141,7 +141,7 @@ protected:
|
||||
|
||||
QueryTreeNodePtr cloneImpl() const override;
|
||||
|
||||
ASTPtr toASTImpl() const override;
|
||||
ASTPtr toASTImpl(const ConvertToASTOptions & options) const override;
|
||||
|
||||
private:
|
||||
ApplyColumnTransformerType apply_transformer_type = ApplyColumnTransformerType::LAMBDA;
|
||||
@ -220,7 +220,7 @@ protected:
|
||||
|
||||
QueryTreeNodePtr cloneImpl() const override;
|
||||
|
||||
ASTPtr toASTImpl() const override;
|
||||
ASTPtr toASTImpl(const ConvertToASTOptions & options) const override;
|
||||
|
||||
private:
|
||||
ExceptColumnTransformerType except_transformer_type;
|
||||
@ -298,7 +298,7 @@ protected:
|
||||
|
||||
QueryTreeNodePtr cloneImpl() const override;
|
||||
|
||||
ASTPtr toASTImpl() const override;
|
||||
ASTPtr toASTImpl(const ConvertToASTOptions & options) const override;
|
||||
|
||||
private:
|
||||
ListNode & getReplacements()
|
||||
|
@ -75,11 +75,14 @@ QueryTreeNodePtr ConstantNode::cloneImpl() const
return std::make_shared<ConstantNode>(constant_value, source_expression);
}

ASTPtr ConstantNode::toASTImpl() const
ASTPtr ConstantNode::toASTImpl(const ConvertToASTOptions & options) const
{
const auto & constant_value_literal = constant_value->getValue();
auto constant_value_ast = std::make_shared<ASTLiteral>(constant_value_literal);

if (!options.add_cast_for_constants)
return constant_value_ast;

bool need_to_add_cast_function = false;
auto constant_value_literal_type = constant_value_literal.getType();
WhichDataType constant_value_type(constant_value->getType());

@ -83,7 +83,7 @@ protected:

QueryTreeNodePtr cloneImpl() const override;

ASTPtr toASTImpl() const override;
ASTPtr toASTImpl(const ConvertToASTOptions & options) const override;

private:
ConstantValuePtr constant_value;

@ -197,7 +197,7 @@ QueryTreeNodePtr FunctionNode::cloneImpl() const
return result_function;
}

ASTPtr FunctionNode::toASTImpl() const
ASTPtr FunctionNode::toASTImpl(const ConvertToASTOptions & options) const
{
auto function_ast = std::make_shared<ASTFunction>();

@ -212,12 +212,12 @@ ASTPtr FunctionNode::toASTImpl() const
const auto & parameters = getParameters();
if (!parameters.getNodes().empty())
{
function_ast->children.push_back(parameters.toAST());
function_ast->children.push_back(parameters.toAST(options));
function_ast->parameters = function_ast->children.back();
}

const auto & arguments = getArguments();
function_ast->children.push_back(arguments.toAST());
function_ast->children.push_back(arguments.toAST(options));
function_ast->arguments = function_ast->children.back();

auto window_node = getWindowNode();

@ -226,7 +226,7 @@ ASTPtr FunctionNode::toASTImpl() const
if (auto * identifier_node = window_node->as<IdentifierNode>())
function_ast->window_name = identifier_node->getIdentifier().getFullName();
else
function_ast->window_definition = window_node->toAST();
function_ast->window_definition = window_node->toAST(options);
}

return function_ast;

@ -209,7 +209,7 @@ protected:

QueryTreeNodePtr cloneImpl() const override;

ASTPtr toASTImpl() const override;
ASTPtr toASTImpl(const ConvertToASTOptions & options) const override;

private:
String function_name;
@ -331,9 +331,9 @@ QueryTreeNodePtr IQueryTreeNode::cloneAndReplace(const QueryTreeNodePtr & node_t
return cloneAndReplace(replacement_map);
}

ASTPtr IQueryTreeNode::toAST() const
ASTPtr IQueryTreeNode::toAST(const ConvertToASTOptions & options) const
{
auto converted_node = toASTImpl();
auto converted_node = toASTImpl(options);

if (auto * ast_with_alias = dynamic_cast<ASTWithAlias *>(converted_node.get()))
converted_node->setAlias(alias);

@ -181,8 +181,17 @@ public:
*/
String formatOriginalASTForErrorMessage() const;

struct ConvertToASTOptions
{
/// Add _CAST if constant litral type is different from column type
bool add_cast_for_constants = true;

/// Identifiers are fully qualified (`database.table.column`), otherwise names are just column names (`column`)
bool fully_qualified_identifiers = true;
};

/// Convert query tree to AST
ASTPtr toAST() const;
ASTPtr toAST(const ConvertToASTOptions & options = { .add_cast_for_constants = true, .fully_qualified_identifiers = true }) const;

/// Convert query tree to AST and then format it for error message.
String formatConvertedASTForErrorMessage() const;

@ -258,7 +267,7 @@ protected:
virtual QueryTreeNodePtr cloneImpl() const = 0;

/// Subclass must convert its internal state and its children to AST
virtual ASTPtr toASTImpl() const = 0;
virtual ASTPtr toASTImpl(const ConvertToASTOptions & options) const = 0;

QueryTreeNodes children;
QueryTreeWeakNodes weak_pointers;
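A minimal usage sketch of the new conversion options (editor illustration, not part of the commit; `query_tree_node` is a hypothetical QueryTreeNodePtr, the fields are the ones declared in the struct above):

    // Default conversion: _CAST is added for constants, identifiers stay fully qualified.
    ASTPtr ast_default = query_tree_node->toAST();
    // Conversion without _CAST wrappers and with plain column names.
    ASTPtr ast_plain = query_tree_node->toAST({ .add_cast_for_constants = false, .fully_qualified_identifiers = false });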
@ -58,7 +58,7 @@ QueryTreeNodePtr IdentifierNode::cloneImpl() const
return std::make_shared<IdentifierNode>(identifier);
}

ASTPtr IdentifierNode::toASTImpl() const
ASTPtr IdentifierNode::toASTImpl(const ConvertToASTOptions & /* options */) const
{
auto identifier_parts = identifier.getParts();
return std::make_shared<ASTIdentifier>(std::move(identifier_parts));

@ -59,7 +59,7 @@ protected:

QueryTreeNodePtr cloneImpl() const override;

ASTPtr toASTImpl() const override;
ASTPtr toASTImpl(const ConvertToASTOptions & options) const override;

private:
Identifier identifier;

@ -44,11 +44,11 @@ QueryTreeNodePtr InterpolateNode::cloneImpl() const
return std::make_shared<InterpolateNode>(nullptr /*expression*/, nullptr /*interpolate_expression*/);
}

ASTPtr InterpolateNode::toASTImpl() const
ASTPtr InterpolateNode::toASTImpl(const ConvertToASTOptions & options) const
{
auto result = std::make_shared<ASTInterpolateElement>();
result->column = getExpression()->toAST()->getColumnName();
result->children.push_back(getInterpolateExpression()->toAST());
result->column = getExpression()->toAST(options)->getColumnName();
result->children.push_back(getInterpolateExpression()->toAST(options));
result->expr = result->children.back();

return result;

@ -59,7 +59,7 @@ protected:

QueryTreeNodePtr cloneImpl() const override;

ASTPtr toASTImpl() const override;
ASTPtr toASTImpl(const ConvertToASTOptions & options) const override;

private:
static constexpr size_t expression_child_index = 0;

@ -99,17 +99,17 @@ QueryTreeNodePtr JoinNode::cloneImpl() const
return std::make_shared<JoinNode>(getLeftTableExpression(), getRightTableExpression(), getJoinExpression(), locality, strictness, kind);
}

ASTPtr JoinNode::toASTImpl() const
ASTPtr JoinNode::toASTImpl(const ConvertToASTOptions & options) const
{
ASTPtr tables_in_select_query_ast = std::make_shared<ASTTablesInSelectQuery>();

addTableExpressionOrJoinIntoTablesInSelectQuery(tables_in_select_query_ast, children[left_table_expression_child_index]);
addTableExpressionOrJoinIntoTablesInSelectQuery(tables_in_select_query_ast, children[left_table_expression_child_index], options);

size_t join_table_index = tables_in_select_query_ast->children.size();

auto join_ast = toASTTableJoin();

addTableExpressionOrJoinIntoTablesInSelectQuery(tables_in_select_query_ast, children[right_table_expression_child_index]);
addTableExpressionOrJoinIntoTablesInSelectQuery(tables_in_select_query_ast, children[right_table_expression_child_index], options);

auto & table_element = tables_in_select_query_ast->children.at(join_table_index)->as<ASTTablesInSelectQueryElement &>();
table_element.children.push_back(std::move(join_ast));

@ -148,7 +148,7 @@ protected:

QueryTreeNodePtr cloneImpl() const override;

ASTPtr toASTImpl() const override;
ASTPtr toASTImpl(const ConvertToASTOptions & options) const override;

private:
JoinLocality locality = JoinLocality::Unspecified;

@ -65,17 +65,17 @@ QueryTreeNodePtr LambdaNode::cloneImpl() const
return std::make_shared<LambdaNode>(argument_names, getExpression());
}

ASTPtr LambdaNode::toASTImpl() const
ASTPtr LambdaNode::toASTImpl(const ConvertToASTOptions & options) const
{
auto lambda_function_arguments_ast = std::make_shared<ASTExpressionList>();

auto tuple_function = std::make_shared<ASTFunction>();
tuple_function->name = "tuple";
tuple_function->children.push_back(children[arguments_child_index]->toAST());
tuple_function->children.push_back(children[arguments_child_index]->toAST(options));
tuple_function->arguments = tuple_function->children.back();

lambda_function_arguments_ast->children.push_back(std::move(tuple_function));
lambda_function_arguments_ast->children.push_back(children[expression_child_index]->toAST());
lambda_function_arguments_ast->children.push_back(children[expression_child_index]->toAST(options));

auto lambda_function_ast = std::make_shared<ASTFunction>();
lambda_function_ast->name = "lambda";

@ -98,7 +98,7 @@ protected:

QueryTreeNodePtr cloneImpl() const override;

ASTPtr toASTImpl() const override;
ASTPtr toASTImpl(const ConvertToASTOptions & options) const override;

private:
Names argument_names;
@ -54,7 +54,7 @@ QueryTreeNodePtr ListNode::cloneImpl() const
return std::make_shared<ListNode>();
}

ASTPtr ListNode::toASTImpl() const
ASTPtr ListNode::toASTImpl(const ConvertToASTOptions & options) const
{
auto expression_list_ast = std::make_shared<ASTExpressionList>();

@ -62,7 +62,7 @@ ASTPtr ListNode::toASTImpl() const
expression_list_ast->children.resize(children_size);

for (size_t i = 0; i < children_size; ++i)
expression_list_ast->children[i] = children[i]->toAST();
expression_list_ast->children[i] = children[i]->toAST(options);

return expression_list_ast;
}

@ -57,7 +57,7 @@ protected:

QueryTreeNodePtr cloneImpl() const override;

ASTPtr toASTImpl() const override;
ASTPtr toASTImpl(const ConvertToASTOptions & options) const override;
};

}

@ -204,7 +204,7 @@ QueryTreeNodePtr MatcherNode::cloneImpl() const
return matcher_node;
}

ASTPtr MatcherNode::toASTImpl() const
ASTPtr MatcherNode::toASTImpl(const ConvertToASTOptions & options) const
{
ASTPtr result;
ASTPtr transformers;

@ -216,7 +216,7 @@ ASTPtr MatcherNode::toASTImpl() const
transformers = std::make_shared<ASTColumnsTransformerList>();

for (const auto & column_transformer : column_transformers)
transformers->children.push_back(column_transformer->toAST());
transformers->children.push_back(column_transformer->toAST(options));
}

if (matcher_type == MatcherNodeType::ASTERISK)

@ -148,7 +148,7 @@ protected:

QueryTreeNodePtr cloneImpl() const override;

ASTPtr toASTImpl() const override;
ASTPtr toASTImpl(const ConvertToASTOptions & options) const override;

private:
explicit MatcherNode(MatcherNodeType matcher_type_,

@ -259,7 +259,7 @@ QueryTreeNodePtr QueryNode::cloneImpl() const
return result_query_node;
}

ASTPtr QueryNode::toASTImpl() const
ASTPtr QueryNode::toASTImpl(const ConvertToASTOptions & options) const
{
auto select_query = std::make_shared<ASTSelectQuery>();
select_query->distinct = is_distinct;

@ -271,9 +271,9 @@ ASTPtr QueryNode::toASTImpl() const
select_query->group_by_all = is_group_by_all;

if (hasWith())
select_query->setExpression(ASTSelectQuery::Expression::WITH, getWith().toAST());
select_query->setExpression(ASTSelectQuery::Expression::WITH, getWith().toAST(options));

auto projection_ast = getProjection().toAST();
auto projection_ast = getProjection().toAST(options);
auto & projection_expression_list_ast = projection_ast->as<ASTExpressionList &>();
size_t projection_expression_list_ast_children_size = projection_expression_list_ast.children.size();
if (projection_expression_list_ast_children_size != getProjection().getNodes().size())

@ -293,44 +293,44 @@ ASTPtr QueryNode::toASTImpl() const
select_query->setExpression(ASTSelectQuery::Expression::SELECT, std::move(projection_ast));

ASTPtr tables_in_select_query_ast = std::make_shared<ASTTablesInSelectQuery>();
addTableExpressionOrJoinIntoTablesInSelectQuery(tables_in_select_query_ast, getJoinTree());
addTableExpressionOrJoinIntoTablesInSelectQuery(tables_in_select_query_ast, getJoinTree(), options);
select_query->setExpression(ASTSelectQuery::Expression::TABLES, std::move(tables_in_select_query_ast));

if (getPrewhere())
select_query->setExpression(ASTSelectQuery::Expression::PREWHERE, getPrewhere()->toAST());
select_query->setExpression(ASTSelectQuery::Expression::PREWHERE, getPrewhere()->toAST(options));

if (getWhere())
select_query->setExpression(ASTSelectQuery::Expression::WHERE, getWhere()->toAST());
select_query->setExpression(ASTSelectQuery::Expression::WHERE, getWhere()->toAST(options));

if (!is_group_by_all && hasGroupBy())
select_query->setExpression(ASTSelectQuery::Expression::GROUP_BY, getGroupBy().toAST());
select_query->setExpression(ASTSelectQuery::Expression::GROUP_BY, getGroupBy().toAST(options));

if (hasHaving())
select_query->setExpression(ASTSelectQuery::Expression::HAVING, getHaving()->toAST());
select_query->setExpression(ASTSelectQuery::Expression::HAVING, getHaving()->toAST(options));

if (hasWindow())
select_query->setExpression(ASTSelectQuery::Expression::WINDOW, getWindow().toAST());
select_query->setExpression(ASTSelectQuery::Expression::WINDOW, getWindow().toAST(options));

if (hasOrderBy())
select_query->setExpression(ASTSelectQuery::Expression::ORDER_BY, getOrderBy().toAST());
select_query->setExpression(ASTSelectQuery::Expression::ORDER_BY, getOrderBy().toAST(options));

if (hasInterpolate())
select_query->setExpression(ASTSelectQuery::Expression::INTERPOLATE, getInterpolate()->toAST());
select_query->setExpression(ASTSelectQuery::Expression::INTERPOLATE, getInterpolate()->toAST(options));

if (hasLimitByLimit())
select_query->setExpression(ASTSelectQuery::Expression::LIMIT_BY_LENGTH, getLimitByLimit()->toAST());
select_query->setExpression(ASTSelectQuery::Expression::LIMIT_BY_LENGTH, getLimitByLimit()->toAST(options));

if (hasLimitByOffset())
select_query->setExpression(ASTSelectQuery::Expression::LIMIT_BY_OFFSET, getLimitByOffset()->toAST());
select_query->setExpression(ASTSelectQuery::Expression::LIMIT_BY_OFFSET, getLimitByOffset()->toAST(options));

if (hasLimitBy())
select_query->setExpression(ASTSelectQuery::Expression::LIMIT_BY, getLimitBy().toAST());
select_query->setExpression(ASTSelectQuery::Expression::LIMIT_BY, getLimitBy().toAST(options));

if (hasLimit())
select_query->setExpression(ASTSelectQuery::Expression::LIMIT_LENGTH, getLimit()->toAST());
select_query->setExpression(ASTSelectQuery::Expression::LIMIT_LENGTH, getLimit()->toAST(options));

if (hasOffset())
select_query->setExpression(ASTSelectQuery::Expression::LIMIT_OFFSET, getOffset()->toAST());
select_query->setExpression(ASTSelectQuery::Expression::LIMIT_OFFSET, getOffset()->toAST(options));

if (hasSettingsChanges())
{

@ -575,7 +575,7 @@ protected:

QueryTreeNodePtr cloneImpl() const override;

ASTPtr toASTImpl() const override;
ASTPtr toASTImpl(const ConvertToASTOptions & options) const override;

private:
bool is_subquery = false;
@ -838,8 +838,14 @@ QueryTreeNodePtr QueryTreeBuilder::buildJoinTree(const ASTPtr & tables_in_select
const auto & function_arguments_list = table_function_expression.arguments->as<ASTExpressionList &>().children;
for (const auto & argument : function_arguments_list)
{
if (!node->getSettingsChanges().empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Table function '{}' has arguments after SETTINGS",
table_function_expression.formatForErrorMessage());

if (argument->as<ASTSelectQuery>() || argument->as<ASTSelectWithUnionQuery>() || argument->as<ASTSelectIntersectExceptQuery>())
node->getArguments().getNodes().push_back(buildSelectOrUnionExpression(argument, false /*is_subquery*/, {} /*cte_name*/, context));
else if (const auto * ast_set = argument->as<ASTSetQuery>())
node->setSettingsChanges(ast_set->changes);
else
node->getArguments().getNodes().push_back(buildExpression(argument, context));
}

@ -109,7 +109,7 @@ QueryTreeNodePtr SortNode::cloneImpl() const
return std::make_shared<SortNode>(nullptr /*expression*/, sort_direction, nulls_sort_direction, collator, with_fill);
}

ASTPtr SortNode::toASTImpl() const
ASTPtr SortNode::toASTImpl(const ConvertToASTOptions & options) const
{
auto result = std::make_shared<ASTOrderByElement>();
result->direction = sort_direction == SortDirection::ASCENDING ? 1 : -1;

@ -120,10 +120,10 @@ ASTPtr SortNode::toASTImpl() const
result->nulls_direction_was_explicitly_specified = nulls_sort_direction.has_value();

result->with_fill = with_fill;
result->fill_from = hasFillFrom() ? getFillFrom()->toAST() : nullptr;
result->fill_to = hasFillTo() ? getFillTo()->toAST() : nullptr;
result->fill_step = hasFillStep() ? getFillStep()->toAST() : nullptr;
result->children.push_back(getExpression()->toAST());
result->fill_from = hasFillFrom() ? getFillFrom()->toAST(options) : nullptr;
result->fill_to = hasFillTo() ? getFillTo()->toAST(options) : nullptr;
result->fill_step = hasFillStep() ? getFillStep()->toAST(options) : nullptr;
result->children.push_back(getExpression()->toAST(options));

if (collator)
{

@ -137,7 +137,7 @@ protected:

QueryTreeNodePtr cloneImpl() const override;

ASTPtr toASTImpl() const override;
ASTPtr toASTImpl(const ConvertToASTOptions & options) const override;

private:
static constexpr size_t sort_expression_child_index = 0;

@ -7,6 +7,7 @@
#include <Storages/IStorage.h>

#include <Parsers/ASTFunction.h>
#include <Parsers/ASTSetQuery.h>

#include <Interpreters/Context.h>

@ -71,6 +72,13 @@ void TableFunctionNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_
buffer << '\n' << std::string(indent + 2, ' ') << "ARGUMENTS\n";
arguments.dumpTreeImpl(buffer, format_state, indent + 4);
}

if (!settings_changes.empty())
{
buffer << '\n' << std::string(indent + 2, ' ') << "SETTINGS";
for (const auto & change : settings_changes)
buffer << fmt::format(" {}={}", change.name, toString(change.value));
}
}

bool TableFunctionNode::isEqualImpl(const IQueryTreeNode & rhs) const

@ -82,6 +90,9 @@ bool TableFunctionNode::isEqualImpl(const IQueryTreeNode & rhs) const
if (storage && rhs_typed.storage)
return storage_id == rhs_typed.storage_id;

if (settings_changes != rhs_typed.settings_changes)
return false;

return table_expression_modifiers == rhs_typed.table_expression_modifiers;
}

@ -99,6 +110,17 @@ void TableFunctionNode::updateTreeHashImpl(HashState & state) const

if (table_expression_modifiers)
table_expression_modifiers->updateTreeHash(state);

state.update(settings_changes.size());
for (const auto & change : settings_changes)
{
state.update(change.name.size());
state.update(change.name);

const auto & value_dump = change.value.dump();
state.update(value_dump.size());
state.update(value_dump);
}
}

QueryTreeNodePtr TableFunctionNode::cloneImpl() const

@ -109,20 +131,29 @@ QueryTreeNodePtr TableFunctionNode::cloneImpl() const
result->storage_id = storage_id;
result->storage_snapshot = storage_snapshot;
result->table_expression_modifiers = table_expression_modifiers;
result->settings_changes = settings_changes;

return result;
}

ASTPtr TableFunctionNode::toASTImpl() const
ASTPtr TableFunctionNode::toASTImpl(const ConvertToASTOptions & options) const
{
auto table_function_ast = std::make_shared<ASTFunction>();

table_function_ast->name = table_function_name;

const auto & arguments = getArguments();
table_function_ast->children.push_back(arguments.toAST());
table_function_ast->children.push_back(arguments.toAST(options));
table_function_ast->arguments = table_function_ast->children.back();

if (!settings_changes.empty())
{
auto settings_ast = std::make_shared<ASTSetQuery>();
settings_ast->changes = settings_changes;
settings_ast->is_standalone = false;
table_function_ast->arguments->children.push_back(std::move(settings_ast));
}

return table_function_ast;
}
@ -1,5 +1,7 @@
#pragma once

#include <Common/SettingsChanges.h>

#include <Storages/IStorage_fwd.h>
#include <Storages/TableLockHolder.h>
#include <Storages/StorageSnapshot.h>

@ -122,6 +124,18 @@ public:
return table_expression_modifiers;
}

/// Get settings changes passed to table function
const SettingsChanges & getSettingsChanges() const
{
return settings_changes;
}

/// Set settings changes passed as last argument to table function
void setSettingsChanges(SettingsChanges settings_changes_)
{
settings_changes = std::move(settings_changes_);
}

/// Set table expression modifiers
void setTableExpressionModifiers(TableExpressionModifiers table_expression_modifiers_value)
{

@ -142,7 +156,7 @@ protected:

QueryTreeNodePtr cloneImpl() const override;

ASTPtr toASTImpl() const override;
ASTPtr toASTImpl(const ConvertToASTOptions & options) const override;

private:
String table_function_name;

@ -151,6 +165,7 @@ private:
StorageID storage_id;
StorageSnapshotPtr storage_snapshot;
std::optional<TableExpressionModifiers> table_expression_modifiers;
SettingsChanges settings_changes;

static constexpr size_t arguments_child_index = 0;
static constexpr size_t children_size = arguments_child_index + 1;
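An illustrative sketch of how the new settings-changes accessors fit together (editor illustration with hypothetical variables; it mirrors the QueryTreeBuilder and toASTImpl hunks above rather than adding behaviour):

    // While building the query tree, a trailing SETTINGS argument of a table function
    // is detached from the argument list and stored on the node:
    if (const auto * ast_set = argument->as<ASTSetQuery>())
        table_function_node->setSettingsChanges(ast_set->changes);

    // TableFunctionNode::toASTImpl() later re-attaches the stored changes as a trailing
    // ASTSetQuery argument, so converting back to AST round-trips the SETTINGS clause.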
@ -86,7 +86,7 @@ QueryTreeNodePtr TableNode::cloneImpl() const
return result_table_node;
}

ASTPtr TableNode::toASTImpl() const
ASTPtr TableNode::toASTImpl(const ConvertToASTOptions & /* options */) const
{
if (!temporary_table_name.empty())
return std::make_shared<ASTTableIdentifier>(temporary_table_name);

@ -106,7 +106,7 @@ protected:

QueryTreeNodePtr cloneImpl() const override;

ASTPtr toASTImpl() const override;
ASTPtr toASTImpl(const ConvertToASTOptions & options) const override;

private:
StoragePtr storage;

@ -140,12 +140,12 @@ QueryTreeNodePtr UnionNode::cloneImpl() const
return result_union_node;
}

ASTPtr UnionNode::toASTImpl() const
ASTPtr UnionNode::toASTImpl(const ConvertToASTOptions & options) const
{
auto select_with_union_query = std::make_shared<ASTSelectWithUnionQuery>();
select_with_union_query->union_mode = union_mode;
select_with_union_query->is_normalized = true;
select_with_union_query->children.push_back(getQueriesNode()->toAST());
select_with_union_query->children.push_back(getQueriesNode()->toAST(options));
select_with_union_query->list_of_selects = select_with_union_query->children.back();

if (is_subquery)

@ -143,7 +143,7 @@ protected:

QueryTreeNodePtr cloneImpl() const override;

ASTPtr toASTImpl() const override;
ASTPtr toASTImpl(const ConvertToASTOptions & options) const override;

private:
bool is_subquery = false;

@ -268,7 +268,7 @@ static ASTPtr convertIntoTableExpressionAST(const QueryTreeNodePtr & table_expre
return result_table_expression;
}

void addTableExpressionOrJoinIntoTablesInSelectQuery(ASTPtr & tables_in_select_query_ast, const QueryTreeNodePtr & table_expression)
void addTableExpressionOrJoinIntoTablesInSelectQuery(ASTPtr & tables_in_select_query_ast, const QueryTreeNodePtr & table_expression, const IQueryTreeNode::ConvertToASTOptions & convert_to_ast_options)
{
auto table_expression_node_type = table_expression->getNodeType();

@ -297,7 +297,7 @@ void addTableExpressionOrJoinIntoTablesInSelectQuery(ASTPtr & tables_in_select_q
[[fallthrough]];
case QueryTreeNodeType::JOIN:
{
auto table_expression_tables_in_select_query_ast = table_expression->toAST();
auto table_expression_tables_in_select_query_ast = table_expression->toAST(convert_to_ast_options);
tables_in_select_query_ast->children.reserve(table_expression_tables_in_select_query_ast->children.size());
for (auto && table_element_ast : table_expression_tables_in_select_query_ast->children)
tables_in_select_query_ast->children.push_back(std::move(table_element_ast));

@ -40,7 +40,7 @@ std::optional<bool> tryExtractConstantFromConditionNode(const QueryTreeNodePtr &
/** Add table expression in tables in select query children.
* If table expression node is not of identifier node, table node, query node, table function node, join node or array join node type throws logical error exception.
*/
void addTableExpressionOrJoinIntoTablesInSelectQuery(ASTPtr & tables_in_select_query_ast, const QueryTreeNodePtr & table_expression);
void addTableExpressionOrJoinIntoTablesInSelectQuery(ASTPtr & tables_in_select_query_ast, const QueryTreeNodePtr & table_expression, const IQueryTreeNode::ConvertToASTOptions & convert_to_ast_options);

/// Extract table, table function, query, union from join tree
QueryTreeNodes extractTableExpressions(const QueryTreeNodePtr & join_tree_node);

@ -107,7 +107,7 @@ QueryTreeNodePtr WindowNode::cloneImpl() const
return window_node;
}

ASTPtr WindowNode::toASTImpl() const
ASTPtr WindowNode::toASTImpl(const ConvertToASTOptions & options) const
{
auto window_definition = std::make_shared<ASTWindowDefinition>();

@ -115,13 +115,13 @@ ASTPtr WindowNode::toASTImpl() const

if (hasPartitionBy())
{
window_definition->children.push_back(getPartitionByNode()->toAST());
window_definition->children.push_back(getPartitionByNode()->toAST(options));
window_definition->partition_by = window_definition->children.back();
}

if (hasOrderBy())
{
window_definition->children.push_back(getOrderByNode()->toAST());
window_definition->children.push_back(getOrderByNode()->toAST(options));
window_definition->order_by = window_definition->children.back();
}

@ -132,7 +132,7 @@ ASTPtr WindowNode::toASTImpl() const

if (hasFrameBeginOffset())
{
window_definition->children.push_back(getFrameBeginOffsetNode()->toAST());
window_definition->children.push_back(getFrameBeginOffsetNode()->toAST(options));
window_definition->frame_begin_offset = window_definition->children.back();
}

@ -140,7 +140,7 @@ ASTPtr WindowNode::toASTImpl() const
window_definition->frame_end_preceding = window_frame.end_preceding;
if (hasFrameEndOffset())
{
window_definition->children.push_back(getFrameEndOffsetNode()->toAST());
window_definition->children.push_back(getFrameEndOffsetNode()->toAST(options));
window_definition->frame_end_offset = window_definition->children.back();
}

@ -175,7 +175,7 @@ protected:

QueryTreeNodePtr cloneImpl() const override;

ASTPtr toASTImpl() const override;
ASTPtr toASTImpl(const ConvertToASTOptions & options) const override;

private:
static constexpr size_t order_by_child_index = 0;

@ -36,7 +36,7 @@ public:
return std::make_shared<SourceNode>();
}

ASTPtr toASTImpl() const override
ASTPtr toASTImpl(const ConvertToASTOptions & /* options */) const override
{
return nullptr;
}
@ -13,20 +13,20 @@ using FileInfo = IBackupCoordination::FileInfo;
BackupCoordinationLocal::BackupCoordinationLocal() = default;
BackupCoordinationLocal::~BackupCoordinationLocal() = default;

void BackupCoordinationLocal::setStage(const String &, const String &, const String &)
void BackupCoordinationLocal::setStage(const String &, const String &)
{
}

void BackupCoordinationLocal::setError(const String &, const Exception &)
void BackupCoordinationLocal::setError(const Exception &)
{
}

Strings BackupCoordinationLocal::waitForStage(const Strings &, const String &)
Strings BackupCoordinationLocal::waitForStage(const String &)
{
return {};
}

Strings BackupCoordinationLocal::waitForStage(const Strings &, const String &, std::chrono::milliseconds)
Strings BackupCoordinationLocal::waitForStage(const String &, std::chrono::milliseconds)
{
return {};
}

@ -70,29 +70,29 @@ Strings BackupCoordinationLocal::getReplicatedDataPaths(const String & table_sha
}


void BackupCoordinationLocal::addReplicatedAccessFilePath(const String & access_zk_path, AccessEntityType access_entity_type, const String & host_id, const String & file_path)
void BackupCoordinationLocal::addReplicatedAccessFilePath(const String & access_zk_path, AccessEntityType access_entity_type, const String & file_path)
{
std::lock_guard lock{mutex};
replicated_access.addFilePath(access_zk_path, access_entity_type, host_id, file_path);
replicated_access.addFilePath(access_zk_path, access_entity_type, "", file_path);
}

Strings BackupCoordinationLocal::getReplicatedAccessFilePaths(const String & access_zk_path, AccessEntityType access_entity_type, const String & host_id) const
Strings BackupCoordinationLocal::getReplicatedAccessFilePaths(const String & access_zk_path, AccessEntityType access_entity_type) const
{
std::lock_guard lock{mutex};
return replicated_access.getFilePaths(access_zk_path, access_entity_type, host_id);
return replicated_access.getFilePaths(access_zk_path, access_entity_type, "");
}


void BackupCoordinationLocal::addReplicatedSQLObjectsDir(const String & loader_zk_path, UserDefinedSQLObjectType object_type, const String & host_id, const String & dir_path)
void BackupCoordinationLocal::addReplicatedSQLObjectsDir(const String & loader_zk_path, UserDefinedSQLObjectType object_type, const String & dir_path)
{
std::lock_guard lock{mutex};
replicated_sql_objects.addDirectory(loader_zk_path, object_type, host_id, dir_path);
replicated_sql_objects.addDirectory(loader_zk_path, object_type, "", dir_path);
}

Strings BackupCoordinationLocal::getReplicatedSQLObjectsDirs(const String & loader_zk_path, UserDefinedSQLObjectType object_type, const String & host_id) const
Strings BackupCoordinationLocal::getReplicatedSQLObjectsDirs(const String & loader_zk_path, UserDefinedSQLObjectType object_type) const
{
std::lock_guard lock{mutex};
return replicated_sql_objects.getDirectories(loader_zk_path, object_type, host_id);
return replicated_sql_objects.getDirectories(loader_zk_path, object_type, "");
}


@ -21,10 +21,10 @@ public:
BackupCoordinationLocal();
~BackupCoordinationLocal() override;

void setStage(const String & current_host, const String & new_stage, const String & message) override;
void setError(const String & current_host, const Exception & exception) override;
Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait) override;
Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait, std::chrono::milliseconds timeout) override;
void setStage(const String & new_stage, const String & message) override;
void setError(const Exception & exception) override;
Strings waitForStage(const String & stage_to_wait) override;
Strings waitForStage(const String & stage_to_wait, std::chrono::milliseconds timeout) override;

void addReplicatedPartNames(const String & table_shared_id, const String & table_name_for_logs, const String & replica_name,
const std::vector<PartNameAndChecksum> & part_names_and_checksums) override;

@ -37,11 +37,11 @@ public:
void addReplicatedDataPath(const String & table_shared_id, const String & data_path) override;
Strings getReplicatedDataPaths(const String & table_shared_id) const override;

void addReplicatedAccessFilePath(const String & access_zk_path, AccessEntityType access_entity_type, const String & host_id, const String & file_path) override;
Strings getReplicatedAccessFilePaths(const String & access_zk_path, AccessEntityType access_entity_type, const String & host_id) const override;
void addReplicatedAccessFilePath(const String & access_zk_path, AccessEntityType access_entity_type, const String & file_path) override;
Strings getReplicatedAccessFilePaths(const String & access_zk_path, AccessEntityType access_entity_type) const override;

void addReplicatedSQLObjectsDir(const String & loader_zk_path, UserDefinedSQLObjectType object_type, const String & host_id, const String & dir_path) override;
Strings getReplicatedSQLObjectsDirs(const String & loader_zk_path, UserDefinedSQLObjectType object_type, const String & host_id) const override;
void addReplicatedSQLObjectsDir(const String & loader_zk_path, UserDefinedSQLObjectType object_type, const String & dir_path) override;
Strings getReplicatedSQLObjectsDirs(const String & loader_zk_path, UserDefinedSQLObjectType object_type) const override;

void addFileInfo(const FileInfo & file_info, bool & is_data_file_required) override;
void updateFileInfo(const FileInfo & file_info) override;
@ -166,17 +166,30 @@ namespace
}
}

size_t BackupCoordinationRemote::findCurrentHostIndex(const Strings & all_hosts, const String & current_host)
{
auto it = std::find(all_hosts.begin(), all_hosts.end(), current_host);
if (it == all_hosts.end())
return 0;
return it - all_hosts.begin();
}

BackupCoordinationRemote::BackupCoordinationRemote(
const BackupKeeperSettings & keeper_settings_,
const String & root_zookeeper_path_,
const String & backup_uuid_,
zkutil::GetZooKeeper get_zookeeper_,
const String & root_zookeeper_path_,
const BackupKeeperSettings & keeper_settings_,
const String & backup_uuid_,
const Strings & all_hosts_,
const String & current_host_,
bool is_internal_)
: keeper_settings(keeper_settings_)
: get_zookeeper(get_zookeeper_)
, root_zookeeper_path(root_zookeeper_path_)
, zookeeper_path(root_zookeeper_path_ + "/backup-" + backup_uuid_)
, keeper_settings(keeper_settings_)
, backup_uuid(backup_uuid_)
, get_zookeeper(get_zookeeper_)
, all_hosts(all_hosts_)
, current_host(current_host_)
, current_host_index(findCurrentHostIndex(all_hosts, current_host))
, is_internal(is_internal_)
{
zookeeper_retries_info = ZooKeeperRetriesInfo(

@ -251,22 +264,22 @@ void BackupCoordinationRemote::removeAllNodes()
}


void BackupCoordinationRemote::setStage(const String & current_host, const String & new_stage, const String & message)
void BackupCoordinationRemote::setStage(const String & new_stage, const String & message)
{
stage_sync->set(current_host, new_stage, message);
}

void BackupCoordinationRemote::setError(const String & current_host, const Exception & exception)
void BackupCoordinationRemote::setError(const Exception & exception)
{
stage_sync->setError(current_host, exception);
}

Strings BackupCoordinationRemote::waitForStage(const Strings & all_hosts, const String & stage_to_wait)
Strings BackupCoordinationRemote::waitForStage(const String & stage_to_wait)
{
return stage_sync->wait(all_hosts, stage_to_wait);
}

Strings BackupCoordinationRemote::waitForStage(const Strings & all_hosts, const String & stage_to_wait, std::chrono::milliseconds timeout)
Strings BackupCoordinationRemote::waitForStage(const String & stage_to_wait, std::chrono::milliseconds timeout)
{
return stage_sync->waitFor(all_hosts, stage_to_wait, timeout);
}

@ -403,7 +416,7 @@ void BackupCoordinationRemote::prepareReplicatedTables() const
}


void BackupCoordinationRemote::addReplicatedAccessFilePath(const String & access_zk_path, AccessEntityType access_entity_type, const String & host_id, const String & file_path)
void BackupCoordinationRemote::addReplicatedAccessFilePath(const String & access_zk_path, AccessEntityType access_entity_type, const String & file_path)
{
{
std::lock_guard lock{mutex};

@ -416,15 +429,15 @@ void BackupCoordinationRemote::addReplicatedAccessFilePath(const String & access
zk->createIfNotExists(path, "");
path += "/" + AccessEntityTypeInfo::get(access_entity_type).name;
zk->createIfNotExists(path, "");
path += "/" + host_id;
path += "/" + current_host;
zk->createIfNotExists(path, file_path);
}

Strings BackupCoordinationRemote::getReplicatedAccessFilePaths(const String & access_zk_path, AccessEntityType access_entity_type, const String & host_id) const
Strings BackupCoordinationRemote::getReplicatedAccessFilePaths(const String & access_zk_path, AccessEntityType access_entity_type) const
{
std::lock_guard lock{mutex};
prepareReplicatedAccess();
return replicated_access->getFilePaths(access_zk_path, access_entity_type, host_id);
return replicated_access->getFilePaths(access_zk_path, access_entity_type, current_host);
}

void BackupCoordinationRemote::prepareReplicatedAccess() const

@ -453,7 +466,7 @@ void BackupCoordinationRemote::prepareReplicatedAccess() const
}
}

void BackupCoordinationRemote::addReplicatedSQLObjectsDir(const String & loader_zk_path, UserDefinedSQLObjectType object_type, const String & host_id, const String & dir_path)
void BackupCoordinationRemote::addReplicatedSQLObjectsDir(const String & loader_zk_path, UserDefinedSQLObjectType object_type, const String & dir_path)
{
{
std::lock_guard lock{mutex};

@ -474,15 +487,15 @@ void BackupCoordinationRemote::addReplicatedSQLObjectsDir(const String & loader_
}

zk->createIfNotExists(path, "");
path += "/" + host_id;
path += "/" + current_host;
zk->createIfNotExists(path, dir_path);
}

Strings BackupCoordinationRemote::getReplicatedSQLObjectsDirs(const String & loader_zk_path, UserDefinedSQLObjectType object_type, const String & host_id) const
Strings BackupCoordinationRemote::getReplicatedSQLObjectsDirs(const String & loader_zk_path, UserDefinedSQLObjectType object_type) const
{
std::lock_guard lock{mutex};
prepareReplicatedSQLObjects();
return replicated_sql_objects->getDirectories(loader_zk_path, object_type, host_id);
return replicated_sql_objects->getDirectories(loader_zk_path, object_type, current_host);
}

void BackupCoordinationRemote::prepareReplicatedSQLObjects() const

@ -827,5 +840,4 @@ bool BackupCoordinationRemote::hasConcurrentBackups(const std::atomic<size_t> &)
return false;
}


}
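Behaviour sketch for the new findCurrentHostIndex helper (editor illustration with hypothetical host names; the fallback to index 0 matches the definition above):

    Strings all_hosts = {"node1", "node2", "node3"};
    size_t i = BackupCoordinationRemote::findCurrentHostIndex(all_hosts, "node2");   // 1
    size_t j = BackupCoordinationRemote::findCurrentHostIndex(all_hosts, "absent");  // 0 (not found)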
@ -27,17 +27,20 @@ public:
};

BackupCoordinationRemote(
const BackupKeeperSettings & keeper_settings_,
const String & root_zookeeper_path_,
const String & backup_uuid_,
zkutil::GetZooKeeper get_zookeeper_,
const String & root_zookeeper_path_,
const BackupKeeperSettings & keeper_settings_,
const String & backup_uuid_,
const Strings & all_hosts_,
const String & current_host_,
bool is_internal_);

~BackupCoordinationRemote() override;

void setStage(const String & current_host, const String & new_stage, const String & message) override;
void setError(const String & current_host, const Exception & exception) override;
Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait) override;
Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait, std::chrono::milliseconds timeout) override;
void setStage(const String & new_stage, const String & message) override;
void setError(const Exception & exception) override;
Strings waitForStage(const String & stage_to_wait) override;
Strings waitForStage(const String & stage_to_wait, std::chrono::milliseconds timeout) override;

void addReplicatedPartNames(
const String & table_shared_id,

@ -58,11 +61,11 @@ public:
void addReplicatedDataPath(const String & table_shared_id, const String & data_path) override;
Strings getReplicatedDataPaths(const String & table_shared_id) const override;

void addReplicatedAccessFilePath(const String & access_zk_path, AccessEntityType access_entity_type, const String & host_id, const String & file_path) override;
Strings getReplicatedAccessFilePaths(const String & access_zk_path, AccessEntityType access_entity_type, const String & host_id) const override;
void addReplicatedAccessFilePath(const String & access_zk_path, AccessEntityType access_entity_type, const String & file_path) override;
Strings getReplicatedAccessFilePaths(const String & access_zk_path, AccessEntityType access_entity_type) const override;

void addReplicatedSQLObjectsDir(const String & loader_zk_path, UserDefinedSQLObjectType object_type, const String & host_id, const String & dir_path) override;
Strings getReplicatedSQLObjectsDirs(const String & loader_zk_path, UserDefinedSQLObjectType object_type, const String & host_id) const override;
void addReplicatedSQLObjectsDir(const String & loader_zk_path, UserDefinedSQLObjectType object_type, const String & dir_path) override;
Strings getReplicatedSQLObjectsDirs(const String & loader_zk_path, UserDefinedSQLObjectType object_type) const override;

void addFileInfo(const FileInfo & file_info, bool & is_data_file_required) override;
void updateFileInfo(const FileInfo & file_info) override;

@ -78,6 +81,8 @@ public:

bool hasConcurrentBackups(const std::atomic<size_t> & num_active_backups) const override;

static size_t findCurrentHostIndex(const Strings & all_hosts, const String & current_host);

private:
zkutil::ZooKeeperPtr getZooKeeper() const;
zkutil::ZooKeeperPtr getZooKeeperNoLock() const;

@ -91,11 +96,14 @@ private:
void prepareReplicatedAccess() const;
void prepareReplicatedSQLObjects() const;

const BackupKeeperSettings keeper_settings;
const zkutil::GetZooKeeper get_zookeeper;
const String root_zookeeper_path;
const String zookeeper_path;
const BackupKeeperSettings keeper_settings;
const String backup_uuid;
const zkutil::GetZooKeeper get_zookeeper;
const Strings all_hosts;
const String current_host;
const size_t current_host_index;
const bool is_internal;

mutable ZooKeeperRetriesInfo zookeeper_retries_info;
@ -133,22 +133,22 @@ Strings BackupEntriesCollector::setStage(const String & new_stage, const String
LOG_TRACE(log, fmt::runtime(toUpperFirst(new_stage)));
current_stage = new_stage;

backup_coordination->setStage(backup_settings.host_id, new_stage, message);
backup_coordination->setStage(new_stage, message);

if (new_stage == Stage::formatGatheringMetadata(1))
{
return backup_coordination->waitForStage(all_hosts, new_stage, on_cluster_first_sync_timeout);
return backup_coordination->waitForStage(new_stage, on_cluster_first_sync_timeout);
}
else if (new_stage.starts_with(Stage::GATHERING_METADATA))
{
auto current_time = std::chrono::steady_clock::now();
auto end_of_timeout = std::max(current_time, consistent_metadata_snapshot_end_time);
return backup_coordination->waitForStage(
all_hosts, new_stage, std::chrono::duration_cast<std::chrono::milliseconds>(end_of_timeout - current_time));
new_stage, std::chrono::duration_cast<std::chrono::milliseconds>(end_of_timeout - current_time));
}
else
{
return backup_coordination->waitForStage(all_hosts, new_stage);
return backup_coordination->waitForStage(new_stage);
}
}

@ -1,10 +1,7 @@
#include <Backups/BackupUtils.h>
#include <Backups/IBackup.h>
#include <Backups/RestoreSettings.h>
#include <Access/Common/AccessRightsElement.h>
#include <Databases/DDLRenamingVisitor.h>
#include <Interpreters/DatabaseCatalog.h>
#include <Common/scope_guard_safe.h>
#include <Common/setThreadName.h>
@ -60,140 +57,6 @@ DDLRenamingMap makeRenamingMapFromBackupQuery(const ASTBackupQuery::Elements & e
}


void writeBackupEntries(BackupMutablePtr backup, BackupEntries && backup_entries, ThreadPool & thread_pool)
{
size_t num_active_jobs = 0;
std::mutex mutex;
std::condition_variable event;
std::exception_ptr exception;

bool always_single_threaded = !backup->supportsWritingInMultipleThreads();
auto thread_group = CurrentThread::getGroup();

for (auto & name_and_entry : backup_entries)
{
auto & name = name_and_entry.first;
auto & entry = name_and_entry.second;

{
std::unique_lock lock{mutex};
if (exception)
break;
++num_active_jobs;
}

auto job = [&](bool async)
{
SCOPE_EXIT_SAFE(
std::lock_guard lock{mutex};
if (!--num_active_jobs)
event.notify_all();
if (async)
CurrentThread::detachFromGroupIfNotDetached();
);

try
{
if (async && thread_group)
CurrentThread::attachToGroup(thread_group);

if (async)
setThreadName("BackupWorker");

{
std::lock_guard lock{mutex};
if (exception)
return;
}

backup->writeFile(name, std::move(entry));
}
catch (...)
{
std::lock_guard lock{mutex};
if (!exception)
exception = std::current_exception();
}
};

if (always_single_threaded || !thread_pool.trySchedule([job] { job(true); }))
job(false);
}

{
std::unique_lock lock{mutex};
event.wait(lock, [&] { return !num_active_jobs; });
if (exception)
std::rethrow_exception(exception);
}
}


void restoreTablesData(DataRestoreTasks && tasks, ThreadPool & thread_pool)
{
size_t num_active_jobs = 0;
std::mutex mutex;
std::condition_variable event;
std::exception_ptr exception;

auto thread_group = CurrentThread::getGroup();

for (auto & task : tasks)
{
{
std::unique_lock lock{mutex};
if (exception)
break;
++num_active_jobs;
}

auto job = [&](bool async)
{
SCOPE_EXIT_SAFE(
std::lock_guard lock{mutex};
if (!--num_active_jobs)
event.notify_all();
if (async)
CurrentThread::detachFromGroupIfNotDetached();
);

try
{
if (async && thread_group)
CurrentThread::attachToGroup(thread_group);

if (async)
setThreadName("RestoreWorker");

{
std::lock_guard lock{mutex};
if (exception)
return;
}

std::move(task)();
}
catch (...)
{
std::lock_guard lock{mutex};
if (!exception)
exception = std::current_exception();
}
};

if (!thread_pool.trySchedule([job] { job(true); }))
job(false);
}

{
std::unique_lock lock{mutex};
event.wait(lock, [&] { return !num_active_jobs; });
if (exception)
std::rethrow_exception(exception);
}
}


/// Returns access required to execute BACKUP query.
AccessRightsElements getRequiredAccessToBackup(const ASTBackupQuery::Elements & elements)
{
@ -7,21 +7,12 @@
namespace DB
{
class IBackup;
using BackupMutablePtr = std::shared_ptr<IBackup>;
class IBackupEntry;
using BackupEntries = std::vector<std::pair<String, std::shared_ptr<const IBackupEntry>>>;
using DataRestoreTasks = std::vector<std::function<void()>>;
class AccessRightsElements;
class DDLRenamingMap;

/// Initializes a DDLRenamingMap from a BACKUP or RESTORE query.
DDLRenamingMap makeRenamingMapFromBackupQuery(const ASTBackupQuery::Elements & elements);

/// Write backup entries to an opened backup.
void writeBackupEntries(BackupMutablePtr backup, BackupEntries && backup_entries, ThreadPool & thread_pool);

/// Run data restoring tasks which insert data to tables.
void restoreTablesData(DataRestoreTasks && tasks, ThreadPool & thread_pool);

/// Returns access required to execute BACKUP query.
AccessRightsElements getRequiredAccessToBackup(const ASTBackupQuery::Elements & elements);

@ -21,6 +21,7 @@
#include <Common/Macros.h>
#include <Common/logger_useful.h>
#include <Common/setThreadName.h>
#include <Common/scope_guard_safe.h>


namespace DB

@ -38,14 +39,33 @@ namespace Stage = BackupCoordinationStage;

namespace
{
std::shared_ptr<IBackupCoordination> makeBackupCoordination(std::optional<BackupCoordinationRemote::BackupKeeperSettings> keeper_settings, String & root_zk_path, const String & backup_uuid, const ContextPtr & context, bool is_internal_backup)
std::shared_ptr<IBackupCoordination> makeBackupCoordination(const ContextPtr & context, const BackupSettings & backup_settings, bool remote)
{
if (!root_zk_path.empty())
if (remote)
{
if (!keeper_settings.has_value())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Parameter keeper_settings is empty while root_zk_path is not. This is bug");
String root_zk_path = context->getConfigRef().getString("backups.zookeeper_path", "/clickhouse/backups");

auto get_zookeeper = [global_context = context->getGlobalContext()] { return global_context->getZooKeeper(); };
return std::make_shared<BackupCoordinationRemote>(*keeper_settings, root_zk_path, backup_uuid, get_zookeeper, is_internal_backup);

BackupCoordinationRemote::BackupKeeperSettings keeper_settings
{
.keeper_max_retries = context->getSettingsRef().backup_keeper_max_retries,
.keeper_retry_initial_backoff_ms = context->getSettingsRef().backup_keeper_retry_initial_backoff_ms,
.keeper_retry_max_backoff_ms = context->getSettingsRef().backup_keeper_retry_max_backoff_ms,
.batch_size_for_keeper_multiread = context->getSettingsRef().backup_batch_size_for_keeper_multiread,
};

auto all_hosts = BackupSettings::Util::filterHostIDs(
backup_settings.cluster_host_ids, backup_settings.shard_num, backup_settings.replica_num);

return std::make_shared<BackupCoordinationRemote>(
get_zookeeper,
root_zk_path,
keeper_settings,
toString(*backup_settings.backup_uuid),
all_hosts,
backup_settings.host_id,
backup_settings.internal);
}
else
{

@ -53,12 +73,19 @@ namespace
}
}

std::shared_ptr<IRestoreCoordination> makeRestoreCoordination(const String & root_zk_path, const String & restore_uuid, const ContextPtr & context, bool is_internal_backup)
std::shared_ptr<IRestoreCoordination>
makeRestoreCoordination(const ContextPtr & context, const RestoreSettings & restore_settings, bool remote)
{
if (!root_zk_path.empty())
if (remote)
{
String root_zk_path = context->getConfigRef().getString("backups.zookeeper_path", "/clickhouse/backups");

auto get_zookeeper = [global_context = context->getGlobalContext()] { return global_context->getZooKeeper(); };
return std::make_shared<RestoreCoordinationRemote>(root_zk_path, restore_uuid, get_zookeeper, is_internal_backup);

auto all_hosts = BackupSettings::Util::filterHostIDs(
restore_settings.cluster_host_ids, restore_settings.shard_num, restore_settings.replica_num);

return std::make_shared<RestoreCoordinationRemote>(get_zookeeper, root_zk_path, toString(*restore_settings.restore_uuid), all_hosts, restore_settings.host_id, restore_settings.internal);
}
else
{
@ -68,12 +95,12 @@ namespace

/// Sends information about an exception to IBackupCoordination or IRestoreCoordination.
template <typename CoordinationType>
void sendExceptionToCoordination(std::shared_ptr<CoordinationType> coordination, const String & current_host, const Exception & exception)
void sendExceptionToCoordination(std::shared_ptr<CoordinationType> coordination, const Exception & exception)
{
try
{
if (coordination)
coordination->setError(current_host, exception);
coordination->setError(exception);
}
catch (...)
{
@ -82,7 +109,7 @@ namespace

/// Sends information about the current exception to IBackupCoordination or IRestoreCoordination.
template <typename CoordinationType>
void sendCurrentExceptionToCoordination(std::shared_ptr<CoordinationType> coordination, const String & current_host)
void sendCurrentExceptionToCoordination(std::shared_ptr<CoordinationType> coordination)
{
try
{
@ -90,12 +117,12 @@ namespace
}
catch (const Exception & e)
{
sendExceptionToCoordination(coordination, current_host, e);
sendExceptionToCoordination(coordination, e);
}
catch (...)
{
if (coordination)
coordination->setError(current_host, Exception(getCurrentExceptionMessageAndPattern(true, true), getCurrentExceptionCode()));
coordination->setError(Exception(getCurrentExceptionMessageAndPattern(true, true), getCurrentExceptionCode()));
}
}

@ -162,24 +189,13 @@ OperationID BackupsWorker::startMakingBackup(const ASTPtr & query, const Context
else
backup_id = toString(*backup_settings.backup_uuid);

String root_zk_path;

std::shared_ptr<IBackupCoordination> backup_coordination;
if (backup_settings.internal)
{
/// The following call of makeBackupCoordination() is not essential because doBackup() will later create a backup coordination
/// if it's not created here. However to handle errors better it's better to make a coordination here because this way
/// if an exception will be thrown in startMakingBackup() other hosts will know about that.
root_zk_path = context->getConfigRef().getString("backups.zookeeper_path", "/clickhouse/backups");

BackupCoordinationRemote::BackupKeeperSettings keeper_settings
{
.keeper_max_retries = context->getSettingsRef().backup_keeper_max_retries,
.keeper_retry_initial_backoff_ms = context->getSettingsRef().backup_keeper_retry_initial_backoff_ms,
.keeper_retry_max_backoff_ms = context->getSettingsRef().backup_keeper_retry_max_backoff_ms,
.batch_size_for_keeper_multiread = context->getSettingsRef().backup_batch_size_for_keeper_multiread,
};
backup_coordination = makeBackupCoordination(keeper_settings, root_zk_path, toString(*backup_settings.backup_uuid), context, backup_settings.internal);
backup_coordination = makeBackupCoordination(context, backup_settings, /* remote= */ true);
}

auto backup_info = BackupInfo::fromAST(*backup_query->backup_name);
@ -238,7 +254,7 @@ OperationID BackupsWorker::startMakingBackup(const ASTPtr & query, const Context
tryLogCurrentException(log, fmt::format("Failed to start {} {}", (backup_settings.internal ? "internal backup" : "backup"), backup_name_for_logging));
/// Something bad happened, the backup has not built.
setStatusSafe(backup_id, BackupStatus::BACKUP_FAILED);
sendCurrentExceptionToCoordination(backup_coordination, backup_settings.host_id);
sendCurrentExceptionToCoordination(backup_coordination);
throw;
}
}
@ -274,19 +290,9 @@ void BackupsWorker::doBackup(
|
||||
if (!on_cluster)
|
||||
context->checkAccess(required_access);
|
||||
|
||||
String root_zk_path;
|
||||
std::optional<BackupCoordinationRemote::BackupKeeperSettings> keeper_settings;
|
||||
ClusterPtr cluster;
|
||||
if (on_cluster)
|
||||
{
|
||||
keeper_settings = BackupCoordinationRemote::BackupKeeperSettings
|
||||
{
|
||||
.keeper_max_retries = context->getSettingsRef().backup_keeper_max_retries,
|
||||
.keeper_retry_initial_backoff_ms = context->getSettingsRef().backup_keeper_retry_initial_backoff_ms,
|
||||
.keeper_retry_max_backoff_ms = context->getSettingsRef().backup_keeper_retry_max_backoff_ms,
|
||||
.batch_size_for_keeper_multiread = context->getSettingsRef().backup_batch_size_for_keeper_multiread,
|
||||
};
|
||||
root_zk_path = context->getConfigRef().getString("backups.zookeeper_path", "/clickhouse/backups");
|
||||
backup_query->cluster = context->getMacros()->expand(backup_query->cluster);
|
||||
cluster = context->getCluster(backup_query->cluster);
|
||||
backup_settings.cluster_host_ids = cluster->getHostIDs();
|
||||
@ -294,7 +300,7 @@ void BackupsWorker::doBackup(
|
||||
|
||||
/// Make a backup coordination.
|
||||
if (!backup_coordination)
|
||||
backup_coordination = makeBackupCoordination(keeper_settings, root_zk_path, toString(*backup_settings.backup_uuid), context, backup_settings.internal);
|
||||
backup_coordination = makeBackupCoordination(context, backup_settings, /* remote= */ on_cluster);
|
||||
|
||||
if (!allow_concurrent_backups && backup_coordination->hasConcurrentBackups(std::ref(num_active_backups)))
|
||||
throw Exception(ErrorCodes::CONCURRENT_ACCESS_NOT_SUPPORTED, "Concurrent backups not supported, turn on setting 'allow_concurrent_backups'");
|
||||
@ -330,9 +336,7 @@ void BackupsWorker::doBackup(
|
||||
executeDDLQueryOnCluster(backup_query, mutable_context, params);
|
||||
|
||||
/// Wait until all the hosts have written their backup entries.
|
||||
auto all_hosts = BackupSettings::Util::filterHostIDs(
|
||||
backup_settings.cluster_host_ids, backup_settings.shard_num, backup_settings.replica_num);
|
||||
backup_coordination->waitForStage(all_hosts, Stage::COMPLETED);
|
||||
backup_coordination->waitForStage(Stage::COMPLETED);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -346,10 +350,10 @@ void BackupsWorker::doBackup(
|
||||
}
|
||||
|
||||
/// Write the backup entries to the backup.
|
||||
writeBackupEntries(backup, std::move(backup_entries), backups_thread_pool);
|
||||
writeBackupEntries(backup_id, backup, std::move(backup_entries), backups_thread_pool, backup_settings.internal);
|
||||
|
||||
/// We have written our backup entries, we need to tell other hosts (they could be waiting for it).
|
||||
backup_coordination->setStage(backup_settings.host_id, Stage::COMPLETED, "");
|
||||
backup_coordination->setStage(Stage::COMPLETED, "");
|
||||
}
|
||||
|
||||
size_t num_files = 0;
|
||||
@ -374,6 +378,7 @@ void BackupsWorker::doBackup(
|
||||
|
||||
LOG_INFO(log, "{} {} was created successfully", (backup_settings.internal ? "Internal backup" : "Backup"), backup_name_for_logging);
|
||||
setStatus(backup_id, BackupStatus::BACKUP_CREATED);
|
||||
/// NOTE: we need to update metadata again after backup->finalizeWriting(), because backup metadata is written there.
|
||||
setNumFilesAndSize(backup_id, num_files, total_size, num_entries, uncompressed_size, compressed_size, 0, 0);
|
||||
}
|
||||
catch (...)
|
||||
@ -383,7 +388,7 @@ void BackupsWorker::doBackup(
|
||||
{
|
||||
tryLogCurrentException(log, fmt::format("Failed to make {} {}", (backup_settings.internal ? "internal backup" : "backup"), backup_name_for_logging));
|
||||
setStatusSafe(backup_id, BackupStatus::BACKUP_FAILED);
|
||||
sendCurrentExceptionToCoordination(backup_coordination, backup_settings.host_id);
|
||||
sendCurrentExceptionToCoordination(backup_coordination);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -394,6 +399,88 @@ void BackupsWorker::doBackup(
|
||||
}
|
||||
|
||||
|
||||
void BackupsWorker::writeBackupEntries(const OperationID & backup_id, BackupMutablePtr backup, BackupEntries && backup_entries, ThreadPool & thread_pool, bool internal)
|
||||
{
|
||||
size_t num_active_jobs = 0;
|
||||
std::mutex mutex;
|
||||
std::condition_variable event;
|
||||
std::exception_ptr exception;
|
||||
|
||||
bool always_single_threaded = !backup->supportsWritingInMultipleThreads();
|
||||
auto thread_group = CurrentThread::getGroup();
|
||||
|
||||
for (auto & name_and_entry : backup_entries)
|
||||
{
|
||||
auto & name = name_and_entry.first;
|
||||
auto & entry = name_and_entry.second;
|
||||
|
||||
{
|
||||
std::unique_lock lock{mutex};
|
||||
if (exception)
|
||||
break;
|
||||
++num_active_jobs;
|
||||
}
|
||||
|
||||
auto job = [&](bool async)
|
||||
{
|
||||
SCOPE_EXIT_SAFE(
|
||||
std::lock_guard lock{mutex};
|
||||
if (!--num_active_jobs)
|
||||
event.notify_all();
|
||||
if (async)
|
||||
CurrentThread::detachFromGroupIfNotDetached();
|
||||
);
|
||||
|
||||
try
|
||||
{
|
||||
if (async && thread_group)
|
||||
CurrentThread::attachToGroup(thread_group);
|
||||
|
||||
if (async)
|
||||
setThreadName("BackupWorker");
|
||||
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
if (exception)
|
||||
return;
|
||||
}
|
||||
|
||||
backup->writeFile(name, std::move(entry));
|
||||
// Update metadata
|
||||
if (!internal)
|
||||
{
|
||||
setNumFilesAndSize(
|
||||
backup_id,
|
||||
backup->getNumFiles(),
|
||||
backup->getTotalSize(),
|
||||
backup->getNumEntries(),
|
||||
backup->getUncompressedSize(),
|
||||
backup->getCompressedSize(),
|
||||
0, 0);
|
||||
}
|
||||
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
if (!exception)
|
||||
exception = std::current_exception();
|
||||
}
|
||||
};
|
||||
|
||||
if (always_single_threaded || !thread_pool.trySchedule([job] { job(true); }))
|
||||
job(false);
|
||||
}
|
||||
|
||||
{
|
||||
std::unique_lock lock{mutex};
|
||||
event.wait(lock, [&] { return !num_active_jobs; });
|
||||
if (exception)
|
||||
std::rethrow_exception(exception);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
OperationID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePtr context)
|
||||
{
|
||||
auto restore_query = std::static_pointer_cast<ASTBackupQuery>(query->clone());
|
||||
@ -417,8 +504,7 @@ OperationID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePt
|
||||
/// The following call of makeRestoreCoordination() is not essential because doRestore() will later create a restore coordination
|
||||
/// if it's not created here. However to handle errors better it's better to make a coordination here because this way
|
||||
/// if an exception will be thrown in startRestoring() other hosts will know about that.
|
||||
auto root_zk_path = context->getConfigRef().getString("backups.zookeeper_path", "/clickhouse/backups");
|
||||
restore_coordination = makeRestoreCoordination(root_zk_path, toString(*restore_settings.restore_uuid), context, restore_settings.internal);
|
||||
restore_coordination = makeRestoreCoordination(context, restore_settings, /* remote= */ true);
|
||||
}
|
||||
|
||||
try
|
||||
@ -474,7 +560,7 @@ OperationID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePt
|
||||
{
|
||||
/// Something bad happened, the backup has not built.
|
||||
setStatusSafe(restore_id, BackupStatus::RESTORE_FAILED);
|
||||
sendCurrentExceptionToCoordination(restore_coordination, restore_settings.host_id);
|
||||
sendCurrentExceptionToCoordination(restore_coordination);
|
||||
throw;
|
||||
}
|
||||
}
|
||||
@ -509,14 +595,12 @@ void BackupsWorker::doRestore(
|
||||
BackupPtr backup = BackupFactory::instance().createBackup(backup_open_params);
|
||||
|
||||
String current_database = context->getCurrentDatabase();
|
||||
String root_zk_path;
|
||||
/// Checks access rights if this is ON CLUSTER query.
|
||||
/// (If this isn't ON CLUSTER query RestorerFromBackup will check access rights later.)
|
||||
ClusterPtr cluster;
|
||||
bool on_cluster = !restore_query->cluster.empty();
|
||||
if (on_cluster)
|
||||
{
|
||||
root_zk_path = context->getConfigRef().getString("backups.zookeeper_path", "/clickhouse/backups");
|
||||
restore_query->cluster = context->getMacros()->expand(restore_query->cluster);
|
||||
cluster = context->getCluster(restore_query->cluster);
|
||||
restore_settings.cluster_host_ids = cluster->getHostIDs();
|
||||
@ -539,7 +623,7 @@ void BackupsWorker::doRestore(
|
||||
|
||||
/// Make a restore coordination.
|
||||
if (!restore_coordination)
|
||||
restore_coordination = makeRestoreCoordination(root_zk_path, toString(*restore_settings.restore_uuid), context, restore_settings.internal);
|
||||
restore_coordination = makeRestoreCoordination(context, restore_settings, /* remote= */ on_cluster);
|
||||
|
||||
if (!allow_concurrent_restores && restore_coordination->hasConcurrentRestores(std::ref(num_active_restores)))
|
||||
throw Exception(ErrorCodes::CONCURRENT_ACCESS_NOT_SUPPORTED, "Concurrent restores not supported, turn on setting 'allow_concurrent_restores'");
|
||||
@ -561,9 +645,7 @@ void BackupsWorker::doRestore(
|
||||
executeDDLQueryOnCluster(restore_query, context, params);
|
||||
|
||||
/// Wait until all the hosts have written their backup entries.
|
||||
auto all_hosts = BackupSettings::Util::filterHostIDs(
|
||||
restore_settings.cluster_host_ids, restore_settings.shard_num, restore_settings.replica_num);
|
||||
restore_coordination->waitForStage(all_hosts, Stage::COMPLETED);
|
||||
restore_coordination->waitForStage(Stage::COMPLETED);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -578,23 +660,14 @@ void BackupsWorker::doRestore(
|
||||
}
|
||||
|
||||
/// Execute the data restoring tasks.
|
||||
restoreTablesData(std::move(data_restore_tasks), restores_thread_pool);
|
||||
restoreTablesData(restore_id, backup, std::move(data_restore_tasks), restores_thread_pool);
|
||||
|
||||
/// We have restored everything, we need to tell other hosts (they could be waiting for it).
|
||||
restore_coordination->setStage(restore_settings.host_id, Stage::COMPLETED, "");
|
||||
restore_coordination->setStage(Stage::COMPLETED, "");
|
||||
}
|
||||
|
||||
LOG_INFO(log, "Restored from {} {} successfully", (restore_settings.internal ? "internal backup" : "backup"), backup_name_for_logging);
|
||||
setStatus(restore_id, BackupStatus::RESTORED);
|
||||
setNumFilesAndSize(
|
||||
restore_id,
|
||||
backup->getNumFiles(),
|
||||
backup->getTotalSize(),
|
||||
backup->getNumEntries(),
|
||||
backup->getUncompressedSize(),
|
||||
backup->getCompressedSize(),
|
||||
backup->getNumReadFiles(),
|
||||
backup->getNumReadBytes());
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
@ -603,7 +676,7 @@ void BackupsWorker::doRestore(
|
||||
{
|
||||
tryLogCurrentException(log, fmt::format("Failed to restore from {} {}", (restore_settings.internal ? "internal backup" : "backup"), backup_name_for_logging));
|
||||
setStatusSafe(restore_id, BackupStatus::RESTORE_FAILED);
|
||||
sendCurrentExceptionToCoordination(restore_coordination, restore_settings.host_id);
|
||||
sendCurrentExceptionToCoordination(restore_coordination);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -614,6 +687,80 @@ void BackupsWorker::doRestore(
|
||||
}
|
||||
|
||||
|
||||
void BackupsWorker::restoreTablesData(const OperationID & restore_id, BackupPtr backup, DataRestoreTasks && tasks, ThreadPool & thread_pool)
|
||||
{
|
||||
size_t num_active_jobs = 0;
|
||||
std::mutex mutex;
|
||||
std::condition_variable event;
|
||||
std::exception_ptr exception;
|
||||
|
||||
auto thread_group = CurrentThread::getGroup();
|
||||
|
||||
for (auto & task : tasks)
|
||||
{
|
||||
{
|
||||
std::unique_lock lock{mutex};
|
||||
if (exception)
|
||||
break;
|
||||
++num_active_jobs;
|
||||
}
|
||||
|
||||
auto job = [&](bool async)
|
||||
{
|
||||
SCOPE_EXIT_SAFE(
|
||||
std::lock_guard lock{mutex};
|
||||
if (!--num_active_jobs)
|
||||
event.notify_all();
|
||||
if (async)
|
||||
CurrentThread::detachFromGroupIfNotDetached();
|
||||
);
|
||||
|
||||
try
|
||||
{
|
||||
if (async && thread_group)
|
||||
CurrentThread::attachToGroup(thread_group);
|
||||
|
||||
if (async)
|
||||
setThreadName("RestoreWorker");
|
||||
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
if (exception)
|
||||
return;
|
||||
}
|
||||
|
||||
std::move(task)();
|
||||
setNumFilesAndSize(
|
||||
restore_id,
|
||||
backup->getNumFiles(),
|
||||
backup->getTotalSize(),
|
||||
backup->getNumEntries(),
|
||||
backup->getUncompressedSize(),
|
||||
backup->getCompressedSize(),
|
||||
backup->getNumReadFiles(),
|
||||
backup->getNumReadBytes());
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
if (!exception)
|
||||
exception = std::current_exception();
|
||||
}
|
||||
};
|
||||
|
||||
if (!thread_pool.trySchedule([job] { job(true); }))
|
||||
job(false);
|
||||
}
|
||||
|
||||
{
|
||||
std::unique_lock lock{mutex};
|
||||
event.wait(lock, [&] { return !num_active_jobs; });
|
||||
if (exception)
|
||||
std::rethrow_exception(exception);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void BackupsWorker::addInfo(const OperationID & id, const String & name, bool internal, BackupStatus status)
|
||||
{
|
||||
Info info;
|
||||
|
@ -17,6 +17,12 @@ struct RestoreSettings;
|
||||
struct BackupInfo;
|
||||
class IBackupCoordination;
|
||||
class IRestoreCoordination;
|
||||
class IBackup;
|
||||
using BackupMutablePtr = std::shared_ptr<IBackup>;
|
||||
using BackupPtr = std::shared_ptr<const IBackup>;
|
||||
class IBackupEntry;
|
||||
using BackupEntries = std::vector<std::pair<String, std::shared_ptr<const IBackupEntry>>>;
|
||||
using DataRestoreTasks = std::vector<std::function<void()>>;
|
||||
|
||||
/// Manager of backups and restores: executes backups and restores' threads in the background.
|
||||
/// Keeps information about backups and restores started in this session.
|
||||
@ -99,6 +105,9 @@ private:
|
||||
ContextMutablePtr mutable_context,
|
||||
bool called_async);
|
||||
|
||||
/// Write backup entries to an opened backup.
|
||||
void writeBackupEntries(const OperationID & backup_id, BackupMutablePtr backup, BackupEntries && backup_entries, ThreadPool & thread_pool, bool internal);
|
||||
|
||||
OperationID startRestoring(const ASTPtr & query, ContextMutablePtr context);
|
||||
|
||||
void doRestore(
|
||||
@ -111,6 +120,9 @@ private:
|
||||
ContextMutablePtr context,
|
||||
bool called_async);
|
||||
|
||||
/// Run data restoring tasks which insert data to tables.
|
||||
void restoreTablesData(const OperationID & restore_id, BackupPtr backup, DataRestoreTasks && tasks, ThreadPool & thread_pool);
|
||||
|
||||
void addInfo(const OperationID & id, const String & name, bool internal, BackupStatus status);
|
||||
void setStatus(const OperationID & id, BackupStatus status, bool throw_if_error = true);
|
||||
void setStatusSafe(const String & id, BackupStatus status) { setStatus(id, status, false); }
|
||||
|
@ -22,10 +22,10 @@ public:
|
||||
virtual ~IBackupCoordination() = default;
|
||||
|
||||
/// Sets the current stage and waits for other hosts to come to this stage too.
|
||||
virtual void setStage(const String & current_host, const String & new_stage, const String & message) = 0;
|
||||
virtual void setError(const String & current_host, const Exception & exception) = 0;
|
||||
virtual Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait) = 0;
|
||||
virtual Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait, std::chrono::milliseconds timeout) = 0;
|
||||
virtual void setStage(const String & new_stage, const String & message) = 0;
|
||||
virtual void setError(const Exception & exception) = 0;
|
||||
virtual Strings waitForStage(const String & stage_to_wait) = 0;
|
||||
virtual Strings waitForStage(const String & stage_to_wait, std::chrono::milliseconds timeout) = 0;
|
||||
|
||||
struct PartNameAndChecksum
|
||||
{
|
||||
@ -66,12 +66,12 @@ public:
|
||||
virtual Strings getReplicatedDataPaths(const String & table_shared_id) const = 0;
|
||||
|
||||
/// Adds a path to access.txt file keeping access entities of a ReplicatedAccessStorage.
|
||||
virtual void addReplicatedAccessFilePath(const String & access_zk_path, AccessEntityType access_entity_type, const String & host_id, const String & file_path) = 0;
|
||||
virtual Strings getReplicatedAccessFilePaths(const String & access_zk_path, AccessEntityType access_entity_type, const String & host_id) const = 0;
|
||||
virtual void addReplicatedAccessFilePath(const String & access_zk_path, AccessEntityType access_entity_type, const String & file_path) = 0;
|
||||
virtual Strings getReplicatedAccessFilePaths(const String & access_zk_path, AccessEntityType access_entity_type) const = 0;
|
||||
|
||||
/// Adds a path to a directory with user-defined SQL objects inside the backup.
|
||||
virtual void addReplicatedSQLObjectsDir(const String & loader_zk_path, UserDefinedSQLObjectType object_type, const String & host_id, const String & dir_path) = 0;
|
||||
virtual Strings getReplicatedSQLObjectsDirs(const String & loader_zk_path, UserDefinedSQLObjectType object_type, const String & host_id) const = 0;
|
||||
virtual void addReplicatedSQLObjectsDir(const String & loader_zk_path, UserDefinedSQLObjectType object_type, const String & dir_path) = 0;
|
||||
virtual Strings getReplicatedSQLObjectsDirs(const String & loader_zk_path, UserDefinedSQLObjectType object_type) const = 0;
|
||||
|
||||
struct FileInfo
|
||||
{
|
||||
|
@ -18,10 +18,10 @@ public:
|
||||
virtual ~IRestoreCoordination() = default;
|
||||
|
||||
/// Sets the current stage and waits for other hosts to come to this stage too.
|
||||
virtual void setStage(const String & current_host, const String & new_stage, const String & message) = 0;
|
||||
virtual void setError(const String & current_host, const Exception & exception) = 0;
|
||||
virtual Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait) = 0;
|
||||
virtual Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait, std::chrono::milliseconds timeout) = 0;
|
||||
virtual void setStage(const String & new_stage, const String & message) = 0;
|
||||
virtual void setError(const Exception & exception) = 0;
|
||||
virtual Strings waitForStage(const String & stage_to_wait) = 0;
|
||||
virtual Strings waitForStage(const String & stage_to_wait, std::chrono::milliseconds timeout) = 0;
|
||||
|
||||
static constexpr const char * kErrorStatus = "error";
|
||||
|
||||
|
@ -7,20 +7,20 @@ namespace DB
|
||||
RestoreCoordinationLocal::RestoreCoordinationLocal() = default;
|
||||
RestoreCoordinationLocal::~RestoreCoordinationLocal() = default;
|
||||
|
||||
void RestoreCoordinationLocal::setStage(const String &, const String &, const String &)
|
||||
void RestoreCoordinationLocal::setStage(const String &, const String &)
|
||||
{
|
||||
}
|
||||
|
||||
void RestoreCoordinationLocal::setError(const String &, const Exception &)
|
||||
void RestoreCoordinationLocal::setError(const Exception &)
|
||||
{
|
||||
}
|
||||
|
||||
Strings RestoreCoordinationLocal::waitForStage(const Strings &, const String &)
|
||||
Strings RestoreCoordinationLocal::waitForStage(const String &)
|
||||
{
|
||||
return {};
|
||||
}
|
||||
|
||||
Strings RestoreCoordinationLocal::waitForStage(const Strings &, const String &, std::chrono::milliseconds)
|
||||
Strings RestoreCoordinationLocal::waitForStage(const String &, std::chrono::milliseconds)
|
||||
{
|
||||
return {};
|
||||
}
|
||||
|
@ -19,10 +19,10 @@ public:
|
||||
~RestoreCoordinationLocal() override;
|
||||
|
||||
/// Sets the current stage and waits for other hosts to come to this stage too.
|
||||
void setStage(const String & current_host, const String & new_stage, const String & message) override;
|
||||
void setError(const String & current_host, const Exception & exception) override;
|
||||
Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait) override;
|
||||
Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait, std::chrono::milliseconds timeout) override;
|
||||
void setStage(const String & new_stage, const String & message) override;
|
||||
void setError(const Exception & exception) override;
|
||||
Strings waitForStage(const String & stage_to_wait) override;
|
||||
Strings waitForStage(const String & stage_to_wait, std::chrono::milliseconds timeout) override;
|
||||
|
||||
/// Starts creating a table in a replicated database. Returns false if there is another host which is already creating this table.
|
||||
bool acquireCreatingTableInReplicatedDatabase(const String & database_zk_path, const String & table_name) override;
|
||||
|
@ -11,11 +11,19 @@ namespace DB
|
||||
namespace Stage = BackupCoordinationStage;
|
||||
|
||||
RestoreCoordinationRemote::RestoreCoordinationRemote(
|
||||
const String & root_zookeeper_path_, const String & restore_uuid_, zkutil::GetZooKeeper get_zookeeper_, bool is_internal_)
|
||||
: root_zookeeper_path(root_zookeeper_path_)
|
||||
, zookeeper_path(root_zookeeper_path_ + "/restore-" + restore_uuid_)
|
||||
zkutil::GetZooKeeper get_zookeeper_,
|
||||
const String & root_zookeeper_path_,
|
||||
const String & restore_uuid_,
|
||||
const Strings & all_hosts_,
|
||||
const String & current_host_,
|
||||
bool is_internal_)
|
||||
: get_zookeeper(get_zookeeper_)
|
||||
, root_zookeeper_path(root_zookeeper_path_)
|
||||
, restore_uuid(restore_uuid_)
|
||||
, get_zookeeper(get_zookeeper_)
|
||||
, zookeeper_path(root_zookeeper_path_ + "/restore-" + restore_uuid_)
|
||||
, all_hosts(all_hosts_)
|
||||
, current_host(current_host_)
|
||||
, current_host_index(BackupCoordinationRemote::findCurrentHostIndex(all_hosts, current_host))
|
||||
, is_internal(is_internal_)
|
||||
{
|
||||
createRootNodes();
|
||||
@ -63,22 +71,22 @@ void RestoreCoordinationRemote::createRootNodes()
|
||||
}
|
||||
|
||||
|
||||
void RestoreCoordinationRemote::setStage(const String & current_host, const String & new_stage, const String & message)
|
||||
void RestoreCoordinationRemote::setStage(const String & new_stage, const String & message)
|
||||
{
|
||||
stage_sync->set(current_host, new_stage, message);
|
||||
}
|
||||
|
||||
void RestoreCoordinationRemote::setError(const String & current_host, const Exception & exception)
|
||||
void RestoreCoordinationRemote::setError(const Exception & exception)
|
||||
{
|
||||
stage_sync->setError(current_host, exception);
|
||||
}
|
||||
|
||||
Strings RestoreCoordinationRemote::waitForStage(const Strings & all_hosts, const String & stage_to_wait)
|
||||
Strings RestoreCoordinationRemote::waitForStage(const String & stage_to_wait)
|
||||
{
|
||||
return stage_sync->wait(all_hosts, stage_to_wait);
|
||||
}
|
||||
|
||||
Strings RestoreCoordinationRemote::waitForStage(const Strings & all_hosts, const String & stage_to_wait, std::chrono::milliseconds timeout)
|
||||
Strings RestoreCoordinationRemote::waitForStage(const String & stage_to_wait, std::chrono::milliseconds timeout)
|
||||
{
|
||||
return stage_sync->waitFor(all_hosts, stage_to_wait, timeout);
|
||||
}
|
||||
|
@ -11,14 +11,21 @@ namespace DB
|
||||
class RestoreCoordinationRemote : public IRestoreCoordination
|
||||
{
|
||||
public:
|
||||
RestoreCoordinationRemote(const String & root_zookeeper_path_, const String & restore_uuid_, zkutil::GetZooKeeper get_zookeeper_, bool is_internal_);
|
||||
RestoreCoordinationRemote(
|
||||
zkutil::GetZooKeeper get_zookeeper_,
|
||||
const String & root_zookeeper_path_,
|
||||
const String & restore_uuid_,
|
||||
const Strings & all_hosts_,
|
||||
const String & current_host_,
|
||||
bool is_internal_);
|
||||
|
||||
~RestoreCoordinationRemote() override;
|
||||
|
||||
/// Sets the current stage and waits for other hosts to come to this stage too.
|
||||
void setStage(const String & current_host, const String & new_stage, const String & message) override;
|
||||
void setError(const String & current_host, const Exception & exception) override;
|
||||
Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait) override;
|
||||
Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait, std::chrono::milliseconds timeout) override;
|
||||
void setStage(const String & new_stage, const String & message) override;
|
||||
void setError(const Exception & exception) override;
|
||||
Strings waitForStage(const String & stage_to_wait) override;
|
||||
Strings waitForStage(const String & stage_to_wait, std::chrono::milliseconds timeout) override;
|
||||
|
||||
/// Starts creating a table in a replicated database. Returns false if there is another host which is already creating this table.
|
||||
bool acquireCreatingTableInReplicatedDatabase(const String & database_zk_path, const String & table_name) override;
|
||||
@ -44,10 +51,13 @@ private:
|
||||
|
||||
class ReplicatedDatabasesMetadataSync;
|
||||
|
||||
const String root_zookeeper_path;
|
||||
const String zookeeper_path;
|
||||
const String restore_uuid;
|
||||
const zkutil::GetZooKeeper get_zookeeper;
|
||||
const String root_zookeeper_path;
|
||||
const String restore_uuid;
|
||||
const String zookeeper_path;
|
||||
const Strings all_hosts;
|
||||
const String current_host;
|
||||
const size_t current_host_index;
|
||||
const bool is_internal;
|
||||
|
||||
std::optional<BackupCoordinationStageSync> stage_sync;
|
||||
|
@ -150,11 +150,11 @@ void RestorerFromBackup::setStage(const String & new_stage, const String & messa
|
||||
|
||||
if (restore_coordination)
|
||||
{
|
||||
restore_coordination->setStage(restore_settings.host_id, new_stage, message);
|
||||
restore_coordination->setStage(new_stage, message);
|
||||
if (new_stage == Stage::FINDING_TABLES_IN_BACKUP)
|
||||
restore_coordination->waitForStage(all_hosts, new_stage, on_cluster_first_sync_timeout);
|
||||
restore_coordination->waitForStage(new_stage, on_cluster_first_sync_timeout);
|
||||
else
|
||||
restore_coordination->waitForStage(all_hosts, new_stage);
|
||||
restore_coordination->waitForStage(new_stage);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -4,21 +4,22 @@
|
||||
#include <Poco/Net/NetException.h>
|
||||
#include <Poco/Util/HelpFormatter.h>
|
||||
|
||||
#include <base/range.h>
|
||||
|
||||
#include <Common/StringUtils/StringUtils.h>
|
||||
#include <Common/SensitiveDataMasker.h>
|
||||
#include "config.h"
|
||||
#include <Common/StringUtils/StringUtils.h>
|
||||
#include <Common/logger_useful.h>
|
||||
#include <base/errnoToString.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <Formats/registerFormats.h>
|
||||
#include <Server/HTTP/HTTPServer.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteBufferFromFile.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Server/HTTP/HTTPServer.h>
|
||||
#include <base/errnoToString.h>
|
||||
#include <base/range.h>
|
||||
|
||||
#include <sys/time.h>
|
||||
#include <sys/resource.h>
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#if USE_ODBC
|
||||
# include <Poco/Data/ODBC/Connector.h>
|
||||
#endif
|
||||
@ -89,7 +90,7 @@ void IBridge::defineOptions(Poco::Util::OptionSet & options)
|
||||
Poco::Util::Option("listen-host", "", "hostname or address to listen, default 127.0.0.1").argument("listen-host").binding("listen-host"));
|
||||
|
||||
options.addOption(
|
||||
Poco::Util::Option("http-timeout", "", "http timeout for socket, default 1800").argument("http-timeout").binding("http-timeout"));
|
||||
Poco::Util::Option("http-timeout", "", "http timeout for socket, default 180").argument("http-timeout").binding("http-timeout"));
|
||||
|
||||
options.addOption(
|
||||
Poco::Util::Option("max-server-connections", "", "max connections to server, default 1024").argument("max-server-connections").binding("max-server-connections"));
|
||||
@ -97,6 +98,9 @@ void IBridge::defineOptions(Poco::Util::OptionSet & options)
|
||||
options.addOption(
|
||||
Poco::Util::Option("keep-alive-timeout", "", "keepalive timeout, default 10").argument("keep-alive-timeout").binding("keep-alive-timeout"));
|
||||
|
||||
options.addOption(
|
||||
Poco::Util::Option("http-max-field-value-size", "", "max http field value size, default 1048576").argument("http-max-field-value-size").binding("http-max-field-value-size"));
|
||||
|
||||
options.addOption(
|
||||
Poco::Util::Option("log-level", "", "sets log level, default info") .argument("log-level").binding("logger.level"));
|
||||
|
||||
@ -165,6 +169,7 @@ void IBridge::initialize(Application & self)
|
||||
http_timeout = config().getUInt64("http-timeout", DEFAULT_HTTP_READ_BUFFER_TIMEOUT);
|
||||
max_server_connections = config().getUInt("max-server-connections", 1024);
|
||||
keep_alive_timeout = config().getUInt64("keep-alive-timeout", 10);
|
||||
http_max_field_value_size = config().getUInt64("http-max-field-value-size", 1048576);
|
||||
|
||||
struct rlimit limit;
|
||||
const UInt64 gb = 1024 * 1024 * 1024;
|
||||
@ -226,6 +231,10 @@ int IBridge::main(const std::vector<std::string> & /*args*/)
|
||||
auto context = Context::createGlobal(shared_context.get());
|
||||
context->makeGlobalContext();
|
||||
|
||||
auto settings = context->getSettings();
|
||||
settings.set("http_max_field_value_size", http_max_field_value_size);
|
||||
context->setSettings(settings);
|
||||
|
||||
if (config().has("query_masking_rules"))
|
||||
SensitiveDataMasker::setInstance(std::make_unique<SensitiveDataMasker>(config(), "query_masking_rules"));
|
||||
|
||||
|
@ -45,6 +45,7 @@ private:
|
||||
std::string log_level;
|
||||
unsigned max_server_connections;
|
||||
size_t http_timeout;
|
||||
size_t http_max_field_value_size;
|
||||
|
||||
Poco::Logger * log;
|
||||
};
|
||||
|
@ -67,6 +67,8 @@ std::unique_ptr<ShellCommand> IBridgeHelper::startBridgeCommand()
|
||||
cmd_args.push_back(config.getString(configPrefix() + ".listen_host", DEFAULT_HOST));
|
||||
cmd_args.push_back("--http-timeout");
|
||||
cmd_args.push_back(std::to_string(getHTTPTimeout().totalMicroseconds()));
|
||||
cmd_args.push_back("--http-max-field-value-size");
|
||||
cmd_args.push_back("99999999999999999"); // something "big" to accept large datasets (issue 47616)
|
||||
if (config.has("logger." + configPrefix() + "_log"))
|
||||
{
|
||||
cmd_args.push_back("--log-path");
|
||||
|
@ -1131,6 +1131,8 @@ void ClientBase::onProfileEvents(Block & block)
|
||||
{
|
||||
if (profile_events.watch.elapsedMilliseconds() >= profile_events.delay_ms)
|
||||
{
|
||||
/// We need to restart the watch each time we flushed these events
|
||||
profile_events.watch.restart();
|
||||
initLogsOutputStream();
|
||||
if (need_render_progress && tty_buf)
|
||||
progress_indication.clearProgressOutput(*tty_buf);
|
||||
@ -1144,7 +1146,6 @@ void ClientBase::onProfileEvents(Block & block)
|
||||
incrementProfileEventsBlock(profile_events.last_block, block);
|
||||
}
|
||||
}
|
||||
profile_events.watch.restart();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -24,7 +24,6 @@
|
||||
#include <IO/Operators.h>
|
||||
#include <IO/UseSSL.h>
|
||||
#include <IO/WriteBufferFromOStream.h>
|
||||
#include <Parsers/ASTExplainQuery.h>
|
||||
#include <Parsers/ASTExpressionList.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <Parsers/ASTIdentifier.h>
|
||||
@ -684,43 +683,76 @@ void QueryFuzzer::fuzzTableName(ASTTableExpression & table)
|
||||
|
||||
void QueryFuzzer::fuzzExplainQuery(ASTExplainQuery & explain)
|
||||
{
|
||||
/// Fuzz ExplainKind
|
||||
explain.setExplainKind(fuzzExplainKind(explain.getKind()));
|
||||
|
||||
bool settings_have_fuzzed = false;
|
||||
for (auto & child : explain.children)
|
||||
{
|
||||
if (auto * settings_ast = typeid_cast<ASTSetQuery *>(child.get()))
|
||||
{
|
||||
fuzzExplainSettings(*settings_ast, explain.getKind());
|
||||
settings_have_fuzzed = true;
|
||||
}
|
||||
/// Fuzzing other child like Explain Query
|
||||
else
|
||||
{
|
||||
fuzz(child);
|
||||
}
|
||||
}
|
||||
|
||||
if (!settings_have_fuzzed)
|
||||
{
|
||||
auto settings_ast = std::make_shared<ASTSetQuery>();
|
||||
settings_ast->is_standalone = false;
|
||||
fuzzExplainSettings(*settings_ast, explain.getKind());
|
||||
explain.setSettings(settings_ast);
|
||||
}
|
||||
}
|
||||
|
||||
ASTExplainQuery::ExplainKind QueryFuzzer::fuzzExplainKind(ASTExplainQuery::ExplainKind kind)
|
||||
{
|
||||
if (fuzz_rand() % 20 == 0)
|
||||
{
|
||||
/// Do not modify ExplainKind
|
||||
return kind;
|
||||
}
|
||||
else if (fuzz_rand() % 11 == 0)
|
||||
{
|
||||
explain.setExplainKind(ASTExplainQuery::ExplainKind::ParsedAST);
|
||||
return ASTExplainQuery::ExplainKind::ParsedAST;
|
||||
}
|
||||
else if (fuzz_rand() % 11 == 0)
|
||||
{
|
||||
explain.setExplainKind(ASTExplainQuery::ExplainKind::AnalyzedSyntax);
|
||||
return ASTExplainQuery::ExplainKind::AnalyzedSyntax;
|
||||
}
|
||||
else if (fuzz_rand() % 11 == 0)
|
||||
{
|
||||
explain.setExplainKind(ASTExplainQuery::ExplainKind::QueryTree);
|
||||
return ASTExplainQuery::ExplainKind::QueryTree;
|
||||
}
|
||||
else if (fuzz_rand() % 11 == 0)
|
||||
{
|
||||
explain.setExplainKind(ASTExplainQuery::ExplainKind::QueryPlan);
|
||||
return ASTExplainQuery::ExplainKind::QueryPlan;
|
||||
}
|
||||
else if (fuzz_rand() % 11 == 0)
|
||||
{
|
||||
explain.setExplainKind(ASTExplainQuery::ExplainKind::QueryPipeline);
|
||||
return ASTExplainQuery::ExplainKind::QueryPipeline;
|
||||
}
|
||||
else if (fuzz_rand() % 11 == 0)
|
||||
{
|
||||
explain.setExplainKind(ASTExplainQuery::ExplainKind::QueryEstimates);
|
||||
return ASTExplainQuery::ExplainKind::QueryEstimates;
|
||||
}
|
||||
else if (fuzz_rand() % 11 == 0)
|
||||
{
|
||||
explain.setExplainKind(ASTExplainQuery::ExplainKind::TableOverride);
|
||||
return ASTExplainQuery::ExplainKind::TableOverride;
|
||||
}
|
||||
else if (fuzz_rand() % 11 == 0)
|
||||
{
|
||||
explain.setExplainKind(ASTExplainQuery::ExplainKind::CurrentTransaction);
|
||||
return ASTExplainQuery::ExplainKind::CurrentTransaction;
|
||||
}
|
||||
return kind;
|
||||
}
|
||||
|
||||
void QueryFuzzer::fuzzExplainSettings(ASTSetQuery & settings_ast, ASTExplainQuery::ExplainKind kind)
|
||||
{
|
||||
auto & changes = settings_ast.changes;
|
||||
|
||||
static const std::unordered_map<ASTExplainQuery::ExplainKind, std::vector<String>> settings_by_kind
|
||||
= {{ASTExplainQuery::ExplainKind::ParsedAST, {"graph", "optimize"}},
|
||||
@ -732,44 +764,17 @@ void QueryFuzzer::fuzzExplainQuery(ASTExplainQuery & explain)
|
||||
{ASTExplainQuery::ExplainKind::TableOverride, {}},
|
||||
{ASTExplainQuery::ExplainKind::CurrentTransaction, {}}};
|
||||
|
||||
const auto & settings = settings_by_kind.at(explain.getKind());
|
||||
bool settings_have_fuzzed = false;
|
||||
for (auto & child : explain.children)
|
||||
{
|
||||
if (auto * settings_ast = typeid_cast<ASTSetQuery *>(child.get()))
|
||||
{
|
||||
fuzzExplainSettings(*settings_ast, settings);
|
||||
settings_have_fuzzed = true;
|
||||
}
|
||||
/// Fuzz other child like Explain Query
|
||||
else
|
||||
{
|
||||
fuzz(child);
|
||||
}
|
||||
}
|
||||
|
||||
if (!settings_have_fuzzed && !settings.empty())
|
||||
{
|
||||
auto settings_ast = std::make_shared<ASTSetQuery>();
|
||||
fuzzExplainSettings(*settings_ast, settings);
|
||||
explain.setSettings(settings_ast);
|
||||
}
|
||||
}
|
||||
|
||||
void QueryFuzzer::fuzzExplainSettings(ASTSetQuery & settings, const std::vector<String> & names)
|
||||
{
|
||||
auto & changes = settings.changes;
|
||||
|
||||
const auto & settings = settings_by_kind.at(kind);
|
||||
if (fuzz_rand() % 50 == 0 && !changes.empty())
|
||||
{
|
||||
changes.erase(changes.begin() + fuzz_rand() % changes.size());
|
||||
}
|
||||
|
||||
for (const auto & name : names)
|
||||
for (const auto & setting : settings)
|
||||
{
|
||||
if (fuzz_rand() % 5 == 0)
|
||||
{
|
||||
changes.emplace_back(name, true);
|
||||
changes.emplace_back(setting, true);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -910,6 +915,20 @@ void QueryFuzzer::fuzz(ASTPtr & ast)
|
||||
if (auto * with_union = typeid_cast<ASTSelectWithUnionQuery *>(ast.get()))
|
||||
{
|
||||
fuzz(with_union->list_of_selects);
|
||||
/// Fuzzing SELECT query to EXPLAIN query randomly.
|
||||
/// And we only fuzzing the root query into an EXPLAIN query, not fuzzing subquery
|
||||
if (fuzz_rand() % 20 == 0 && current_ast_depth <= 1)
|
||||
{
|
||||
auto explain = std::make_shared<ASTExplainQuery>(fuzzExplainKind());
|
||||
|
||||
auto settings_ast = std::make_shared<ASTSetQuery>();
|
||||
settings_ast->is_standalone = false;
|
||||
fuzzExplainSettings(*settings_ast, explain->getKind());
|
||||
explain->setSettings(settings_ast);
|
||||
|
||||
explain->setExplainedQuery(ast);
|
||||
ast = explain;
|
||||
}
|
||||
}
|
||||
else if (auto * with_intersect_except = typeid_cast<ASTSelectIntersectExceptQuery *>(ast.get()))
|
||||
{
|
||||
@ -1086,7 +1105,17 @@ void QueryFuzzer::fuzz(ASTPtr & ast)
|
||||
}
|
||||
else if (auto * explain_query = typeid_cast<ASTExplainQuery *>(ast.get()))
|
||||
{
|
||||
fuzzExplainQuery(*explain_query);
|
||||
/// Fuzzing EXPLAIN query to SELECT query randomly
|
||||
if (fuzz_rand() % 20 == 0 && explain_query->getExplainedQuery()->getQueryKind() == IAST::QueryKind::Select)
|
||||
{
|
||||
auto select_query = explain_query->getExplainedQuery()->clone();
|
||||
fuzz(select_query);
|
||||
ast = select_query;
|
||||
}
|
||||
else
|
||||
{
|
||||
fuzzExplainQuery(*explain_query);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -7,10 +7,11 @@
|
||||
|
||||
#include <pcg-random/pcg_random.hpp>
|
||||
|
||||
#include <Core/Field.h>
|
||||
#include <Parsers/ASTExplainQuery.h>
|
||||
#include <Parsers/IAST.h>
|
||||
#include <Common/randomSeed.h>
|
||||
#include "Parsers/IAST_fwd.h"
|
||||
#include <Core/Field.h>
|
||||
#include <Parsers/IAST.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -22,7 +23,6 @@ class ASTCreateQuery;
|
||||
class ASTInsertQuery;
|
||||
class ASTColumnDeclaration;
|
||||
class ASTDropQuery;
|
||||
class ASTExplainQuery;
|
||||
class ASTSetQuery;
|
||||
struct ASTTableExpression;
|
||||
struct ASTWindowDefinition;
|
||||
@ -89,7 +89,8 @@ struct QueryFuzzer
|
||||
void fuzzWindowFrame(ASTWindowDefinition & def);
|
||||
void fuzzCreateQuery(ASTCreateQuery & create);
|
||||
void fuzzExplainQuery(ASTExplainQuery & explain);
|
||||
void fuzzExplainSettings(ASTSetQuery & settings, const std::vector<String> & names);
|
||||
ASTExplainQuery::ExplainKind fuzzExplainKind(ASTExplainQuery::ExplainKind kind = ASTExplainQuery::ExplainKind::QueryPipeline);
|
||||
void fuzzExplainSettings(ASTSetQuery & settings_ast, ASTExplainQuery::ExplainKind kind);
|
||||
void fuzzColumnDeclaration(ASTColumnDeclaration & column);
|
||||
void fuzzTableName(ASTTableExpression & table);
|
||||
void fuzz(ASTs & asts);
|
||||
|
@ -1,3 +1,4 @@
|
||||
#include <limits>
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/PODArray.h>
|
||||
#include <Common/OptimizedRegularExpression.h>
|
||||
@ -14,13 +15,40 @@ namespace DB
|
||||
}
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
template <bool thread_safe>
|
||||
void OptimizedRegularExpressionImpl<thread_safe>::analyze(
|
||||
struct Literal
|
||||
{
|
||||
std::string literal;
|
||||
bool prefix; /// this literal string is the prefix of the whole string.
|
||||
bool suffix; /// this literal string is the suffix of the whole string.
|
||||
void clear()
|
||||
{
|
||||
literal.clear();
|
||||
prefix = false;
|
||||
suffix = false;
|
||||
}
|
||||
};
|
||||
|
||||
using Literals = std::vector<Literal>;
|
||||
|
||||
size_t shortest_literal_length(const Literals & literals)
|
||||
{
|
||||
if (literals.empty()) return 0;
|
||||
size_t shortest = std::numeric_limits<size_t>::max();
|
||||
for (const auto & lit : literals)
|
||||
if (shortest > lit.literal.size())
|
||||
shortest = lit.literal.size();
|
||||
return shortest;
|
||||
}
|
||||
|
||||
const char * analyzeImpl(
|
||||
std::string_view regexp,
|
||||
std::string & required_substring,
|
||||
const char * pos,
|
||||
Literal & required_substring,
|
||||
bool & is_trivial,
|
||||
bool & required_substring_is_prefix)
|
||||
Literals & global_alternatives)
|
||||
{
|
||||
/** The expression is trivial if all the metacharacters in it are escaped.
|
||||
* The non-alternative string is
|
||||
@ -30,12 +58,11 @@ void OptimizedRegularExpressionImpl<thread_safe>::analyze(
|
||||
* and also avoid substrings of the form `http://` or `www` and some other
|
||||
* (this is the hack for typical use case in web analytics applications).
|
||||
*/
|
||||
const char * begin = regexp.data();
|
||||
const char * pos = begin;
|
||||
const char * begin = pos;
|
||||
const char * end = regexp.data() + regexp.size();
|
||||
bool is_first_call = begin == regexp.data();
|
||||
int depth = 0;
|
||||
is_trivial = true;
|
||||
required_substring_is_prefix = false;
|
||||
required_substring.clear();
|
||||
bool has_alternative_on_depth_0 = false;
|
||||
bool has_case_insensitive_flag = false;
|
||||
@ -47,6 +74,80 @@ void OptimizedRegularExpressionImpl<thread_safe>::analyze(
|
||||
Substrings trivial_substrings(1);
|
||||
Substring * last_substring = &trivial_substrings.back();
|
||||
|
||||
Literals cur_alternatives;
|
||||
|
||||
auto finish_cur_alternatives = [&]()
|
||||
{
|
||||
if (cur_alternatives.empty())
|
||||
return;
|
||||
|
||||
if (global_alternatives.empty())
|
||||
{
|
||||
global_alternatives = cur_alternatives;
|
||||
cur_alternatives.clear();
|
||||
return;
|
||||
}
|
||||
/// that means current alternatives have better quality.
|
||||
if (shortest_literal_length(global_alternatives) < shortest_literal_length(cur_alternatives))
|
||||
{
|
||||
global_alternatives.clear();
|
||||
global_alternatives = cur_alternatives;
|
||||
}
|
||||
cur_alternatives.clear();
|
||||
};
|
||||
|
||||
auto finish_non_trivial_char = [&](bool create_new_substr = true)
|
||||
{
|
||||
if (depth != 0)
|
||||
return;
|
||||
|
||||
for (auto & alter : cur_alternatives)
|
||||
{
|
||||
if (alter.suffix)
|
||||
{
|
||||
alter.literal += last_substring->first;
|
||||
}
|
||||
}
|
||||
|
||||
finish_cur_alternatives();
|
||||
|
||||
if (!last_substring->first.empty() && create_new_substr)
|
||||
{
|
||||
trivial_substrings.resize(trivial_substrings.size() + 1);
|
||||
last_substring = &trivial_substrings.back();
|
||||
}
|
||||
};
|
||||
|
||||
/// Resolve the string or alters in a group (xxxxx)
|
||||
auto finish_group = [&](Literal & group_required_string, Literals & group_alternatives)
|
||||
{
|
||||
for (auto & alter : group_alternatives)
|
||||
{
|
||||
if (alter.prefix)
|
||||
{
|
||||
alter.literal = last_substring->first + alter.literal;
|
||||
}
|
||||
}
|
||||
|
||||
if (group_required_string.prefix)
|
||||
last_substring->first += group_required_string.literal;
|
||||
else
|
||||
{
|
||||
finish_non_trivial_char();
|
||||
last_substring->first = group_required_string.literal;
|
||||
}
|
||||
/// if we can still append, no need to finish it. e.g. abc(de)fg should capture abcdefg
|
||||
if (!last_substring->first.empty() && !group_required_string.suffix)
|
||||
{
|
||||
trivial_substrings.resize(trivial_substrings.size() + 1);
|
||||
last_substring = &trivial_substrings.back();
|
||||
}
|
||||
|
||||
/// assign group alters to current alters.
|
||||
finish_cur_alternatives();
|
||||
cur_alternatives = std::move(group_alternatives);
|
||||
};
|
||||
|
||||
bool in_curly_braces = false;
|
||||
bool in_square_braces = false;
|
||||
|
||||
@ -73,25 +174,19 @@ void OptimizedRegularExpressionImpl<thread_safe>::analyze(
|
||||
case '$':
|
||||
case '.':
|
||||
case '[':
|
||||
case ']':
|
||||
case '?':
|
||||
case '*':
|
||||
case '+':
|
||||
case '-':
|
||||
case '{':
|
||||
if (depth == 0 && !in_curly_braces && !in_square_braces)
|
||||
{
|
||||
if (last_substring->first.empty())
|
||||
last_substring->second = pos - begin;
|
||||
last_substring->first.push_back(*pos);
|
||||
}
|
||||
break;
|
||||
case '}':
|
||||
case '/':
|
||||
goto ordinary;
|
||||
default:
|
||||
/// all other escape sequences are not supported
|
||||
is_trivial = false;
|
||||
if (!last_substring->first.empty())
|
||||
{
|
||||
trivial_substrings.resize(trivial_substrings.size() + 1);
|
||||
last_substring = &trivial_substrings.back();
|
||||
}
|
||||
finish_non_trivial_char();
|
||||
break;
|
||||
}
|
||||
|
||||
@ -100,28 +195,19 @@ void OptimizedRegularExpressionImpl<thread_safe>::analyze(
|
||||
}
|
||||
|
||||
case '|':
|
||||
if (depth == 0)
|
||||
has_alternative_on_depth_0 = true;
|
||||
is_trivial = false;
|
||||
if (!in_square_braces && !last_substring->first.empty())
|
||||
{
|
||||
trivial_substrings.resize(trivial_substrings.size() + 1);
|
||||
last_substring = &trivial_substrings.back();
|
||||
}
|
||||
++pos;
|
||||
if (depth == 0)
|
||||
{
|
||||
has_alternative_on_depth_0 = true;
|
||||
goto finish;
|
||||
}
|
||||
break;
|
||||
|
||||
case '(':
|
||||
is_trivial = false;
|
||||
if (!in_square_braces)
|
||||
{
|
||||
++depth;
|
||||
is_trivial = false;
|
||||
if (!last_substring->first.empty())
|
||||
{
|
||||
trivial_substrings.resize(trivial_substrings.size() + 1);
|
||||
last_substring = &trivial_substrings.back();
|
||||
}
|
||||
|
||||
/// Check for case-insensitive flag.
|
||||
if (pos + 1 < end && pos[1] == '?')
|
||||
{
|
||||
@ -143,6 +229,28 @@ void OptimizedRegularExpressionImpl<thread_safe>::analyze(
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (pos + 2 < end && pos[1] == '?' && pos[2] == ':')
|
||||
{
|
||||
pos += 2;
|
||||
}
|
||||
Literal group_required_substr;
|
||||
bool group_is_trival = true;
|
||||
Literals group_alters;
|
||||
pos = analyzeImpl(regexp, pos + 1, group_required_substr, group_is_trival, group_alters);
|
||||
/// pos should be ')', if not, then it is not a valid regular expression
|
||||
if (pos == end)
|
||||
return pos;
|
||||
|
||||
/// For ()? or ()* or (){0,1}, we can just ignore the whole group.
|
||||
if ((pos + 1 < end && (pos[1] == '?' || pos[1] == '*')) ||
|
||||
(pos + 2 < end && pos[1] == '{' && pos[2] == '0'))
|
||||
{
|
||||
finish_non_trivial_char();
|
||||
}
|
||||
else
|
||||
{
|
||||
finish_group(group_required_substr, group_alters);
|
||||
}
|
||||
}
|
||||
++pos;
|
||||
break;
|
||||
@ -151,11 +259,7 @@ void OptimizedRegularExpressionImpl<thread_safe>::analyze(
|
||||
in_square_braces = true;
|
||||
++depth;
|
||||
is_trivial = false;
|
||||
if (!last_substring->first.empty())
|
||||
{
|
||||
trivial_substrings.resize(trivial_substrings.size() + 1);
|
||||
last_substring = &trivial_substrings.back();
|
||||
}
|
||||
finish_non_trivial_char();
|
||||
++pos;
|
||||
break;
|
||||
|
||||
@ -163,38 +267,25 @@ void OptimizedRegularExpressionImpl<thread_safe>::analyze(
|
||||
if (!in_square_braces)
|
||||
goto ordinary;
|
||||
|
||||
in_square_braces = false;
|
||||
--depth;
|
||||
if (depth == 0)
|
||||
in_square_braces = false;
|
||||
is_trivial = false;
|
||||
if (!last_substring->first.empty())
|
||||
{
|
||||
trivial_substrings.resize(trivial_substrings.size() + 1);
|
||||
last_substring = &trivial_substrings.back();
|
||||
}
|
||||
finish_non_trivial_char();
|
||||
++pos;
|
||||
break;
|
||||
|
||||
case ')':
|
||||
if (!in_square_braces)
|
||||
{
|
||||
--depth;
|
||||
is_trivial = false;
|
||||
if (!last_substring->first.empty())
|
||||
{
|
||||
trivial_substrings.resize(trivial_substrings.size() + 1);
|
||||
last_substring = &trivial_substrings.back();
|
||||
}
|
||||
goto finish;
|
||||
}
|
||||
++pos;
|
||||
break;
|
||||
|
||||
case '^': case '$': case '.': case '+':
|
||||
is_trivial = false;
|
||||
if (!last_substring->first.empty() && !in_square_braces)
|
||||
{
|
||||
trivial_substrings.resize(trivial_substrings.size() + 1);
|
||||
last_substring = &trivial_substrings.back();
|
||||
}
|
||||
finish_non_trivial_char();
|
||||
++pos;
|
||||
break;
|
||||
|
||||
@ -206,12 +297,11 @@ void OptimizedRegularExpressionImpl<thread_safe>::analyze(
|
||||
[[fallthrough]];
|
||||
case '*':
|
||||
is_trivial = false;
|
||||
if (!last_substring->first.empty() && !in_square_braces)
|
||||
if (depth == 0 && !last_substring->first.empty() && !in_square_braces)
|
||||
{
|
||||
last_substring->first.resize(last_substring->first.size() - 1);
|
||||
trivial_substrings.resize(trivial_substrings.size() + 1);
|
||||
last_substring = &trivial_substrings.back();
|
||||
}
|
||||
finish_non_trivial_char();
|
||||
++pos;
|
||||
break;
|
||||
|
||||
@ -236,13 +326,15 @@ void OptimizedRegularExpressionImpl<thread_safe>::analyze(
|
||||
break;
|
||||
}
|
||||
}
|
||||
finish:
|
||||
|
||||
if (last_substring && last_substring->first.empty())
|
||||
trivial_substrings.pop_back();
|
||||
finish_non_trivial_char(false);
|
||||
|
||||
if (!is_trivial)
|
||||
{
|
||||
if (!has_alternative_on_depth_0 && !has_case_insensitive_flag)
|
||||
/// we calculate required substring even though has_alternative_on_depth_0.
|
||||
/// we will clear the required substring after putting it to alternatives.
|
||||
if (!has_case_insensitive_flag)
|
||||
{
|
||||
/// We choose the non-alternative substring of the maximum length for first search.
|
||||
|
||||
@ -262,19 +354,45 @@ void OptimizedRegularExpressionImpl<thread_safe>::analyze(
|
||||
}
|
||||
}
|
||||
|
||||
if (max_length >= MIN_LENGTH_FOR_STRSTR)
|
||||
if (max_length >= MIN_LENGTH_FOR_STRSTR || (!is_first_call && max_length > 0))
|
||||
{
|
||||
required_substring = candidate_it->first;
|
||||
required_substring_is_prefix = candidate_it->second == 0;
|
||||
required_substring.literal = candidate_it->first;
|
||||
required_substring.prefix = candidate_it->second == 0;
|
||||
required_substring.suffix = candidate_it + 1 == trivial_substrings.end();
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (!trivial_substrings.empty())
|
||||
{
|
||||
required_substring = trivial_substrings.front().first;
|
||||
required_substring_is_prefix = trivial_substrings.front().second == 0;
|
||||
required_substring.literal = trivial_substrings.front().first;
|
||||
required_substring.prefix = trivial_substrings.front().second == 0;
|
||||
required_substring.suffix = true;
|
||||
}
|
||||
|
||||
/// if it is xxx|xxx|xxx, we should call the next xxx|xxx recursively and collect the result.
|
||||
if (has_alternative_on_depth_0)
|
||||
{
|
||||
/// compare the quality of required substring and alternatives and choose the better one.
|
||||
if (shortest_literal_length(global_alternatives) < required_substring.literal.size())
|
||||
global_alternatives = {required_substring};
|
||||
Literals next_alternatives;
|
||||
/// this two vals are useless, xxx|xxx cannot be trivial nor prefix.
|
||||
bool next_is_trivial = true;
|
||||
pos = analyzeImpl(regexp, pos, required_substring, next_is_trivial, next_alternatives);
|
||||
/// For xxx|xxx|xxx, we only conbine the alternatives and return a empty required_substring.
|
||||
if (next_alternatives.empty() || shortest_literal_length(next_alternatives) < required_substring.literal.size())
|
||||
{
|
||||
global_alternatives.push_back(required_substring);
|
||||
}
|
||||
else
|
||||
{
|
||||
global_alternatives.insert(global_alternatives.end(), next_alternatives.begin(), next_alternatives.end());
|
||||
}
|
||||
required_substring.clear();
|
||||
}
|
||||
|
||||
return pos;
|
||||
|
||||
/* std::cerr
|
||||
<< "regexp: " << regexp
|
||||
<< ", is_trivial: " << is_trivial
|
||||
@ -282,12 +400,31 @@ void OptimizedRegularExpressionImpl<thread_safe>::analyze(
|
||||
<< ", required_substring_is_prefix: " << required_substring_is_prefix
|
||||
<< std::endl;*/
|
||||
}
|
||||
}
|
||||
|
||||
template <bool thread_safe>
|
||||
void OptimizedRegularExpressionImpl<thread_safe>::analyze(
|
||||
std::string_view regexp_,
|
||||
std::string & required_substring,
|
||||
bool & is_trivial,
|
||||
bool & required_substring_is_prefix,
|
||||
std::vector<std::string> & alternatives)
|
||||
{
|
||||
Literals alternative_literals;
|
||||
Literal required_literal;
|
||||
analyzeImpl(regexp_, regexp_.data(), required_literal, is_trivial, alternative_literals);
|
||||
required_substring = std::move(required_literal.literal);
|
||||
required_substring_is_prefix = required_literal.prefix;
|
||||
for (auto & lit : alternative_literals)
|
||||
alternatives.push_back(std::move(lit.literal));
|
||||
}
|
||||
|
||||
template <bool thread_safe>
|
||||
OptimizedRegularExpressionImpl<thread_safe>::OptimizedRegularExpressionImpl(const std::string & regexp_, int options)
|
||||
{
|
||||
analyze(regexp_, required_substring, is_trivial, required_substring_is_prefix);
|
||||
std::vector<std::string> alternativesDummy; /// this vector extracts patterns a,b,c from pattern (a|b|c). for now it's not used.
|
||||
analyze(regexp_, required_substring, is_trivial, required_substring_is_prefix, alternativesDummy);
|
||||
|
||||
|
||||
/// Just three following options are supported
|
||||
if (options & (~(RE_CASELESS | RE_NO_CAPTURE | RE_DOT_NL)))
|
||||
|
@ -95,6 +95,15 @@ public:
out_required_substring_is_prefix = required_substring_is_prefix;
}

/// analyze function will extract the longest string literal or multiple alternative string literals from regexp for pre-checking if
/// a string contains the string literal(s). If not, we can tell this string can never match the regexp.
static void analyze(
std::string_view regexp_,
std::string & required_substring,
bool & is_trivial,
bool & required_substring_is_prefix,
std::vector<std::string> & alternatives);

private:
bool is_trivial;
bool required_substring_is_prefix;
@ -104,8 +113,6 @@ private:
std::optional<DB::ASCIICaseInsensitiveStringSearcher> case_insensitive_substring_searcher;
std::unique_ptr<RegexType> re2;
unsigned number_of_subpatterns;

static void analyze(std::string_view regexp_, std::string & required_substring, bool & is_trivial, bool & required_substring_is_prefix);
};

using OptimizedRegularExpression = OptimizedRegularExpressionImpl<true>;
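The comment on the widened analyze() overload above describes its purpose, and the new unit test added at the end of this diff (src/Common/tests/gtest_optimize_re.cpp) exercises it. As orientation only, not part of the commit, a standalone call under the same expectations as that test could look like:

#include <iostream>
#include <string>
#include <vector>

#include <Common/OptimizedRegularExpression.h>

int main()
{
    std::string required;                  /// longest literal any match must contain
    bool is_trivial = false;               /// whether the pattern is a plain literal
    bool is_prefix = false;                /// whether `required` is anchored at the start
    std::vector<std::string> alternatives; /// literals extracted from a top-level alternation

    OptimizedRegularExpression::analyze("abc(de|xyz)fg", required, is_trivial, is_prefix, alternatives);

    std::cout << required << '\n';         /// "abc" per the new test's expectation
    for (const auto & alternative : alternatives)
        std::cout << alternative << '\n';  /// "abcdefg", "abcxyzfg"
    return 0;
}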
@ -10,9 +10,11 @@
|
||||
namespace mysqlxx
|
||||
{
|
||||
|
||||
std::string errorMessage(MYSQL * driver)
|
||||
std::string errorMessage(MYSQL * driver, const std::string & query)
|
||||
{
|
||||
return fmt::format("{} ({}:{})", mysql_error(driver), driver->host ? driver->host : "(nullptr)", driver->port);
|
||||
return fmt::format("{}{} ({}:{})", mysql_error(driver),
|
||||
query.empty() ? "" : " while executing query: '" + query + "'",
|
||||
driver->host ? driver->host : "(nullptr)", driver->port);
|
||||
}
|
||||
|
||||
void checkError(MYSQL * driver)
|
||||
|
@ -64,7 +64,7 @@ void Query::executeImpl()
case CR_SERVER_LOST:
throw ConnectionLost(errorMessage(mysql_driver), err_no);
default:
throw BadQuery(errorMessage(mysql_driver), err_no);
throw BadQuery(errorMessage(mysql_driver, query), err_no);
}
}
}

@ -160,14 +160,16 @@ void Value::throwException(const char * text) const

if (!isNull())
{
info.append(": ");
info.append(": '");
info.append(m_data, m_length);
info.append("'");
}

if (res && res->getQuery())
{
info.append(", query: ");
info.append(", query: '");
info.append(res->getQuery()->str().substr(0, preview_length));
info.append("'");
}

throw CannotParseValue(info);

@ -53,7 +53,7 @@ struct CannotParseValue : public Exception
};


std::string errorMessage(MYSQL * driver);
std::string errorMessage(MYSQL * driver, const std::string & query = "");

/// For internal need of library.
void checkError(MYSQL * driver);

@ -1,8 +1,13 @@
#pragma once

#include <Functions/registerFunctions.h>
#include <AggregateFunctions/registerAggregateFunctions.h>
#include <Formats/registerFormats.h>

inline void tryRegisterAggregateFunctions()
{
static struct Register { Register() { DB::registerAggregateFunctions(); } } registered;
}

inline void tryRegisterFunctions()
{
src/Common/tests/gtest_optimize_re.cpp (new file, 46 lines)
@ -0,0 +1,46 @@
#include <gtest/gtest.h>

#include <Common/OptimizedRegularExpression.h>

TEST(OptimizeRE, analyze)
{
auto test_f = [](const std::string & regexp, const std::string & answer, std::vector<std::string> expect_alternatives = {}, bool trival_expected = false)
{
std::string required;
bool is_trivial;
bool is_prefix;
std::vector<std::string> alternatives;
OptimizedRegularExpression::analyze(regexp, required, is_trivial, is_prefix, alternatives);
std::cerr << regexp << std::endl;
EXPECT_EQ(required, answer);
EXPECT_EQ(alternatives, expect_alternatives);
EXPECT_EQ(is_trivial, trival_expected);
};
test_f("abc", "abc", {}, true);
test_f("c([^k]*)de", "");
test_f("abc(de)fg", "abcdefg");
test_f("abc(de|xyz)fg", "abc", {"abcdefg", "abcxyzfg"});
test_f("abc(de?f|xyz)fg", "abc", {"abcd", "abcxyzfg"});
test_f("abc|fgk|xyz", "", {"abc","fgk", "xyz"});
test_f("(abc)", "abc");
test_f("(abc|fgk)", "", {"abc","fgk"});
test_f("(abc|fgk)(e|f|zkh|)", "", {"abc","fgk"});
test_f("abc(abc|fg)xyzz", "xyzz", {"abcabcxyzz","abcfgxyzz"});
test_f("abc[k]xyzz", "xyzz");
test_f("(abc[k]xyzz)", "xyzz");
test_f("abc((de)fg(hi))jk", "abcdefghijk");
test_f("abc((?:de)fg(?:hi))jk", "abcdefghijk");
test_f("abc((de)fghi+zzz)jk", "abcdefghi");
test_f("abc((de)fg(hi))?jk", "abc");
test_f("abc((de)fghi?zzz)jk", "abcdefgh");
test_f("abc(*cd)jk", "cdjk");
test_f(R"(abc(de|xyz|(\{xx\}))fg)", "abc", {"abcdefg", "abcxyzfg", "abc{xx}fg"});
test_f("abc(abc|fg)?xyzz", "xyzz");
test_f("abc(abc|fg){0,1}xyzz", "xyzz");
test_f("abc(abc|fg)xyzz|bcdd?k|bc(f|g|h?)z", "", {"abcabcxyzz", "abcfgxyzz", "bcd", "bc"});
test_f("abc(abc|fg)xyzz|bc(dd?x|kk?y|(f))k|bc(f|g|h?)z", "", {"abcabcxyzz", "abcfgxyzz", "bcd", "bck", "bcfk", "bc"});
test_f("((?:abc|efg|xyz)/[a-zA-Z0-9]{1-50})(/?[^ ]*|)", "", {"abc/", "efg/", "xyz/"});
test_f(R"([Bb]ai[Dd]u[Ss]pider(?:-[A-Za-z]{1,30})(?:-[A-Za-z]{1,30}|)|bingbot|\bYeti(?:-[a-z]{1,30}|)|Catchpoint(?: bot|)|[Cc]harlotte|Daumoa(?:-feedfetcher|)|(?:[a-zA-Z]{1,30}-|)Googlebot(?:-[a-zA-Z]{1,30}|))", "", {"pider-", "bingbot", "Yeti-", "Yeti", "Catchpoint bot", "Catchpoint", "harlotte", "Daumoa-feedfetcher", "Daumoa", "-Googlebot", "Googlebot"});
test_f("abc|(:?xx|yy|zz|x?)def", "", {"abc", "def"});
test_f("abc|(:?xx|yy|zz|x?){1,2}def", "", {"abc", "def"});
}
@ -140,6 +140,8 @@ void KeeperConfigurationAndSettings::dump(WriteBufferFromOwnString & buf) const

writeText("max_requests_batch_size=", buf);
write_int(coordination_settings->max_requests_batch_size);
writeText("max_requests_batch_bytes_size=", buf);
write_int(coordination_settings->max_requests_batch_bytes_size);
writeText("max_request_queue_size=", buf);
write_int(coordination_settings->max_request_queue_size);
writeText("max_requests_quick_batch_size=", buf);

@ -39,7 +39,8 @@ struct Settings;
M(UInt64, fresh_log_gap, 200, "When node became fresh", 0) \
M(UInt64, max_request_queue_size, 100000, "Maximum number of request that can be in queue for processing", 0) \
M(UInt64, max_requests_batch_size, 100, "Max size of batch of requests that can be sent to RAFT", 0) \
M(UInt64, max_requests_quick_batch_size, 10, "Max size of batch of requests to try to get before proceeding with RAFT. Keeper will not wait for requests but take only requests that are already in queue" , 0) \
M(UInt64, max_requests_batch_bytes_size, 100*1024, "Max size in bytes of batch of requests that can be sent to RAFT", 0) \
M(UInt64, max_requests_quick_batch_size, 100, "Max size of batch of requests to try to get before proceeding with RAFT. Keeper will not wait for requests but take only requests that are already in queue" , 0) \
M(Bool, quorum_reads, false, "Execute read requests as writes through whole RAFT consesus with similar speed", 0) \
M(Bool, force_sync, true, "Call fsync on each change in RAFT changelog", 0) \
M(Bool, compress_logs, true, "Write compressed coordination logs in ZSTD format", 0) \

@ -73,6 +73,7 @@ void KeeperDispatcher::requestThread()
auto coordination_settings = configuration_and_settings->coordination_settings;
uint64_t max_wait = coordination_settings->operation_timeout_ms.totalMilliseconds();
uint64_t max_batch_size = coordination_settings->max_requests_batch_size;
uint64_t max_batch_bytes_size = coordination_settings->max_requests_batch_bytes_size;

/// The code below do a very simple thing: batch all write (quorum) requests into vector until
/// previous write batch is not finished or max_batch size achieved. The main complexity goes from
@ -89,6 +90,7 @@ void KeeperDispatcher::requestThread()
break;

KeeperStorage::RequestsForSessions current_batch;
size_t current_batch_bytes_size = 0;

bool has_read_request = false;

@ -96,6 +98,7 @@ void KeeperDispatcher::requestThread()
/// Otherwise we will process it locally.
if (coordination_settings->quorum_reads || !request.request->isReadRequest())
{
current_batch_bytes_size += request.request->bytesSize();
current_batch.emplace_back(request);

const auto try_get_request = [&]
@ -108,7 +111,10 @@ void KeeperDispatcher::requestThread()
if (!coordination_settings->quorum_reads && request.request->isReadRequest())
has_read_request = true;
else
{
current_batch_bytes_size += request.request->bytesSize();
current_batch.emplace_back(request);
}

return true;
}
@ -116,9 +122,11 @@ void KeeperDispatcher::requestThread()
return false;
};

/// If we have enough requests in queue, we will try to batch at least max_quick_batch_size of them.
/// TODO: Deprecate max_requests_quick_batch_size and use only max_requests_batch_size and max_requests_batch_bytes_size
size_t max_quick_batch_size = coordination_settings->max_requests_quick_batch_size;
while (!shutdown_called && !has_read_request && current_batch.size() < max_quick_batch_size && try_get_request())
while (!shutdown_called && !has_read_request &&
current_batch.size() < max_quick_batch_size && current_batch_bytes_size < max_batch_bytes_size &&
try_get_request())
;

const auto prev_result_done = [&]
@ -129,7 +137,8 @@ void KeeperDispatcher::requestThread()
};

/// Waiting until previous append will be successful, or batch is big enough
while (!shutdown_called && !has_read_request && !prev_result_done() && current_batch.size() <= max_batch_size)
while (!shutdown_called && !has_read_request && !prev_result_done() &&
current_batch.size() <= max_batch_size && current_batch_bytes_size < max_batch_bytes_size)
{
try_get_request();
}
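Taken together, these changes bound every batch sent to RAFT by total bytes as well as by request count. The following condensed, standalone sketch models the two collection phases shown above; the Request struct, the queue, and the prev_result_done callback are stand-ins for KeeperStorage::RequestsForSessions and the real lambdas, and the real dispatcher waits on its queue with a timeout instead of breaking out:

    #include <cstddef>
    #include <deque>
    #include <functional>
    #include <vector>

    /// Simplified stand-in for a Keeper request; only its serialized size matters here.
    struct Request { size_t bytes = 0; };

    /// Model of the batching rules above: a batch is closed as soon as either the
    /// request-count limit or the new byte-size limit is reached.
    std::vector<Request> collectBatch(
        std::deque<Request> & queue,
        size_t max_batch_size,
        size_t max_batch_bytes_size,
        size_t max_quick_batch_size,
        const std::function<bool()> & prev_result_done)
    {
        std::vector<Request> batch;
        size_t batch_bytes = 0;

        auto try_get_request = [&]
        {
            if (queue.empty())
                return false;
            batch_bytes += queue.front().bytes;
            batch.push_back(queue.front());
            queue.pop_front();
            return true;
        };

        /// Phase 1: greedily take requests that are already queued, up to the quick-batch
        /// count limit, stopping early once the byte budget is exhausted.
        while (batch.size() < max_quick_batch_size && batch_bytes < max_batch_bytes_size && try_get_request())
            ;

        /// Phase 2: while the previous RAFT append is still in flight, keep topping the
        /// batch up, again bounded by both the count and the byte limits.
        while (!prev_result_done() && batch.size() <= max_batch_size && batch_bytes < max_batch_bytes_size)
        {
            if (!try_get_request())
                break; /// the real code waits on the queue with a timeout here
        }

        return batch;
    }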
@ -147,6 +156,8 @@ void KeeperDispatcher::requestThread()
/// Process collected write requests batch
if (!current_batch.empty())
{
LOG_TRACE(log, "Processing requests batch, size: {}, bytes: {}", current_batch.size(), current_batch_bytes_size);

auto result = server->putRequestBatch(current_batch);

if (result)
@ -158,6 +169,7 @@ void KeeperDispatcher::requestThread()
{
addErrorResponses(current_batch, Coordination::Error::ZCONNECTIONLOSS);
current_batch.clear();
current_batch_bytes_size = 0;
}

prev_batch = std::move(current_batch);

@ -611,6 +611,7 @@ class IColumn;
M(Bool, query_plan_aggregation_in_order, true, "Use query plan for aggregation-in-order optimisation", 0) \
M(Bool, query_plan_remove_redundant_sorting, true, "Remove redundant sorting in query plan. For example, sorting steps related to ORDER BY clauses in subqueries", 0) \
M(Bool, query_plan_remove_redundant_distinct, true, "Remove redundant Distinct step in query plan", 0) \
M(Bool, query_plan_optimize_projection, true, "Use query plan for aggregation-in-order optimisation", 0) \
M(UInt64, regexp_max_matches_per_row, 1000, "Max matches of any single regexp per row, used to safeguard 'extractAllGroupsHorizontal' against consuming too much memory with greedy RE.", 0) \
\
M(UInt64, limit, 0, "Limit on read rows from the most 'end' result for select query, default 0 means no limit length", 0) \
@ -933,7 +934,7 @@ class IColumn;
M(Bool, input_format_bson_skip_fields_with_unsupported_types_in_schema_inference, false, "Skip fields with unsupported types while schema inference for format BSON.", 0) \
\
M(Bool, regexp_dict_allow_other_sources, false, "Allow regexp_tree dictionary to use sources other than yaml source.", 0) \
M(Bool, regexp_dict_allow_hyperscan, false, "Allow regexp_tree dictionary using Hyperscan library.", 0) \
M(Bool, regexp_dict_allow_hyperscan, true, "Allow regexp_tree dictionary using Hyperscan library.", 0) \

// End of FORMAT_FACTORY_SETTINGS
// Please add settings non-related to formats into the COMMON_SETTINGS above.