Merge remote-tracking branch 'ck/master' into storagehive_alter

This commit is contained in:
lgbo-ustc 2022-06-23 09:38:06 +08:00
commit 0fba75b21d
468 changed files with 13497 additions and 7287 deletions

View File

@ -13,3 +13,6 @@ ClickHouse® is an open-source column-oriented database management system that a
* [Code Browser (Woboq)](https://clickhouse.com/codebrowser/ClickHouse/index.html) with syntax highlight and navigation.
* [Code Browser (github.dev)](https://github.dev/ClickHouse/ClickHouse) with syntax highlight, powered by github.dev.
* [Contacts](https://clickhouse.com/company/#contact) can help to get your questions answered if there are any.
## Upcoming events
* [Paris Meetup](https://www.meetup.com/clickhouse-france-user-group/events/286304312/) Please join us for an evening of talks (in English), food and discussion. Featuring talks of ClickHouse in production and at least one on the deep internals of ClickHouse itself.

View File

@ -7,6 +7,7 @@
#include <replxx.hxx>
#include <base/types.h>
#include <base/defines.h>
class LineReader
{
@ -20,8 +21,8 @@ public:
void addWords(Words && new_words);
private:
Words words;
Words words_no_case;
Words words TSA_GUARDED_BY(mutex);
Words words_no_case TSA_GUARDED_BY(mutex);
std::mutex mutex;
};
@ -29,7 +30,7 @@ public:
using Patterns = std::vector<const char *>;
LineReader(const String & history_file_path, bool multiline, Patterns extenders, Patterns delimiters);
virtual ~LineReader() {}
virtual ~LineReader() = default;
/// Reads the whole line until delimiter (in multiline mode) or until the last line without extender.
/// If resulting line is empty, it means the user interrupted the input.

View File

@ -124,6 +124,23 @@
#endif
#endif
// Macros for Clang Thread Safety Analysis (TSA). They can be safely ignored by other compilers.
// Feel free to extend, but please stay close to https://clang.llvm.org/docs/ThreadSafetyAnalysis.html#mutexheader
//
// Every macro defined in the Clang branch must have an empty counterpart in the #else branch,
// otherwise code annotated with it does not compile with other compilers.
#if defined(__clang__)
# define TSA_GUARDED_BY(...) __attribute__((guarded_by(__VA_ARGS__))) // data is protected by given capability
# define TSA_PT_GUARDED_BY(...) __attribute__((pt_guarded_by(__VA_ARGS__))) // pointed-to data is protected by the given capability
# define TSA_REQUIRES(...) __attribute__((requires_capability(__VA_ARGS__))) // thread needs exclusive possession of given capability
# define TSA_REQUIRES_SHARED(...) __attribute__((requires_shared_capability(__VA_ARGS__))) // thread needs shared possession of given capability
# define TSA_ACQUIRED_AFTER(...) __attribute__((acquired_after(__VA_ARGS__))) // annotated lock must be locked after given lock
# define TSA_NO_THREAD_SAFETY_ANALYSIS __attribute__((no_thread_safety_analysis)) // disable TSA for a function
#else
# define TSA_GUARDED_BY(...)
# define TSA_PT_GUARDED_BY(...)
# define TSA_REQUIRES(...)
# define TSA_REQUIRES_SHARED(...)
# define TSA_ACQUIRED_AFTER(...)
# define TSA_NO_THREAD_SAFETY_ANALYSIS
#endif
/// A template function for suppressing warnings about unused variables or function results.
template <typename... Args>
constexpr void UNUSED(Args &&... args [[maybe_unused]])

View File

@ -19,7 +19,6 @@ if (COMPILER_CLANG)
# Add some warnings that are not available even with -Wall -Wextra -Wpedantic.
# We want to get everything out of the compiler for code quality.
add_warning(everything)
add_warning(pedantic)
no_warning(vla-extension)
no_warning(zero-length-array)
@ -51,6 +50,7 @@ if (COMPILER_CLANG)
no_warning(vla)
no_warning(weak-template-vtables)
no_warning(weak-vtables)
no_warning(thread-safety-negative) # experimental flag, too many false positives
# TODO Enable conversion, sign-conversion, double-promotion warnings.
elseif (COMPILER_GCC)
# Add compiler options only to c++ compiler

View File

@ -78,6 +78,9 @@ target_compile_options(cxx PUBLIC $<$<COMPILE_LANGUAGE:CXX>:-nostdinc++>)
# Third party library may have substandard code.
target_compile_options(cxx PRIVATE -w)
# Enable support for Clang-Thread-Safety-Analysis in libcxx
target_compile_definitions(cxx PUBLIC -D_LIBCPP_ENABLE_THREAD_SAFETY_ANNOTATIONS)
target_link_libraries(cxx PUBLIC cxxabi)
# For __udivmodti4, __divmodti4.

2
contrib/librdkafka vendored

@ -1 +1 @@
Subproject commit 81b413cc1c2a33ad4e96df856b89184efbd6221c
Subproject commit 6062e711a919fb3b669b243b7dceabd045d0e4a2

View File

@ -43,8 +43,5 @@ COPY demo_data.txt /
ENV PATH=/apache-hive-2.3.9-bin/bin:/hadoop-3.1.0/bin:/hadoop-3.1.0/sbin:$PATH
RUN service ssh start && sed s/HOSTNAME/$HOSTNAME/ /hadoop-3.1.0/etc/hadoop/core-site.xml.template > /hadoop-3.1.0/etc/hadoop/core-site.xml && hdfs namenode -format
RUN apt install -y python3 python3-pip
RUN pip3 install flask requests
COPY http_api_server.py /
COPY start.sh /

View File

@ -7,5 +7,5 @@ mysql -u root -e "GRANT ALL ON * . * TO 'test'@'localhost'"
schematool -initSchema -dbType mysql
#nohup hiveserver2 &
nohup hive --service metastore &
bash /prepare_hive_data.sh
python3 http_api_server.py
while true; do sleep 60; done

View File

@ -1,7 +1,7 @@
version: '2.3'
services:
hdfs1:
image: lgboustc/hive_test:v1.0
image: lgboustc/hive_test:v2.0
hostname: hivetest
restart: always
entrypoint: bash /start.sh

View File

@ -10,7 +10,7 @@ set -x
#
# But under thread fuzzer, TSan build is too slow and this produces some flaky
# tests, so for now, as a temporary solution it had been disabled.
if ! test -f package_folder/*tsan*.deb; then
if ! test -f package_folder/clickhouse-server*tsan*.deb; then
export THREAD_FUZZER_CPU_TIME_PERIOD_US=1000
export THREAD_FUZZER_SLEEP_PROBABILITY=0.1
export THREAD_FUZZER_SLEEP_TIME_US=100000

View File

@ -18,8 +18,10 @@ def get_options(i, backward_compatibility_check):
options.append("--db-engine=Ordinary")
if i % 3 == 2 and not backward_compatibility_check:
options.append('''--db-engine="Replicated('/test/db/test_{}', 's1', 'r1')"'''.format(i))
client_options.append('allow_experimental_database_replicated=1')
options.append(
'''--db-engine="Replicated('/test/db/test_{}', 's1', 'r1')"'''.format(i)
)
client_options.append("allow_experimental_database_replicated=1")
# If database name is not specified, new database is created for each functional test.
# Run some threads with one database for all tests.
@ -37,38 +39,58 @@ def get_options(i, backward_compatibility_check):
if i % 15 == 11:
client_options.append("join_algorithm='auto'")
client_options.append('max_rows_in_join=1000')
client_options.append("max_rows_in_join=1000")
if i == 13:
client_options.append('memory_tracker_fault_probability=0.001')
client_options.append("memory_tracker_fault_probability=0.001")
if client_options:
options.append(" --client-option " + ' '.join(client_options))
options.append(" --client-option " + " ".join(client_options))
return ' '.join(options)
return " ".join(options)
def run_func_test(cmd, output_prefix, num_processes, skip_tests_option, global_time_limit, backward_compatibility_check):
backward_compatibility_check_option = '--backward-compatibility-check' if backward_compatibility_check else ''
global_time_limit_option = ''
def run_func_test(
cmd,
output_prefix,
num_processes,
skip_tests_option,
global_time_limit,
backward_compatibility_check,
):
backward_compatibility_check_option = (
"--backward-compatibility-check" if backward_compatibility_check else ""
)
global_time_limit_option = ""
if global_time_limit:
global_time_limit_option = "--global_time_limit={}".format(global_time_limit)
output_paths = [os.path.join(output_prefix, "stress_test_run_{}.txt".format(i)) for i in range(num_processes)]
output_paths = [
os.path.join(output_prefix, "stress_test_run_{}.txt".format(i))
for i in range(num_processes)
]
pipes = []
for i in range(0, len(output_paths)):
f = open(output_paths[i], 'w')
full_command = "{} {} {} {} {}".format(cmd, get_options(i, backward_compatibility_check), global_time_limit_option, skip_tests_option, backward_compatibility_check_option)
f = open(output_paths[i], "w")
full_command = "{} {} {} {} {}".format(
cmd,
get_options(i, backward_compatibility_check),
global_time_limit_option,
skip_tests_option,
backward_compatibility_check_option,
)
logging.info("Run func tests '%s'", full_command)
p = Popen(full_command, shell=True, stdout=f, stderr=f)
pipes.append(p)
time.sleep(0.5)
return pipes
def compress_stress_logs(output_path, files_prefix):
cmd = f"cd {output_path} && tar -zcf stress_run_logs.tar.gz {files_prefix}* && rm {files_prefix}*"
check_output(cmd, shell=True)
def call_with_retry(query, timeout=30, retry_count=5):
for i in range(retry_count):
code = call(query, shell=True, stderr=STDOUT, timeout=timeout)
@ -77,6 +99,7 @@ def call_with_retry(query, timeout=30, retry_count=5):
else:
break
def make_query_command(query):
return f"""clickhouse client -q "{query}" --max_untracked_memory=1Gi --memory_profiler_step=1Gi --max_memory_usage_for_user=0"""
@ -93,28 +116,34 @@ def prepare_for_hung_check(drop_databases):
# ThreadFuzzer significantly slows down server and causes false-positive hung check failures
call_with_retry("clickhouse client -q 'SYSTEM STOP THREAD FUZZER'")
call_with_retry(make_query_command('SELECT 1 FORMAT Null'))
call_with_retry(make_query_command("SELECT 1 FORMAT Null"))
# Some tests execute SYSTEM STOP MERGES or similar queries.
# It may cause some ALTERs to hang.
# Possibly we should fix tests and forbid to use such queries without specifying table.
call_with_retry(make_query_command('SYSTEM START MERGES'))
call_with_retry(make_query_command('SYSTEM START DISTRIBUTED SENDS'))
call_with_retry(make_query_command('SYSTEM START TTL MERGES'))
call_with_retry(make_query_command('SYSTEM START MOVES'))
call_with_retry(make_query_command('SYSTEM START FETCHES'))
call_with_retry(make_query_command('SYSTEM START REPLICATED SENDS'))
call_with_retry(make_query_command('SYSTEM START REPLICATION QUEUES'))
call_with_retry(make_query_command('SYSTEM DROP MARK CACHE'))
call_with_retry(make_query_command("SYSTEM START MERGES"))
call_with_retry(make_query_command("SYSTEM START DISTRIBUTED SENDS"))
call_with_retry(make_query_command("SYSTEM START TTL MERGES"))
call_with_retry(make_query_command("SYSTEM START MOVES"))
call_with_retry(make_query_command("SYSTEM START FETCHES"))
call_with_retry(make_query_command("SYSTEM START REPLICATED SENDS"))
call_with_retry(make_query_command("SYSTEM START REPLICATION QUEUES"))
call_with_retry(make_query_command("SYSTEM DROP MARK CACHE"))
# Issue #21004, live views are experimental, so let's just suppress it
call_with_retry(make_query_command("KILL QUERY WHERE upper(query) LIKE 'WATCH %'"))
# Kill other queries which known to be slow
# It's query from 01232_preparing_sets_race_condition_long, it may take up to 1000 seconds in slow builds
call_with_retry(make_query_command("KILL QUERY WHERE query LIKE 'insert into tableB select %'"))
call_with_retry(
make_query_command("KILL QUERY WHERE query LIKE 'insert into tableB select %'")
)
# Long query from 00084_external_agregation
call_with_retry(make_query_command("KILL QUERY WHERE query LIKE 'SELECT URL, uniq(SearchPhrase) AS u FROM test.hits GROUP BY URL ORDER BY u %'"))
call_with_retry(
make_query_command(
"KILL QUERY WHERE query LIKE 'SELECT URL, uniq(SearchPhrase) AS u FROM test.hits GROUP BY URL ORDER BY u %'"
)
)
if drop_databases:
for i in range(5):
@ -123,23 +152,35 @@ def prepare_for_hung_check(drop_databases):
# Otherwise we will get rid of queries which wait for background pool. It can take a long time on slow builds (more than 900 seconds).
#
# Also specify max_untracked_memory to allow 1GiB of memory to overcommit.
databases = check_output(make_query_command('SHOW DATABASES'), shell=True, timeout=30).decode('utf-8').strip().split()
databases = (
check_output(
make_query_command("SHOW DATABASES"), shell=True, timeout=30
)
.decode("utf-8")
.strip()
.split()
)
for db in databases:
if db == "system":
continue
command = make_query_command(f'DROP DATABASE {db}')
command = make_query_command(f"DROP DATABASE {db}")
# we don't wait for drop
Popen(command, shell=True)
break
except Exception as ex:
logging.error("Failed to SHOW or DROP databasese, will retry %s", str(ex))
logging.error(
"Failed to SHOW or DROP databasese, will retry %s", str(ex)
)
time.sleep(i)
else:
raise Exception("Cannot drop databases after stress tests. Probably server consumed too much memory and cannot execute simple queries")
raise Exception(
"Cannot drop databases after stress tests. Probably server consumed too much memory and cannot execute simple queries"
)
# Wait for last queries to finish if any, not longer than 300 seconds
call(make_query_command("""
call(
make_query_command(
"""
select sleepEachRow((
select maxOrDefault(300 - elapsed) + 1
from system.processes
@ -147,39 +188,58 @@ def prepare_for_hung_check(drop_databases):
) / 300)
from numbers(300)
format Null
"""), shell=True, stderr=STDOUT, timeout=330)
"""
),
shell=True,
stderr=STDOUT,
timeout=330,
)
# Even if all clickhouse-test processes are finished, there are probably some sh scripts,
# which still run some new queries. Let's ignore them.
try:
# Count the queries that have been running for more than 300 seconds.
query = """clickhouse client -q "SELECT count() FROM system.processes where elapsed > 300" """
output = check_output(query, shell=True, stderr=STDOUT, timeout=30).decode('utf-8').strip()
output = (
check_output(query, shell=True, stderr=STDOUT, timeout=30)
.decode("utf-8")
.strip()
)
if int(output) == 0:
return False
except:
pass
return True
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
parser = argparse.ArgumentParser(description="ClickHouse script for running stresstest")
parser.add_argument("--test-cmd", default='/usr/bin/clickhouse-test')
parser.add_argument("--skip-func-tests", default='')
parser.add_argument("--client-cmd", default='clickhouse-client')
parser.add_argument("--server-log-folder", default='/var/log/clickhouse-server')
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
parser = argparse.ArgumentParser(
description="ClickHouse script for running stresstest"
)
parser.add_argument("--test-cmd", default="/usr/bin/clickhouse-test")
parser.add_argument("--skip-func-tests", default="")
parser.add_argument("--client-cmd", default="clickhouse-client")
parser.add_argument("--server-log-folder", default="/var/log/clickhouse-server")
parser.add_argument("--output-folder")
parser.add_argument("--global-time-limit", type=int, default=1800)
parser.add_argument("--num-parallel", type=int, default=cpu_count())
parser.add_argument('--backward-compatibility-check', action='store_true')
parser.add_argument('--hung-check', action='store_true', default=False)
parser.add_argument("--backward-compatibility-check", action="store_true")
parser.add_argument("--hung-check", action="store_true", default=False)
# make sense only for hung check
parser.add_argument('--drop-databases', action='store_true', default=False)
parser.add_argument("--drop-databases", action="store_true", default=False)
args = parser.parse_args()
if args.drop_databases and not args.hung_check:
raise Exception("--drop-databases only used in hung check (--hung-check)")
func_pipes = []
func_pipes = run_func_test(args.test_cmd, args.output_folder, args.num_parallel, args.skip_func_tests, args.global_time_limit, args.backward_compatibility_check)
func_pipes = run_func_test(
args.test_cmd,
args.output_folder,
args.num_parallel,
args.skip_func_tests,
args.global_time_limit,
args.backward_compatibility_check,
)
logging.info("Will wait functests to finish")
while True:
@ -205,32 +265,41 @@ if __name__ == "__main__":
have_long_running_queries = True
logging.error("Failed to prepare for hung check %s", str(ex))
logging.info("Checking if some queries hung")
cmd = ' '.join([args.test_cmd,
# Do not track memory allocations up to 1Gi,
# this will allow to ignore server memory limit (max_server_memory_usage) for this query.
#
# NOTE: memory_profiler_step should be also adjusted, because:
#
# untracked_memory_limit = min(settings.max_untracked_memory, settings.memory_profiler_step)
#
# NOTE: that if there will be queries with GROUP BY, this trick
# will not work due to CurrentMemoryTracker::check() from
# Aggregator code.
# But right now it should work, since neither hung check, nor 00001_select_1 has GROUP BY.
"--client-option", "max_untracked_memory=1Gi",
"--client-option", "max_memory_usage_for_user=0",
"--client-option", "memory_profiler_step=1Gi",
# Use system database to avoid CREATE/DROP DATABASE queries
"--database=system",
"--hung-check",
"00001_select_1"
])
cmd = " ".join(
[
args.test_cmd,
# Do not track memory allocations up to 1Gi,
# this will allow to ignore server memory limit (max_server_memory_usage) for this query.
#
# NOTE: memory_profiler_step should be also adjusted, because:
#
# untracked_memory_limit = min(settings.max_untracked_memory, settings.memory_profiler_step)
#
# NOTE: that if there will be queries with GROUP BY, this trick
# will not work due to CurrentMemoryTracker::check() from
# Aggregator code.
# But right now it should work, since neither hung check, nor 00001_select_1 has GROUP BY.
"--client-option",
"max_untracked_memory=1Gi",
"--client-option",
"max_memory_usage_for_user=0",
"--client-option",
"memory_profiler_step=1Gi",
# Use system database to avoid CREATE/DROP DATABASE queries
"--database=system",
"--hung-check",
"--stress",
"00001_select_1",
]
)
res = call(cmd, shell=True, stderr=STDOUT)
hung_check_status = "No queries hung\tOK\n"
if res != 0 and have_long_running_queries:
logging.info("Hung check failed with exit code {}".format(res))
hung_check_status = "Hung check failed\tFAIL\n"
with open(os.path.join(args.output_folder, "test_results.tsv"), 'w+') as results:
with open(
os.path.join(args.output_folder, "test_results.tsv"), "w+"
) as results:
results.write(hung_check_status)
logging.info("Stress test finished")

View File

@ -66,7 +66,7 @@ For a description of parameters, see the [CREATE query description](../../../sql
A tuple of column names or arbitrary expressions. Example: `ORDER BY (CounterID, EventDate)`.
ClickHouse uses the sorting key as a primary key if the primary key is not defined obviously by the `PRIMARY KEY` clause.
ClickHouse uses the sorting key as a primary key if the primary key is not defined explicitly by the `PRIMARY KEY` clause.
Use the `ORDER BY tuple()` syntax, if you do not need sorting. See [Selecting the Primary Key](#selecting-the-primary-key).

View File

@ -5,6 +5,8 @@ sidebar_label: Command-Line Client
# Command-line Client
## clickhouse-client
ClickHouse provides a native command-line client: `clickhouse-client`. The client supports command-line options and configuration files. For more information, see [Configuring](#interfaces_cli_configuration).
[Install](../getting-started/install.md) it from the `clickhouse-client` package and run it with the command `clickhouse-client`.
@ -115,7 +117,7 @@ You can pass parameters to `clickhouse-client` (all parameters have a default va
- `--user, -u` The username. Default value: default.
- `--password` The password. Default value: empty string.
- `--query, -q` The query to process when using non-interactive mode. You must specify either `query` or `queries-file` option.
- `--queries-file, -qf` file path with queries to execute. You must specify either `query` or `queries-file` option.
- `--queries-file` file path with queries to execute. You must specify either `query` or `queries-file` option.
- `--database, -d` Select the current default database. Default value: the current database from the server settings (default by default).
- `--multiline, -m` If specified, allow multiline queries (do not send the query on Enter).
- `--multiquery, -n` If specified, allow processing multiple queries separated by semicolons.
@ -183,4 +185,3 @@ If the configuration above is applied, the ID of a query is shown in the followi
``` text
speedscope:http://speedscope-host/#profileURL=qp%3Fid%3Dc8ecc783-e753-4b38-97f1-42cddfb98b7d
```

View File

@ -34,7 +34,7 @@ Example of configuration:
<named_collections>
<s3_mydata>
<access_key_id>AKIAIOSFODNN7EXAMPLE</access_key_id>
<secret_access_key> wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY</secret_access_key>
<secret_access_key>wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY</secret_access_key>
<format>CSV</format>
<url>https://s3.us-east-1.amazonaws.com/yourbucket/mydata/</url>
</s3_mydata>
@ -227,4 +227,4 @@ SELECT dictGet('dict', 'b', 2);
┌─dictGet('dict', 'b', 2)─┐
│ two │
└─────────────────────────┘
```
```

View File

@ -31,12 +31,12 @@ $ clickhouse-local --structure "table_structure" --input-format "format_of_incom
Arguments:
- `-S`, `--structure` — table structure for input data.
- `-if`, `--input-format` — input format, `TSV` by default.
- `--input-format` — input format, `TSV` by default.
- `-f`, `--file` — path to data, `stdin` by default.
- `-q`, `--query` — queries to execute with `;` as delimiter. You must specify either `query` or `queries-file` option.
- `-qf`, `--queries-file` - file path with queries to execute. You must specify either `query` or `queries-file` option.
- `--queries-file` - file path with queries to execute. You must specify either `query` or `queries-file` option.
- `-N`, `--table` — table name where to put output data, `table` by default.
- `-of`, `--format`, `--output-format` — output format, `TSV` by default.
- `--format`, `--output-format` — output format, `TSV` by default.
- `-d`, `--database` — default database, `_local` by default.
- `--stacktrace` — whether to dump debug output in case of exception.
- `--echo` — print query before execution.

View File

@ -230,12 +230,21 @@ ClickHouse supports general purpose codecs and specialized codecs.
### General Purpose Codecs
Codecs:
#### NONE
- `NONE` — No compression.
- `LZ4` — Lossless [data compression algorithm](https://github.com/lz4/lz4) used by default. Applies LZ4 fast compression.
- `LZ4HC[(level)]` — LZ4 HC (high compression) algorithm with configurable level. Default level: 9. Setting `level <= 0` applies the default level. Possible levels: \[1, 12\]. Recommended level range: \[4, 9\].
- `ZSTD[(level)]` — [ZSTD compression algorithm](https://en.wikipedia.org/wiki/Zstandard) with configurable `level`. Possible levels: \[1, 22\]. Default value: 1.
`NONE` — No compression.
#### LZ4
`LZ4` — Lossless [data compression algorithm](https://github.com/lz4/lz4) used by default. Applies LZ4 fast compression.
#### LZ4HC
`LZ4HC[(level)]` — LZ4 HC (high compression) algorithm with configurable level. Default level: 9. Setting `level <= 0` applies the default level. Possible levels: \[1, 12\]. Recommended level range: \[4, 9\].
#### ZSTD
`ZSTD[(level)]` — [ZSTD compression algorithm](https://en.wikipedia.org/wiki/Zstandard) with configurable `level`. Possible levels: \[1, 22\]. Default value: 1.
High compression levels are useful for asymmetric scenarios, like compress once, decompress repeatedly. Higher levels mean better compression and higher CPU usage.
@ -243,13 +252,25 @@ High compression levels are useful for asymmetric scenarios, like compress once,
These codecs are designed to make compression more effective by using specific features of data. Some of these codecs do not compress data themselves. Instead, they prepare the data for a general purpose codec, which compresses it better than without this preparation.
Specialized codecs:
#### Delta
- `Delta(delta_bytes)` — Compression approach in which raw values are replaced by the difference of two neighboring values, except for the first value that stays unchanged. Up to `delta_bytes` are used for storing delta values, so `delta_bytes` is the maximum size of raw values. Possible `delta_bytes` values: 1, 2, 4, 8. The default value for `delta_bytes` is `sizeof(type)` if equal to 1, 2, 4, or 8. In all other cases, its 1.
- `DoubleDelta` — Calculates delta of deltas and writes it in compact binary form. Optimal compression rates are achieved for monotonic sequences with a constant stride, such as time series data. Can be used with any fixed-width type. Implements the algorithm used in Gorilla TSDB, extending it to support 64-bit types. Uses 1 extra bit for 32-byte deltas: 5-bit prefixes instead of 4-bit prefixes. For additional information, see Compressing Time Stamps in [Gorilla: A Fast, Scalable, In-Memory Time Series Database](http://www.vldb.org/pvldb/vol8/p1816-teller.pdf).
- `Gorilla` — Calculates XOR between current and previous value and writes it in compact binary form. Efficient when storing a series of floating point values that change slowly, because the best compression rate is achieved when neighboring values are binary equal. Implements the algorithm used in Gorilla TSDB, extending it to support 64-bit types. For additional information, see Compressing Values in [Gorilla: A Fast, Scalable, In-Memory Time Series Database](http://www.vldb.org/pvldb/vol8/p1816-teller.pdf).
- `FPC` - Repeatedly predicts the next floating point value in the sequence using the better of two predictors, then XORs the actual with the predicted value, and leading-zero compresses the result. Similar to Gorilla, this is efficient when storing a series of floating point values that change slowly. For 64-bit values (double), FPC is faster than Gorilla, for 32-bit values your mileage may vary. For a detailed description of the algorithm see [High Throughput Compression of Double-Precision Floating-Point Data](https://userweb.cs.txstate.edu/~burtscher/papers/dcc07a.pdf).
- `T64` — Compression approach that crops unused high bits of values in integer data types (including `Enum`, `Date` and `DateTime`). At each step of its algorithm, codec takes a block of 64 values, puts them into 64x64 bit matrix, transposes it, crops the unused bits of values and returns the rest as a sequence. Unused bits are the bits, that do not differ between maximum and minimum values in the whole data part for which the compression is used.
`Delta(delta_bytes)` — Compression approach in which raw values are replaced by the difference of two neighboring values, except for the first value that stays unchanged. Up to `delta_bytes` are used for storing delta values, so `delta_bytes` is the maximum size of raw values. Possible `delta_bytes` values: 1, 2, 4, 8. The default value for `delta_bytes` is `sizeof(type)` if equal to 1, 2, 4, or 8. In all other cases, it is 1.
#### DoubleDelta
`DoubleDelta` — Calculates delta of deltas and writes it in compact binary form. Optimal compression rates are achieved for monotonic sequences with a constant stride, such as time series data. Can be used with any fixed-width type. Implements the algorithm used in Gorilla TSDB, extending it to support 64-bit types. Uses 1 extra bit for 32-bit deltas: 5-bit prefixes instead of 4-bit prefixes. For additional information, see Compressing Time Stamps in [Gorilla: A Fast, Scalable, In-Memory Time Series Database](http://www.vldb.org/pvldb/vol8/p1816-teller.pdf).
#### Gorilla
`Gorilla` — Calculates XOR between current and previous value and writes it in compact binary form. Efficient when storing a series of floating point values that change slowly, because the best compression rate is achieved when neighboring values are binary equal. Implements the algorithm used in Gorilla TSDB, extending it to support 64-bit types. For additional information, see Compressing Values in [Gorilla: A Fast, Scalable, In-Memory Time Series Database](http://www.vldb.org/pvldb/vol8/p1816-teller.pdf).
#### FPC
`FPC` - Repeatedly predicts the next floating point value in the sequence using the better of two predictors, then XORs the actual with the predicted value, and leading-zero compresses the result. Similar to Gorilla, this is efficient when storing a series of floating point values that change slowly. For 64-bit values (double), FPC is faster than Gorilla, for 32-bit values your mileage may vary. For a detailed description of the algorithm see [High Throughput Compression of Double-Precision Floating-Point Data](https://userweb.cs.txstate.edu/~burtscher/papers/dcc07a.pdf).
#### T64
`T64` — Compression approach that crops unused high bits of values in integer data types (including `Enum`, `Date` and `DateTime`). At each step of its algorithm, codec takes a block of 64 values, puts them into 64x64 bit matrix, transposes it, crops the unused bits of values and returns the rest as a sequence. Unused bits are the bits, that do not differ between maximum and minimum values in the whole data part for which the compression is used.
`DoubleDelta` and `Gorilla` codecs are used in Gorilla TSDB as the components of its compressing algorithm. Gorilla approach is effective in scenarios when there is a sequence of slowly changing values with their timestamps. Timestamps are effectively compressed by the `DoubleDelta` codec, and values are effectively compressed by the `Gorilla` codec. For example, to get an effectively stored table, you can create it in the following configuration:
@ -268,14 +289,20 @@ These codecs don't actually compress data, but instead encrypt data on disk. The
Encryption codecs:
- `CODEC('AES-128-GCM-SIV')` — Encrypts data with AES-128 in [RFC 8452](https://tools.ietf.org/html/rfc8452) GCM-SIV mode.
- `CODEC('AES-256-GCM-SIV')` — Encrypts data with AES-256 in GCM-SIV mode.
#### AES-128-GCM-SIV
`CODEC('AES-128-GCM-SIV')` — Encrypts data with AES-128 in [RFC 8452](https://tools.ietf.org/html/rfc8452) GCM-SIV mode.
#### AES-256-GCM-SIV
`CODEC('AES-256-GCM-SIV')` — Encrypts data with AES-256 in GCM-SIV mode.
These codecs use a fixed nonce and encryption is therefore deterministic. This makes it compatible with deduplicating engines such as [ReplicatedMergeTree](../../../engines/table-engines/mergetree-family/replication.md) but has a weakness: when the same data block is encrypted twice, the resulting ciphertext will be exactly the same so an adversary who can read the disk can see this equivalence (although only the equivalence, without getting its content).
:::warning
Most engines including the "*MergeTree" family create index files on disk without applying codecs. This means plaintext will appear on disk if an encrypted column is indexed.
Most engines including the "\*MergeTree" family create index files on disk without applying codecs. This means plaintext will appear on disk if an encrypted column is indexed.
:::
:::warning

View File

@ -7,20 +7,30 @@ sidebar_label: Window Functions
ClickHouse supports the standard grammar for defining windows and window functions. The following features are currently supported:
| Feature | Support or workaround |
| --------| ----------|
| ad hoc window specification (`count(*) over (partition by id order by time desc)`) | supported |
| expressions involving window functions, e.g. `(count(*) over ()) / 2)` | not supported, wrap in a subquery ([feature request](https://github.com/ClickHouse/ClickHouse/issues/19857)) |
| `WINDOW` clause (`select ... from table window w as (partition by id)`) | supported |
| `ROWS` frame | supported |
| `RANGE` frame | supported, the default |
| `INTERVAL` syntax for `DateTime` `RANGE OFFSET` frame | not supported, specify the number of seconds instead |
| `GROUPS` frame | not supported |
| Calculating aggregate functions over a frame (`sum(value) over (order by time)`) | all aggregate functions are supported |
| `rank()`, `dense_rank()`, `row_number()` | supported |
| `lag/lead(value, offset)` | Not supported. Workarounds: |
| | 1) replace with `any(value) over (.... rows between <offset> preceding and <offset> preceding)`, or `following` for `lead`|
| | 2) use `lagInFrame/leadInFrame`, which are analogous, but respect the window frame. To get behavior identical to `lag/lead`, use `rows between unbounded preceding and unbounded following` |
| Feature | Support or workaround |
|------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| ad hoc window specification (`count(*) over (partition by id order by time desc)`) | supported |
| expressions involving window functions, e.g. `(count(*) over ()) / 2` | supported |
| `WINDOW` clause (`select ... from table window w as (partition by id)`) | supported |
| `ROWS` frame | supported |
| `RANGE` frame | supported, the default |
| `INTERVAL` syntax for `DateTime` `RANGE OFFSET` frame | not supported, specify the number of seconds instead |
| `GROUPS` frame | not supported |
| Calculating aggregate functions over a frame (`sum(value) over (order by time)`) | all aggregate functions are supported |
| `rank()`, `dense_rank()`, `row_number()` | supported |
| `lag/lead(value, offset)` | Not supported. Workarounds: |
| | 1) replace with `any(value) over (.... rows between <offset> preceding and <offset> preceding)`, or `following` for `lead` |
| | 2) use `lagInFrame/leadInFrame`, which are analogous, but respect the window frame. To get behavior identical to `lag/lead`, use `rows between unbounded preceding and unbounded following` |
## ClickHouse-specific Window Functions
### nonNegativeDerivative(metric_column, timestamp_column[, INTERVAL X UNITS])
Finds non-negative derivative for given `metric_column` by `timestamp_column`.
`INTERVAL` can be omitted, default is `INTERVAL 1 SECOND`.
The computed value is the following for each row:
- `0` for 1st row,
- ${metric_i - metric_{i-1} \over timestamp_i - timestamp_{i-1}} * interval$ for the $i$-th row.
## References

View File

@ -121,7 +121,7 @@ $ clickhouse-client --param_tbl="numbers" --param_db="system" --param_col="numbe
- `--user, -u` — имя пользователя, по умолчанию — default.
- `--password` — пароль, по умолчанию — пустая строка.
- `--query, -q` — запрос для выполнения, при использовании в неинтерактивном режиме.
- `--queries-file, -qf` - путь к файлу с запросами для выполнения. Необходимо указать только одну из опций: `query` или `queries-file`.
- `--queries-file` - путь к файлу с запросами для выполнения. Необходимо указать только одну из опций: `query` или `queries-file`.
- `--database, -d` — выбрать текущую БД. Без указания значение берется из настроек сервера (по умолчанию — БД default).
- `--multiline, -m` — если указано — разрешить многострочные запросы, не отправлять запрос по нажатию Enter.
- `--multiquery, -n` — если указано — разрешить выполнять несколько запросов, разделённых точкой с запятой.

View File

@ -28,12 +28,12 @@ $ clickhouse-local --structure "table_structure" --input-format "format_of_incom
Ключи команды:
- `-S`, `--structure` — структура таблицы, в которую будут помещены входящие данные.
- `-if`, `--input-format` — формат входящих данных. По умолчанию — `TSV`.
- `--input-format` — формат входящих данных. По умолчанию — `TSV`.
- `-f`, `--file` — путь к файлу с данными. По умолчанию — `stdin`.
- `-q`, `--query` — запросы на выполнение. Разделитель запросов — `;`.
- `-qf`, `--queries-file` - путь к файлу с запросами для выполнения. Необходимо задать либо параметр `query`, либо `queries-file`.
- `--queries-file` - путь к файлу с запросами для выполнения. Необходимо задать либо параметр `query`, либо `queries-file`.
- `-N`, `--table` — имя таблицы, в которую будут помещены входящие данные. По умолчанию - `table`.
- `-of`, `--format`, `--output-format` — формат выходных данных. По умолчанию — `TSV`.
- `--format`, `--output-format` — формат выходных данных. По умолчанию — `TSV`.
- `-d`, `--database` — база данных по умолчанию. Если не указано, используется значение `_local`.
- `--stacktrace` — вывод отладочной информации при исключениях.
- `--echo` — перед выполнением запрос выводится в консоль.
@ -109,4 +109,3 @@ Read 186 rows, 4.15 KiB in 0.035 sec., 5302 rows/sec., 118.34 KiB/sec.
├──────────┼──────────┤
...
```

View File

@ -43,7 +43,7 @@ CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]na
При создании материализованного представления без использования `TO [db].[table]`, нужно обязательно указать `ENGINE` - движок таблицы для хранения данных.
При создании материализованного представления с испольованием `TO [db].[table]`, нельзя указывать `POPULATE`.
При создании материализованного представления с использованием `TO [db].[table]`, нельзя указывать `POPULATE`.
Материализованное представление устроено следующим образом: при вставке данных в таблицу, указанную в SELECT-е, кусок вставляемых данных преобразуется этим запросом SELECT, и полученный результат вставляется в представление.

View File

@ -29,12 +29,12 @@ clickhouse-local --structure "table_structure" --input-format "format_of_incomin
参数:
- `-S`, `--structure` — 输入数据的表结构。
- `-if`, `--input-format` — 输入格式化类型, 默认是`TSV`。
- `--input-format` — 输入格式化类型, 默认是`TSV`。
- `-f`, `--file` — 数据路径, 默认是`stdin`。
- `-q`, `--query` — 要查询的SQL语句使用`;`做分隔符。您必须指定`query`或`queries-file`选项。
- `-qf`, `--queries-file` - 包含执行查询的文件路径。您必须指定`query`或`queries-file`选项。
- `--queries-file` - 包含执行查询的文件路径。您必须指定`query`或`queries-file`选项。
- `-N`, `--table` — 数据输出的表名,默认是`table`。
- `-of`, `--format`, `--output-format` — 输出格式化类型, 默认是`TSV`。
- `--format`, `--output-format` — 输出格式化类型, 默认是`TSV`。
- `-d`, `--database` — 默认数据库名,默认是`_local`。
- `--stacktrace` — 是否在出现异常时输出栈信息。
- `--echo` — 执行前打印查询。
@ -53,7 +53,7 @@ clickhouse-local --structure "table_structure" --input-format "format_of_incomin
## 示例 {#examples}
``` bash
echo -e "1,2\n3,4" | clickhouse-local -S "a Int64, b Int64" -if "CSV" -q "SELECT * FROM table"
echo -e "1,2\n3,4" | clickhouse-local -S "a Int64, b Int64" --input-format "CSV" -q "SELECT * FROM table"
Read 2 rows, 32.00 B in 0.000 sec., 5182 rows/sec., 80.97 KiB/sec.
1 2
3 4

View File

@ -994,7 +994,7 @@ void Client::processConfig()
/// The value of the option is used as the text of query (or of multiple queries).
/// If stdin is not a terminal, INSERT data for the first query is read from it.
/// - stdin is not a terminal. In this case queries are read from it.
/// - -qf (--queries-file) command line option is present.
/// - --queries-file command line option is present.
/// The value of the option is used as file with query (or of multiple queries) to execute.
delayed_interactive = config().has("interactive") && (config().has("query") || config().has("queries-file"));

View File

@ -3,6 +3,7 @@
#include <mutex>
#include <Poco/Util/Application.h>
#include <base/defines.h>
namespace DB
{
@ -24,9 +25,9 @@ public:
private:
mutable std::mutex keeper_dispatcher_mutex;
mutable std::shared_ptr<KeeperDispatcher> keeper_dispatcher;
mutable std::shared_ptr<KeeperDispatcher> keeper_dispatcher TSA_GUARDED_BY(keeper_dispatcher_mutex);
ConfigurationPtr config;
ConfigurationPtr config TSA_GUARDED_BY(keeper_dispatcher_mutex);
};
}

View File

@ -1,6 +1,8 @@
#pragma once
#include "SharedLibraryHandler.h"
#include <base/defines.h>
#include <unordered_map>
#include <mutex>
@ -30,7 +32,7 @@ public:
private:
/// map: dict_id -> sharedLibraryHandler
std::unordered_map<std::string, SharedLibraryHandlerPtr> library_handlers;
std::unordered_map<std::string, SharedLibraryHandlerPtr> library_handlers TSA_GUARDED_BY(mutex);
std::mutex mutex;
};

View File

@ -3,7 +3,6 @@
#include <Client/ClientBase.h>
#include <Client/LocalConnection.h>
#include <Common/ProgressIndication.h>
#include <Common/StatusFile.h>
#include <Common/InterruptListener.h>
#include <Loggers/Loggers.h>

View File

@ -4,6 +4,7 @@
#include <nanodbc/nanodbc.h>
#include <mutex>
#include <base/BorrowedObjectPool.h>
#include <base/defines.h>
#include <unordered_map>
@ -165,7 +166,7 @@ public:
private:
/// [connection_settings_string] -> [connection_pool]
using PoolFactory = std::unordered_map<std::string, nanodbc::PoolPtr>;
PoolFactory factory;
PoolFactory factory TSA_GUARDED_BY(mutex);
std::mutex mutex;
};

View File

@ -386,6 +386,39 @@
text-align: center;
margin-top: 5em;
}
#chart
{
background-color: var(--element-background-color);
filter: drop-shadow(.2rem .2rem .2rem var(--shadow-color));
display: none;
height: 70vh;
}
/* This is for charts (uPlot), Copyright (c) 2022 Leon Sorokin, MIT License, https://github.com/leeoniya/uPlot/ */
.u-wrap {position: relative;user-select: none;}
.u-over, .u-under, .u-axis {position: absolute;}
.u-under {overflow: hidden;}
.uplot canvas {display: block;position: relative;width: 100%;height: 100%;}
.u-legend {margin: auto;text-align: center; margin-top: 1em; font-family: Liberation Mono, DejaVu Sans Mono, MonoLisa, Consolas, monospace;}
.u-inline {display: block;}
.u-inline * {display: inline-block;}
.u-inline tr {margin-right: 16px;}
.u-legend th {font-weight: 600;}
.u-legend th > * {vertical-align: middle;display: inline-block;}
.u-legend td { min-width: 13em; }
.u-legend .u-marker {width: 1em;height: 1em;margin-right: 4px;background-clip: padding-box !important;}
.u-inline.u-live th::after {content: ":";vertical-align: middle;}
.u-inline:not(.u-live) .u-value {display: none;}
.u-series > * {padding: 4px;}
.u-series th {cursor: pointer;}
.u-legend .u-off > * {opacity: 0.3;}
.u-select {background: rgba(0,0,0,0.07);position: absolute;pointer-events: none;}
.u-cursor-x, .u-cursor-y {position: absolute;left: 0;top: 0;pointer-events: none;will-change: transform;z-index: 100;}
.u-hz .u-cursor-x, .u-vt .u-cursor-y {height: 100%;border-right: 1px dashed #607D8B;}
.u-hz .u-cursor-y, .u-vt .u-cursor-x {width: 100%;border-bottom: 1px dashed #607D8B;}
.u-cursor-pt {position: absolute;top: 0;left: 0;border-radius: 50%;border: 0 solid;pointer-events: none;will-change: transform;z-index: 100;/*this has to be !important since we set inline "background" shorthand */background-clip: padding-box !important;}
.u-axis.u-off, .u-select.u-off, .u-cursor-x.u-off, .u-cursor-y.u-off, .u-cursor-pt.u-off {display: none;}
</style>
</head>
@ -410,6 +443,7 @@
<table class="monospace-table shadow" id="data-table"></table>
<pre class="monospace-table shadow" id="data-unparsed"></pre>
</div>
<div id="chart"></div>
<svg id="graph" fill="none"></svg>
<p id="error" class="monospace shadow">
</p>
@ -530,8 +564,13 @@
if (status === 200) {
let json;
try { json = JSON.parse(response); } catch (e) {}
if (json !== undefined && json.statistics !== undefined) {
renderResult(json);
} else if (Array.isArray(json) && json.length == 2 &&
Array.isArray(json[0]) && Array.isArray(json[1]) && json[0].length > 1 && json[0].length == json[1].length) {
/// If user requested FORMAT JSONCompactColumns, we will render it as a chart.
renderChart(json);
} else {
renderUnparsedResult(response);
}
@ -578,30 +617,27 @@
}
}
/// Removes every child node of the element with the given id and hides the element.
function clearElement(id)
{
    const target = document.getElementById(id);
    for (let child = target.lastChild; child !== null; child = target.lastChild) {
        target.removeChild(child);
    }
    target.style.display = 'none';
}
function clear()
{
let table = document.getElementById('data-table');
while (table.firstChild) {
table.removeChild(table.lastChild);
}
let graph = document.getElementById('graph');
while (graph.firstChild) {
graph.removeChild(graph.lastChild);
}
graph.style.display = 'none';
document.getElementById('data-unparsed').innerText = '';
document.getElementById('data-unparsed').style.display = 'none';
document.getElementById('error').innerText = '';
document.getElementById('error').style.display = 'none';
clearElement('data-table');
clearElement('graph');
clearElement('chart');
clearElement('data-unparsed');
clearElement('error');
clearElement('hourglass');
document.getElementById('check-mark').innerText = '';
document.getElementById('hourglass').innerText = '';
document.getElementById('stats').innerText = '';
document.getElementById('hourglass').style.display = 'none';
document.getElementById('check-mark').style.display = 'none';
document.getElementById('logo-container').style.display = 'block';
}
@ -738,6 +774,7 @@
}
let table = document.getElementById('data-table');
table.appendChild(tbody);
table.style.display = 'table';
}
function renderTable(response)
@ -792,6 +829,7 @@
let table = document.getElementById('data-table');
table.appendChild(thead);
table.appendChild(tbody);
table.style.display = 'table';
}
/// A function to render raw data when non-default format is specified.
@ -873,16 +911,80 @@
svg.style.height = graph.graph().height;
}
function setColorTheme(theme) {
window.localStorage.setItem('theme', theme);
document.documentElement.setAttribute('data-theme', theme);
/// Cached promise of the uPlot script load, so the library is requested at most once.
let load_uplot_promise;

/// Starts loading uPlot on the first call; every call returns the same promise.
function loadUplot() {
    if (!load_uplot_promise) {
        const script_url = 'https://cdn.jsdelivr.net/npm/uplot@1.6.21/dist/uPlot.iife.min.js';
        const script_integrity = 'sha384-TwdJPnTsKP6pnvFZZKda0WJCXpjcHCa7MYHmjrYDu6rsEsb/UnFdoL0phS5ODqTA';
        load_uplot_promise = loadJS(script_url, script_integrity);
    }
    return load_uplot_promise;
}
let uplot;
/// Renders a two-column result (`json[0]` are x values, `json[1]` are y values) as a stepped uPlot chart.
/// Loads uPlot on first use, clears whatever was rendered before, and stores the new chart in `uplot`.
async function renderChart(json)
{
    await loadUplot();
    clear();

    let chart = document.getElementById('chart');
    chart.style.display = 'block';

    let paths = uPlot.paths.stepped({align: 1});

    const [line_color, fill_color, grid_color, axes_color] = theme == 'light'
        ? ["#F80", "#FED", "#c7d0d9", "#2c3235"]
        : ["#888", "#045", "#2c3235", "#c7d0d9"];

    /// The left padding grows with the number of digits (plus digit groups) of the largest y value.
    /// The maximum is folded rather than computed with `Math.max(...json[1])`: spreading passes every
    /// value as a separate call argument, which throws RangeError on large results.
    const max_value = json[1].reduce((acc, value) => Math.max(acc, value), -Infinity);
    const magnitude = Math.log10(max_value);
    const digits_padding = (Math.ceil(magnitude) + Math.floor(magnitude / 3)) * 6;

    /// For a maximum that is zero, negative or below one the formula yields NaN, -Infinity or a negative
    /// number; in that case fall back to `null`, the same value that is used for the other three sides.
    const left_padding = Number.isFinite(digits_padding) && digits_padding >= 0 ? digits_padding : null;

    const opts = {
        width: chart.clientWidth,
        height: chart.clientHeight,
        /// Treat x as time if the first value looks like a unix timestamp in seconds.
        scales: { x: { time: json[0][0] > 1000000000 && json[0][0] < 2000000000 } },
        axes: [ { stroke: axes_color,
                  grid: { width: 1 / devicePixelRatio, stroke: grid_color },
                  ticks: { width: 1 / devicePixelRatio, stroke: grid_color } },
                { stroke: axes_color,
                  grid: { width: 1 / devicePixelRatio, stroke: grid_color },
                  ticks: { width: 1 / devicePixelRatio, stroke: grid_color } } ],
        series: [ { label: "x" },
                  { label: "y", stroke: line_color, fill: fill_color,
                    drawStyle: 0, lineInterpolation: 1, paths } ],
        padding: [ null, null, null, left_padding ],
    };

    uplot = new uPlot(opts, json, chart);
}
/// Resizes the current chart (if one was rendered) to the size of its container.
function resizeChart() {
    if (!uplot) {
        return;
    }
    const container = document.getElementById('chart');
    const new_size = { width: container.clientWidth, height: container.clientHeight };
    uplot.setSize(new_size);
}
/// Re-renders the chart from its own data, but only if a chart exists and is currently shown.
function redrawChart() {
    if (!uplot) {
        return;
    }
    const chart_is_shown = document.getElementById('chart').style.display == 'block';
    if (chart_is_shown) {
        renderChart(uplot.data);
    }
}
new ResizeObserver(resizeChart).observe(document.getElementById('chart'));
/// First we check if theme is set via the 'theme' GET parameter, if not, we check localStorage, otherwise we check OS preference.
let theme = current_url.searchParams.get('theme');
if (['dark', 'light'].indexOf(theme) === -1) {
theme = window.localStorage.getItem('theme');
}
if (!theme) {
theme = 'light';
}
/// Switches the page to `new_theme` and redraws the chart with the new theme.
/// The choice is saved to localStorage only when `update_preference` is truthy.
function setColorTheme(new_theme, update_preference) {
    theme = new_theme;

    if (update_preference) {
        window.localStorage.setItem('theme', new_theme);
    }

    const root = document.documentElement;
    root.setAttribute('data-theme', new_theme);

    redrawChart();
}
if (theme) {
document.documentElement.setAttribute('data-theme', theme);
@ -890,26 +992,21 @@
/// Obtain system-level user preference
const media_query_list = window.matchMedia('(prefers-color-scheme: dark)');
if (media_query_list.matches) {
/// Set without saving to localstorage
document.documentElement.setAttribute('data-theme', 'dark');
setColorTheme('dark');
}
/// There is a rumor that on some computers, the theme is changing automatically on day/night.
media_query_list.addEventListener('change', function(e) {
if (e.matches) {
document.documentElement.setAttribute('data-theme', 'dark');
} else {
document.documentElement.setAttribute('data-theme', 'light');
}
setColorTheme(e.matches ? 'dark' : 'light');
});
}
document.getElementById('toggle-light').onclick = function() {
setColorTheme('light');
setColorTheme('light', true);
}
document.getElementById('toggle-dark').onclick = function() {
setColorTheme('dark');
setColorTheme('dark', true);
}
</script>
</html>

358
src/Access/AccessBackup.cpp Normal file
View File

@ -0,0 +1,358 @@
#include <Access/AccessBackup.h>
#include <Access/AccessControl.h>
#include <Access/AccessEntityIO.h>
#include <Access/Common/AccessRightsElement.h>
#include <Access/User.h>
#include <Access/Role.h>
#include <Access/SettingsProfile.h>
#include <Access/RowPolicy.h>
#include <Access/Quota.h>
#include <Backups/BackupEntriesCollector.h>
#include <Backups/BackupEntryFromMemory.h>
#include <Backups/IBackup.h>
#include <IO/WriteHelpers.h>
#include <IO/ReadHelpers.h>
#include <IO/ReadBufferFromString.h>
#include <Poco/UUIDGenerator.h>
#include <base/insertAtEnd.h>
#include <boost/range/algorithm/copy.hpp>
namespace fs = std::filesystem;
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
namespace
{
/// Represents a list of access entities as they're stored in a backup.
///
/// toBackupEntry() writes, for every entity, a header line "<id>\t<type name>\t<entity name>"
/// followed by the serialized entity and a newline. If there are dependencies, a line "DEPENDENCIES"
/// follows, and then one line "<id>\t<type name>\t<name>" per dependency.
/// fromBackupEntry() reads that text back.
struct AccessEntitiesInBackup
{
    /// Entities stored in the backup, by their ID at the time of the backup.
    std::unordered_map<UUID, AccessEntityPtr> entities;

    /// Name and type of each entity that is referenced by `entities`, by ID.
    std::unordered_map<UUID, std::pair<String, AccessEntityType>> dependencies;

    /// Serializes `entities` and `dependencies` into an in-memory backup entry.
    BackupEntryPtr toBackupEntry() const
    {
        WriteBufferFromOwnString buf;

        for (const auto & [id, entity] : entities)
        {
            writeText(id, buf);
            writeChar('\t', buf);
            writeText(entity->getTypeInfo().name, buf);
            writeChar('\t', buf);
            writeText(entity->getName(), buf);
            writeChar('\n', buf);
            writeText(serializeAccessEntity(*entity), buf);
            writeChar('\n', buf);
        }

        if (!dependencies.empty())
        {
            writeText("DEPENDENCIES\n", buf);
            for (const auto & [id, name_and_type] : dependencies)
            {
                writeText(id, buf);
                writeChar('\t', buf);
                writeText(AccessEntityTypeInfo::get(name_and_type.second).name, buf);
                writeChar('\t', buf);
                writeText(name_and_type.first, buf);
                writeChar('\n', buf);
            }
        }

        return std::make_shared<BackupEntryFromMemory>(buf.str());
    }

    /// Parses the text produced by toBackupEntry().
    /// `file_path` is only used to extend the message of an exception thrown while parsing.
    static AccessEntitiesInBackup fromBackupEntry(const IBackupEntry & backup_entry, const String & file_path)
    {
        try
        {
            AccessEntitiesInBackup res;
            std::unique_ptr<ReadBuffer> buf = backup_entry.getReadBuffer();

            bool dependencies_found = false;

            while (!buf->eof())
            {
                String line;
                readStringUntilNewlineInto(line, *buf);
                buf->ignore();
                if (line == "DEPENDENCIES")
                {
                    dependencies_found = true;
                    break;
                }

                /// Only the first tab-separated field of the header line (the ID) is used here;
                /// the type and the name are not read from the header.
                /// Parse exactly that field rather than the whole line.
                size_t id_endpos = line.find('\t');
                String id_as_string = line.substr(0, id_endpos);
                UUID id = parse<UUID>(id_as_string);

                /// The serialized entity continues until the first empty line (or the end of the data).
                String queries;
                while (!buf->eof())
                {
                    String query;
                    readStringUntilNewlineInto(query, *buf);
                    buf->ignore();
                    if (query.empty())
                        break;
                    if (!queries.empty())
                        queries.append("\n");
                    queries.append(query);
                }

                AccessEntityPtr entity = deserializeAccessEntity(queries);
                res.entities.emplace(id, entity);
            }

            if (dependencies_found)
            {
                while (!buf->eof())
                {
                    String id_as_string;
                    readStringInto(id_as_string, *buf);
                    buf->ignore();
                    UUID id = parse<UUID>(id_as_string);

                    String type_as_string;
                    readStringInto(type_as_string, *buf);
                    buf->ignore();
                    AccessEntityType type = AccessEntityTypeInfo::parseType(type_as_string);

                    String name;
                    readStringInto(name, *buf);
                    buf->ignore();

                    /// An entity which is itself stored in this backup is not kept as a dependency.
                    if (!res.entities.contains(id))
                        res.dependencies.emplace(id, std::pair{name, type});
                }
            }

            return res;
        }
        catch (Exception & e)
        {
            e.addMessage("While parsing " + file_path);
            throw;
        }
    }
};
std::vector<UUID> findDependencies(const std::vector<std::pair<UUID, AccessEntityPtr>> & entities)
{
std::vector<UUID> res;
for (const auto & entity : entities | boost::adaptors::map_values)
insertAtEnd(res, entity->findDependencies());
/// Remove duplicates in the list of dependencies (some entities can refer to other entities).
::sort(res.begin(), res.end());
res.erase(std::unique(res.begin(), res.end()), res.end());
for (const auto & id : entities | boost::adaptors::map_keys)
{
auto it = std::lower_bound(res.begin(), res.end(), id);
if ((it != res.end()) && (*it == id))
res.erase(it);
}
return res;
}
std::unordered_map<UUID, std::pair<String, AccessEntityType>> readDependenciesNamesAndTypes(const std::vector<UUID> & dependencies, const AccessControl & access_control)
{
std::unordered_map<UUID, std::pair<String, AccessEntityType>> res;
for (const auto & id : dependencies)
{
if (auto name_and_type = access_control.tryReadNameWithType(id))
res.emplace(id, name_and_type.value());
}
return res;
}
/// Maps the ID each dependency had in the backup to the ID of the entity with the same
/// type and name in `access_control`.
/// With `allow_unresolved_dependencies` the lookup goes through find() and a dependency without
/// a result is left out of the map; otherwise getID() is used
/// (presumably it throws if there is no such entity - TODO confirm).
std::unordered_map<UUID, UUID> resolveDependencies(const std::unordered_map<UUID, std::pair<String, AccessEntityType>> & dependencies, const AccessControl & access_control, bool allow_unresolved_dependencies)
{
    std::unordered_map<UUID, UUID> old_to_new_ids;

    for (const auto & [old_id, name_and_type] : dependencies)
    {
        const auto & [name, type] = name_and_type;

        std::optional<UUID> resolved_id;
        if (!allow_unresolved_dependencies)
            resolved_id = access_control.getID(type, name);
        else
            resolved_id = access_control.find(type, name);

        if (!resolved_id)
            continue;

        old_to_new_ids.emplace(old_id, *resolved_id);
    }

    return old_to_new_ids;
}
void generateRandomIDs(std::vector<std::pair<UUID, AccessEntityPtr>> & entities, std::unordered_map<UUID, UUID> & old_to_new_ids)
{
Poco::UUIDGenerator generator;
for (auto & [id, entity] : entities)
{
UUID new_id;
generator.createRandom().copyTo(reinterpret_cast<char *>(&new_id));
old_to_new_ids.emplace(id, new_id);
id = new_id;
}
}
void replaceDependencies(std::vector<std::pair<UUID, AccessEntityPtr>> & entities, const std::unordered_map<UUID, UUID> & old_to_new_ids)
{
for (auto & entity : entities | boost::adaptors::map_values)
{
bool need_replace = false;
for (const auto & dependency : entity->findDependencies())
{
if (old_to_new_ids.contains(dependency))
{
need_replace = true;
break;
}
}
if (!need_replace)
continue;
auto new_entity = entity->clone();
new_entity->replaceDependencies(old_to_new_ids);
entity = new_entity;
}
}
/// Builds the list of access rights needed to restore the passed entities:
///  - a CREATE right matching the type of each entity;
///  - for users and roles, every access element they hold (partial revokes excluded) with the grant option,
///    plus ROLE_ADMIN if they have granted roles.
/// Throws LOGICAL_ERROR for an entity of an unknown type.
AccessRightsElements getRequiredAccessToRestore(const std::unordered_map<UUID, AccessEntityPtr> & entities)
{
    AccessRightsElements res;

    /// Users and roles are handled the same way, only the CREATE right differs.
    auto add_requirements_for_grantee = [&res](AccessType create_access, const auto & grantee)
    {
        res.emplace_back(create_access);

        auto elements = grantee.access.getElements();
        for (auto & element : elements)
        {
            if (!element.is_partial_revoke)
            {
                element.grant_option = true;
                res.emplace_back(element);
            }
        }

        if (!grantee.granted_roles.isEmpty())
            res.emplace_back(AccessType::ROLE_ADMIN);
    };

    for (const auto & id_and_entity : entities)
    {
        const auto & entity = id_and_entity.second;
        auto entity_type = entity->getType();

        switch (entity_type)
        {
            case User::TYPE:
            {
                add_requirements_for_grantee(AccessType::CREATE_USER, typeid_cast<const User &>(*entity));
                break;
            }

            case Role::TYPE:
            {
                add_requirements_for_grantee(AccessType::CREATE_ROLE, typeid_cast<const Role &>(*entity));
                break;
            }

            case SettingsProfile::TYPE:
            {
                res.emplace_back(AccessType::CREATE_SETTINGS_PROFILE);
                break;
            }

            case RowPolicy::TYPE:
            {
                const auto & policy = typeid_cast<const RowPolicy &>(*entity);
                res.emplace_back(AccessType::CREATE_ROW_POLICY, policy.getDatabase(), policy.getTableName());
                break;
            }

            case Quota::TYPE:
            {
                res.emplace_back(AccessType::CREATE_QUOTA);
                break;
            }

            default:
                throw Exception("Unknown type: " + toString(entity_type), ErrorCodes::LOGICAL_ERROR);
        }
    }

    return res;
}
}
/// Reads all the entities of the given type that should go to the backup, together with the names and
/// types of the entities they refer to, and adds them to the backup as the file "access.txt"
/// under `data_path_in_backup`.
void backupAccessEntities(
    BackupEntriesCollector & backup_entries_collector,
    const String & data_path_in_backup,
    const AccessControl & access_control,
    AccessEntityType type)
{
    auto entities = access_control.readAllForBackup(type, backup_entries_collector.getBackupSettings());

    AccessEntitiesInBackup ab;
    ab.dependencies = readDependenciesNamesAndTypes(findDependencies(entities), access_control);
    ab.entities.insert(entities.begin(), entities.end());

    const auto path_in_backup = fs::path{data_path_in_backup} / "access.txt";
    backup_entries_collector.addBackupEntry(path_in_backup, ab.toBackupEntry());
}
/// Only stores the arguments; nothing is read from the backup until addDataPath() is called.
AccessRestoreTask::AccessRestoreTask(
    const BackupPtr & backup_,
    const RestoreSettings & restore_settings_,
    std::shared_ptr<IRestoreCoordination> restore_coordination_)
    : backup(backup_)
    , restore_settings(restore_settings_)
    , restore_coordination(restore_coordination_)
{
}
/// Defaulted destructor, defined out of line in this translation unit.
AccessRestoreTask::~AccessRestoreTask() = default;
/// Loads "access.txt" found under `data_path` in the backup and merges its entities and dependencies
/// into this task. A path which has already been added is ignored.
void AccessRestoreTask::addDataPath(const String & data_path)
{
    const bool is_new_path = data_paths.emplace(data_path).second;
    if (!is_new_path)
        return;

    String file_path = fs::path{data_path} / "access.txt";
    auto backup_entry = backup->readFile(file_path);
    auto loaded = AccessEntitiesInBackup::fromBackupEntry(*backup_entry, file_path);

    entities.insert(loaded.entities.begin(), loaded.entities.end());
    dependencies.insert(loaded.dependencies.begin(), loaded.dependencies.end());

    /// Anything that is going to be restored as an entity is not an external dependency.
    for (const auto & id_and_entity : entities)
        dependencies.erase(id_and_entity.first);
}
/// Returns true if `data_path` has already been passed to addDataPath().
bool AccessRestoreTask::hasDataPath(const String & data_path) const
{
    const bool already_added = data_paths.find(data_path) != data_paths.end();
    return already_added;
}
/// Returns the access rights needed to restore all the entities loaded so far
/// (delegates to getRequiredAccessToRestore()).
AccessRightsElements AccessRestoreTask::getRequiredAccess() const
{
return getRequiredAccessToRestore(entities);
}
/// Inserts the loaded entities into `access_control`.
/// Every entity gets a new random ID, and the IDs referenced by the entities are rewritten:
/// dependencies are mapped to entities existing in `access_control`, restored entities to their new IDs.
void AccessRestoreTask::restore(AccessControl & access_control) const
{
    /// The members are not modified (the method is const), so work on a copy of the entities.
    std::vector<std::pair<UUID, AccessEntityPtr>> new_entities(entities.begin(), entities.end());

    auto old_to_new_ids = resolveDependencies(dependencies, access_control, restore_settings.allow_unresolved_access_dependencies);
    generateRandomIDs(new_entities, old_to_new_ids);
    replaceDependencies(new_entities, old_to_new_ids);

    access_control.insertFromBackup(new_entities, restore_settings, restore_coordination);
}
}

56
src/Access/AccessBackup.h Normal file
View File

@ -0,0 +1,56 @@
#pragma once
#include <Backups/RestoreSettings.h>
#include <unordered_map>
#include <unordered_set>
namespace DB
{
class AccessControl;
enum class AccessEntityType;
class BackupEntriesCollector;
class RestorerFromBackup;
class IBackup;
using BackupPtr = std::shared_ptr<const IBackup>;
class IRestoreCoordination;
struct IAccessEntity;
using AccessEntityPtr = std::shared_ptr<const IAccessEntity>;
class AccessRightsElements;
/// Makes a backup of access entities of a specified type.
/// `data_path_in_backup` is the path inside the backup where the entities are stored.
void backupAccessEntities(
BackupEntriesCollector & backup_entries_collector,
const String & data_path_in_backup,
const AccessControl & access_control,
AccessEntityType type);
/// Restores access entities from a backup.
/// Typical usage: call addDataPath() for every path in the backup that contains access entities,
/// check getRequiredAccess(), then call restore().
class AccessRestoreTask
{
public:
AccessRestoreTask(
const BackupPtr & backup_, const RestoreSettings & restore_settings_, std::shared_ptr<IRestoreCoordination> restore_coordination_);
~AccessRestoreTask();
/// Adds a data path to load access entities from.
void addDataPath(const String & data_path);
/// Returns true if the data path has already been added.
bool hasDataPath(const String & data_path) const;
/// Returns the access rights the current user needs in order to restore the loaded entities.
AccessRightsElements getRequiredAccess() const;
/// Inserts all access entities loaded from all the paths added by addDataPath().
void restore(AccessControl & access_control) const;
private:
BackupPtr backup;
RestoreSettings restore_settings;
std::shared_ptr<IRestoreCoordination> restore_coordination;
/// Entities loaded from the backup, by the IDs they have in the backup.
std::unordered_map<UUID, AccessEntityPtr> entities;
/// Names and types of the entities referenced by `entities` but not contained in them.
std::unordered_map<UUID, std::pair<String, AccessEntityType>> dependencies;
/// Paths already passed to addDataPath().
std::unordered_set<String> data_paths;
};
}

View File

@ -15,7 +15,11 @@
#include <Access/User.h>
#include <Access/ExternalAuthenticators.h>
#include <Access/AccessChangesNotifier.h>
#include <Access/AccessBackup.h>
#include <Backups/BackupEntriesCollector.h>
#include <Backups/RestorerFromBackup.h>
#include <Core/Settings.h>
#include <base/defines.h>
#include <base/find_symbols.h>
#include <Poco/AccessExpireCache.h>
#include <boost/algorithm/string/join.hpp>
@ -130,7 +134,7 @@ public:
}
private:
Strings registered_prefixes;
Strings registered_prefixes TSA_GUARDED_BY(mutex);
mutable std::mutex mutex;
};
@ -184,39 +188,25 @@ void AccessControl::setUsersConfig(const Poco::Util::AbstractConfiguration & use
return;
}
}
addUsersConfigStorage(users_config_);
addUsersConfigStorage(UsersConfigAccessStorage::STORAGE_TYPE, users_config_, false);
}
void AccessControl::addUsersConfigStorage(const Poco::Util::AbstractConfiguration & users_config_)
void AccessControl::addUsersConfigStorage(const String & storage_name_, const Poco::Util::AbstractConfiguration & users_config_, bool allow_backup_)
{
addUsersConfigStorage(UsersConfigAccessStorage::STORAGE_TYPE, users_config_);
}
void AccessControl::addUsersConfigStorage(const String & storage_name_, const Poco::Util::AbstractConfiguration & users_config_)
{
auto new_storage = std::make_shared<UsersConfigAccessStorage>(storage_name_, *this);
auto new_storage = std::make_shared<UsersConfigAccessStorage>(storage_name_, *this, allow_backup_);
new_storage->setConfig(users_config_);
addStorage(new_storage);
LOG_DEBUG(getLogger(), "Added {} access storage '{}', path: {}",
String(new_storage->getStorageType()), new_storage->getStorageName(), new_storage->getPath());
}
void AccessControl::addUsersConfigStorage(
const String & users_config_path_,
const String & include_from_path_,
const String & preprocessed_dir_,
const zkutil::GetZooKeeper & get_zookeeper_function_)
{
addUsersConfigStorage(
UsersConfigAccessStorage::STORAGE_TYPE, users_config_path_, include_from_path_, preprocessed_dir_, get_zookeeper_function_);
}
void AccessControl::addUsersConfigStorage(
const String & storage_name_,
const String & users_config_path_,
const String & include_from_path_,
const String & preprocessed_dir_,
const zkutil::GetZooKeeper & get_zookeeper_function_)
const zkutil::GetZooKeeper & get_zookeeper_function_,
bool allow_backup_)
{
auto storages = getStoragesPtr();
for (const auto & storage : *storages)
@ -227,7 +217,7 @@ void AccessControl::addUsersConfigStorage(
return;
}
}
auto new_storage = std::make_shared<UsersConfigAccessStorage>(storage_name_, *this);
auto new_storage = std::make_shared<UsersConfigAccessStorage>(storage_name_, *this, allow_backup_);
new_storage->load(users_config_path_, include_from_path_, preprocessed_dir_, get_zookeeper_function_);
addStorage(new_storage);
LOG_DEBUG(getLogger(), "Added {} access storage '{}', path: {}", String(new_storage->getStorageType()), new_storage->getStorageName(), new_storage->getPath());
@ -237,7 +227,8 @@ void AccessControl::addUsersConfigStorage(
void AccessControl::addReplicatedStorage(
const String & storage_name_,
const String & zookeeper_path_,
const zkutil::GetZooKeeper & get_zookeeper_function_)
const zkutil::GetZooKeeper & get_zookeeper_function_,
bool allow_backup_)
{
auto storages = getStoragesPtr();
for (const auto & storage : *storages)
@ -245,17 +236,12 @@ void AccessControl::addReplicatedStorage(
if (auto replicated_storage = typeid_cast<std::shared_ptr<ReplicatedAccessStorage>>(storage))
return;
}
auto new_storage = std::make_shared<ReplicatedAccessStorage>(storage_name_, zookeeper_path_, get_zookeeper_function_, *changes_notifier);
auto new_storage = std::make_shared<ReplicatedAccessStorage>(storage_name_, zookeeper_path_, get_zookeeper_function_, *changes_notifier, allow_backup_);
addStorage(new_storage);
LOG_DEBUG(getLogger(), "Added {} access storage '{}'", String(new_storage->getStorageType()), new_storage->getStorageName());
}
void AccessControl::addDiskStorage(const String & directory_, bool readonly_)
{
addDiskStorage(DiskAccessStorage::STORAGE_TYPE, directory_, readonly_);
}
void AccessControl::addDiskStorage(const String & storage_name_, const String & directory_, bool readonly_)
void AccessControl::addDiskStorage(const String & storage_name_, const String & directory_, bool readonly_, bool allow_backup_)
{
auto storages = getStoragesPtr();
for (const auto & storage : *storages)
@ -270,13 +256,13 @@ void AccessControl::addDiskStorage(const String & storage_name_, const String &
}
}
}
auto new_storage = std::make_shared<DiskAccessStorage>(storage_name_, directory_, readonly_, *changes_notifier);
auto new_storage = std::make_shared<DiskAccessStorage>(storage_name_, directory_, *changes_notifier, readonly_, allow_backup_);
addStorage(new_storage);
LOG_DEBUG(getLogger(), "Added {} access storage '{}', path: {}", String(new_storage->getStorageType()), new_storage->getStorageName(), new_storage->getPath());
}
void AccessControl::addMemoryStorage(const String & storage_name_)
void AccessControl::addMemoryStorage(const String & storage_name_, bool allow_backup_)
{
auto storages = getStoragesPtr();
for (const auto & storage : *storages)
@ -284,7 +270,7 @@ void AccessControl::addMemoryStorage(const String & storage_name_)
if (auto memory_storage = typeid_cast<std::shared_ptr<MemoryAccessStorage>>(storage))
return;
}
auto new_storage = std::make_shared<MemoryAccessStorage>(storage_name_, *changes_notifier);
auto new_storage = std::make_shared<MemoryAccessStorage>(storage_name_, *changes_notifier, allow_backup_);
addStorage(new_storage);
LOG_DEBUG(getLogger(), "Added {} access storage '{}'", String(new_storage->getStorageType()), new_storage->getStorageName());
}
@ -327,20 +313,23 @@ void AccessControl::addStoragesFromUserDirectoriesConfig(
if (type == MemoryAccessStorage::STORAGE_TYPE)
{
addMemoryStorage(name);
bool allow_backup = config.getBool(prefix + ".allow_backup", true);
addMemoryStorage(name, allow_backup);
}
else if (type == UsersConfigAccessStorage::STORAGE_TYPE)
{
String path = config.getString(prefix + ".path");
if (std::filesystem::path{path}.is_relative() && std::filesystem::exists(config_dir + path))
path = config_dir + path;
addUsersConfigStorage(name, path, include_from_path, dbms_dir, get_zookeeper_function);
bool allow_backup = config.getBool(prefix + ".allow_backup", false); /// We don't backup users.xml by default.
addUsersConfigStorage(name, path, include_from_path, dbms_dir, get_zookeeper_function, allow_backup);
}
else if (type == DiskAccessStorage::STORAGE_TYPE)
{
String path = config.getString(prefix + ".path");
bool readonly = config.getBool(prefix + ".readonly", false);
addDiskStorage(name, path, readonly);
bool allow_backup = config.getBool(prefix + ".allow_backup", true);
addDiskStorage(name, path, readonly, allow_backup);
}
else if (type == LDAPAccessStorage::STORAGE_TYPE)
{
@ -349,7 +338,8 @@ void AccessControl::addStoragesFromUserDirectoriesConfig(
else if (type == ReplicatedAccessStorage::STORAGE_TYPE)
{
String zookeeper_path = config.getString(prefix + ".zookeeper_path");
addReplicatedStorage(name, zookeeper_path, get_zookeeper_function);
bool allow_backup = config.getBool(prefix + ".allow_backup", true);
addReplicatedStorage(name, zookeeper_path, get_zookeeper_function, allow_backup);
}
else
throw Exception("Unknown storage type '" + type + "' at " + prefix + " in config", ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG);
@ -383,12 +373,18 @@ void AccessControl::addStoragesFromMainConfig(
if (users_config_path != config_path)
checkForUsersNotInMainConfig(config, config_path, users_config_path, getLogger());
addUsersConfigStorage(users_config_path, include_from_path, dbms_dir, get_zookeeper_function);
addUsersConfigStorage(
UsersConfigAccessStorage::STORAGE_TYPE,
users_config_path,
include_from_path,
dbms_dir,
get_zookeeper_function,
/* allow_backup= */ false);
}
String disk_storage_dir = config.getString("access_control_path", "");
if (!disk_storage_dir.empty())
addDiskStorage(disk_storage_dir);
addDiskStorage(DiskAccessStorage::STORAGE_TYPE, disk_storage_dir, /* readonly= */ false, /* allow_backup= */ true);
if (has_user_directories)
addStoragesFromUserDirectoriesConfig(config, "user_directories", config_dir, dbms_dir, include_from_path, get_zookeeper_function);
@ -463,6 +459,23 @@ UUID AccessControl::authenticate(const Credentials & credentials, const Poco::Ne
}
}
/// Makes a backup of access entities of the specified `type`.
/// Forwards to backupAccessEntities(), passing the collector, `data_path_in_backup`,
/// this AccessControl and `type`.
void AccessControl::backup(BackupEntriesCollector & backup_entries_collector, AccessEntityType type, const String & data_path_in_backup) const
{
backupAccessEntities(backup_entries_collector, data_path_in_backup, *this, type);
}
/// Restore-side counterpart of backup(). This function itself only asks `restorer` to check
/// `data_path_in_backup` via checkPathInBackupToRestoreAccess(); it does not insert any entities.
void AccessControl::restore(RestorerFromBackup & restorer, const String & data_path_in_backup)
{
/// The restorer must already know about `data_path_in_backup`, but let's check.
restorer.checkPathInBackupToRestoreAccess(data_path_in_backup);
}
/// Inserts entities read from a backup: delegates to MultipleAccessStorage::insertFromBackup()
/// and, once that call has returned, calls sendNotifications() on the changes notifier.
void AccessControl::insertFromBackup(const std::vector<std::pair<UUID, AccessEntityPtr>> & entities_from_backup, const RestoreSettings & restore_settings, std::shared_ptr<IRestoreCoordination> restore_coordination)
{
MultipleAccessStorage::insertFromBackup(entities_from_backup, restore_settings, restore_coordination);
changes_notifier->sendNotifications();
}
void AccessControl::setExternalAuthenticatorsConfig(const Poco::Util::AbstractConfiguration & config)
{
external_authenticators->setConfiguration(config, getLogger());

View File

@ -42,6 +42,8 @@ class ClientInfo;
class ExternalAuthenticators;
class AccessChangesNotifier;
struct Settings;
class BackupEntriesCollector;
class RestorerFromBackup;
/// Manages access control entities.
@ -60,37 +62,31 @@ public:
void setUsersConfig(const Poco::Util::AbstractConfiguration & users_config_);
/// Adds UsersConfigAccessStorage.
void addUsersConfigStorage(const Poco::Util::AbstractConfiguration & users_config_);
void addUsersConfigStorage(const String & storage_name_,
const Poco::Util::AbstractConfiguration & users_config_);
void addUsersConfigStorage(const String & users_config_path_,
const String & include_from_path_,
const String & preprocessed_dir_,
const zkutil::GetZooKeeper & get_zookeeper_function_ = {});
const Poco::Util::AbstractConfiguration & users_config_,
bool allow_backup_);
void addUsersConfigStorage(const String & storage_name_,
const String & users_config_path_,
const String & include_from_path_,
const String & preprocessed_dir_,
const zkutil::GetZooKeeper & get_zookeeper_function_ = {});
const zkutil::GetZooKeeper & get_zookeeper_function_,
bool allow_backup_);
/// Loads access entities from the directory on the local disk.
/// Use that directory to keep created users/roles/etc.
void addDiskStorage(const String & directory_, bool readonly_ = false);
void addDiskStorage(const String & storage_name_, const String & directory_, bool readonly_ = false);
void addDiskStorage(const String & storage_name_, const String & directory_, bool readonly_, bool allow_backup_);
/// Adds MemoryAccessStorage which keeps access entities in memory.
void addMemoryStorage();
void addMemoryStorage(const String & storage_name_);
void addMemoryStorage(const String & storage_name_, bool allow_backup_);
/// Adds LDAPAccessStorage which allows querying remote LDAP server for user info.
void addLDAPStorage(const String & storage_name_, const Poco::Util::AbstractConfiguration & config_, const String & prefix_);
void addReplicatedStorage(const String & storage_name,
const String & zookeeper_path,
const zkutil::GetZooKeeper & get_zookeeper_function);
const zkutil::GetZooKeeper & get_zookeeper_function,
bool allow_backup);
/// Adds storages from <users_directories> config.
void addStoragesFromUserDirectoriesConfig(const Poco::Util::AbstractConfiguration & config,
@ -123,6 +119,11 @@ public:
scope_guard subscribeForChanges(const std::vector<UUID> & ids, const OnChangedHandler & handler) const;
UUID authenticate(const Credentials & credentials, const Poco::Net::IPAddress & address) const;
/// Makes a backup of access entities.
void backup(BackupEntriesCollector & backup_entries_collector, AccessEntityType type, const String & data_path_in_backup) const;
static void restore(RestorerFromBackup & restorer, const String & data_path_in_backup);
void setExternalAuthenticatorsConfig(const Poco::Util::AbstractConfiguration & config);
/// Sets the default profile's name.
@ -197,6 +198,8 @@ public:
/// Gets manager of notifications.
AccessChangesNotifier & getChangesNotifier();
void insertFromBackup(const std::vector<std::pair<UUID, AccessEntityPtr>> & entities_from_backup, const RestoreSettings & restore_settings, std::shared_ptr<IRestoreCoordination> restore_coordination) override;
private:
class ContextAccessCache;
class CustomSettingsPrefixes;

View File

@ -736,6 +736,18 @@ AccessRights::AccessRights(const AccessFlags & access)
}
/// Constructs access rights by calling grant() with the single `element`.
AccessRights::AccessRights(const AccessRightsElement & element)
{
grant(element);
}
/// Constructs access rights by calling grant() with the whole `elements` collection.
AccessRights::AccessRights(const AccessRightsElements & elements)
{
grant(elements);
}
bool AccessRights::isEmpty() const
{
return !root && !root_with_grant_option;

View File

@ -16,6 +16,9 @@ class AccessRights
public:
AccessRights();
explicit AccessRights(const AccessFlags & access);
explicit AccessRights(const AccessRightsElement & element);
explicit AccessRights(const AccessRightsElements & elements);
~AccessRights();
AccessRights(const AccessRights & src);
AccessRights & operator =(const AccessRights & src);

View File

@ -1,7 +1,9 @@
#include <Access/Common/AccessEntityType.h>
#include <Common/Exception.h>
#include <Common/quoteString.h>
#include <base/range.h>
#include <boost/algorithm/string/case_conv.hpp>
#include <boost/algorithm/string/predicate.hpp>
#include <boost/algorithm/string/replace.hpp>
@ -15,6 +17,7 @@ namespace ErrorCodes
extern const int UNKNOWN_QUOTA;
extern const int THERE_IS_NO_PROFILE;
extern const int LOGICAL_ERROR;
extern const int BAD_ARGUMENTS;
}
@ -83,4 +86,15 @@ const AccessEntityTypeInfo & AccessEntityTypeInfo::get(AccessEntityType type_)
throw Exception("Unknown type: " + std::to_string(static_cast<size_t>(type_)), ErrorCodes::LOGICAL_ERROR);
}
/// Finds the access entity type whose info name equals `name_` (compared case-insensitively).
/// Every type in the range up to AccessEntityType::MAX is tried in order.
/// Throws BAD_ARGUMENTS if none of them matches.
AccessEntityType AccessEntityTypeInfo::parseType(const String & name_)
{
    for (auto candidate : collections::range(AccessEntityType::MAX))
    {
        if (boost::iequals(get(candidate).name, name_))
            return candidate;
    }

    throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown type: {}", name_);
}
}

View File

@ -35,6 +35,7 @@ struct AccessEntityTypeInfo
String formatEntityNameWithType(const String & entity_name) const;
static const AccessEntityTypeInfo & get(AccessEntityType type_);
static AccessEntityType parseType(const String & name_);
};
}

View File

@ -99,6 +99,7 @@ enum class AccessType
\
M(TRUNCATE, "TRUNCATE TABLE", TABLE, ALL) \
M(OPTIMIZE, "OPTIMIZE TABLE", TABLE, ALL) \
M(BACKUP, "", TABLE, ALL) /* allows to backup tables */\
\
M(KILL_QUERY, "", GLOBAL, ALL) /* allows to kill a query started by another user
(anyone can kill his own queries) */\

View File

@ -1,6 +1,7 @@
#include <Access/DiskAccessStorage.h>
#include <Access/AccessEntityIO.h>
#include <Access/AccessChangesNotifier.h>
#include <Backups/RestoreSettings.h>
#include <IO/WriteHelpers.h>
#include <IO/ReadHelpers.h>
#include <IO/ReadBufferFromFile.h>
@ -165,11 +166,12 @@ namespace
}
DiskAccessStorage::DiskAccessStorage(const String & storage_name_, const String & directory_path_, bool readonly_, AccessChangesNotifier & changes_notifier_)
DiskAccessStorage::DiskAccessStorage(const String & storage_name_, const String & directory_path_, AccessChangesNotifier & changes_notifier_, bool readonly_, bool allow_backup_)
: IAccessStorage(storage_name_), changes_notifier(changes_notifier_)
{
directory_path = makeDirectoryPathCanonical(directory_path_);
readonly = readonly_;
backup_allowed = allow_backup_;
std::error_code create_dir_error_code;
std::filesystem::create_directories(directory_path, create_dir_error_code);
@ -457,7 +459,7 @@ AccessEntityPtr DiskAccessStorage::readImpl(const UUID & id, bool throw_if_not_e
}
std::optional<String> DiskAccessStorage::readNameImpl(const UUID & id, bool throw_if_not_exists) const
std::optional<std::pair<String, AccessEntityType>> DiskAccessStorage::readNameWithTypeImpl(const UUID & id, bool throw_if_not_exists) const
{
std::lock_guard lock{mutex};
auto it = entries_by_id.find(id);
@ -468,21 +470,27 @@ std::optional<String> DiskAccessStorage::readNameImpl(const UUID & id, bool thro
else
return std::nullopt;
}
return it->second.name;
return std::make_pair(it->second.name, it->second.type);
}
std::optional<UUID> DiskAccessStorage::insertImpl(const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists)
{
UUID id = generateRandomID();
std::lock_guard lock{mutex};
if (insertNoLock(id, new_entity, replace_if_exists, throw_if_exists))
if (insertWithID(id, new_entity, replace_if_exists, throw_if_exists))
return id;
return std::nullopt;
}
/// Locking wrapper around insertNoLock(): holds `mutex` for the duration of the call
/// and returns whatever insertNoLock() returns.
bool DiskAccessStorage::insertWithID(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists)
{
std::lock_guard lock{mutex};
return insertNoLock(id, new_entity, replace_if_exists, throw_if_exists);
}
bool DiskAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists)
{
const String & name = new_entity->getName();
@ -649,4 +657,20 @@ void DiskAccessStorage::deleteAccessEntityOnDisk(const UUID & id) const
throw Exception("Couldn't delete " + file_path, ErrorCodes::FILE_DOESNT_EXIST);
}
/// Inserts entities restored from a backup, keeping the IDs they had in the backup.
/// Throws via throwRestoreNotAllowed() if isRestoreAllowed() is false.
/// `restore_settings.create_access` selects how an already existing entity is treated:
/// kReplace -> replace it, kCreate -> throw; for any other mode both flags passed to insertWithID() are false.
/// The restore coordination argument is not used by this storage.
void DiskAccessStorage::insertFromBackup(
    const std::vector<std::pair<UUID, AccessEntityPtr>> & entities_from_backup,
    const RestoreSettings & restore_settings,
    std::shared_ptr<IRestoreCoordination>)
{
    if (!isRestoreAllowed())
        throwRestoreNotAllowed();

    const auto & creation_mode = restore_settings.create_access;
    const bool replace_existing = (creation_mode == RestoreAccessCreationMode::kReplace);
    const bool fail_on_existing = (creation_mode == RestoreAccessCreationMode::kCreate);

    for (const auto & id_and_entity : entities_from_backup)
        insertWithID(id_and_entity.first, id_and_entity.second, replace_existing, fail_on_existing);
}
}

View File

@ -15,7 +15,7 @@ class DiskAccessStorage : public IAccessStorage
public:
static constexpr char STORAGE_TYPE[] = "local directory";
DiskAccessStorage(const String & storage_name_, const String & directory_path_, bool readonly_, AccessChangesNotifier & changes_notifier_);
DiskAccessStorage(const String & storage_name_, const String & directory_path_, AccessChangesNotifier & changes_notifier_, bool readonly_, bool allow_backup_);
~DiskAccessStorage() override;
const char * getStorageType() const override { return STORAGE_TYPE; }
@ -29,11 +29,14 @@ public:
bool exists(const UUID & id) const override;
bool isBackupAllowed() const override { return backup_allowed; }
void insertFromBackup(const std::vector<std::pair<UUID, AccessEntityPtr>> & entities_from_backup, const RestoreSettings & restore_settings, std::shared_ptr<IRestoreCoordination> restore_coordination) override;
private:
std::optional<UUID> findImpl(AccessEntityType type, const String & name) const override;
std::vector<UUID> findAllImpl(AccessEntityType type) const override;
AccessEntityPtr readImpl(const UUID & id, bool throw_if_not_exists) const override;
std::optional<String> readNameImpl(const UUID & id, bool throw_if_not_exists) const override;
std::optional<std::pair<String, AccessEntityType>> readNameWithTypeImpl(const UUID & id, bool throw_if_not_exists) const override;
std::optional<UUID> insertImpl(const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists) override;
bool removeImpl(const UUID & id, bool throw_if_not_exists) override;
bool updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists) override;
@ -47,6 +50,7 @@ private:
void listsWritingThreadFunc();
void stopListsWritingThread();
bool insertWithID(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists);
bool insertNoLock(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists);
bool removeNoLock(const UUID & id, bool throw_if_not_exists);
bool updateNoLock(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists);
@ -65,7 +69,6 @@ private:
};
String directory_path;
std::atomic<bool> readonly;
std::unordered_map<UUID, Entry> entries_by_id;
std::unordered_map<std::string_view, Entry *> entries_by_name_and_type[static_cast<size_t>(AccessEntityType::MAX)];
boost::container::flat_set<AccessEntityType> types_of_lists_to_write;
@ -74,6 +77,8 @@ private:
std::condition_variable lists_writing_thread_should_exit; /// Signals `lists_writing_thread` to exit.
bool lists_writing_thread_is_waiting = false;
AccessChangesNotifier & changes_notifier;
std::atomic<bool> readonly;
std::atomic<bool> backup_allowed;
mutable std::mutex mutex;
};
}

View File

@ -1,6 +1,7 @@
#pragma once
#include <Access/SettingsProfileElement.h>
#include <base/defines.h>
#include <Core/UUID.h>
#include <boost/container/flat_set.hpp>
#include <mutex>
@ -42,7 +43,7 @@ private:
void setInfo(const std::shared_ptr<const SettingsProfilesInfo> & info_);
const Params params;
std::shared_ptr<const SettingsProfilesInfo> info;
std::shared_ptr<const SettingsProfilesInfo> info TSA_GUARDED_BY(mutex);
mutable std::mutex mutex;
};
}

View File

@ -231,18 +231,23 @@ void parseLDAPRoleSearchParams(LDAPClient::RoleSearchParams & params, const Poco
params.prefix = config.getString(prefix + ".prefix");
}
void ExternalAuthenticators::reset()
void ExternalAuthenticators::resetImpl()
{
std::scoped_lock lock(mutex);
ldap_client_params_blueprint.clear();
ldap_caches.clear();
kerberos_params.reset();
}
/// Public entry point for resetting: acquires `mutex` and then calls resetImpl().
void ExternalAuthenticators::reset()
{
std::scoped_lock lock(mutex);
resetImpl();
}
void ExternalAuthenticators::setConfiguration(const Poco::Util::AbstractConfiguration & config, Poco::Logger * log)
{
std::scoped_lock lock(mutex);
reset();
resetImpl();
Poco::Util::AbstractConfiguration::Keys all_keys;
config.keys("", all_keys);

View File

@ -3,6 +3,7 @@
#include <Access/LDAPClient.h>
#include <Access/Credentials.h>
#include <Access/GSSAcceptor.h>
#include <base/defines.h>
#include <base/types.h>
#include <chrono>
@ -22,7 +23,6 @@ namespace Poco
}
}
namespace DB
{
@ -51,10 +51,12 @@ private:
using LDAPCaches = std::map<String, LDAPCache>; // server name -> cache
using LDAPParams = std::map<String, LDAPClient::Params>; // server name -> params
mutable std::recursive_mutex mutex;
LDAPParams ldap_client_params_blueprint;
mutable LDAPCaches ldap_caches;
std::optional<GSSAcceptorContext::Params> kerberos_params;
mutable std::mutex mutex;
LDAPParams ldap_client_params_blueprint TSA_GUARDED_BY(mutex) ;
mutable LDAPCaches ldap_caches TSA_GUARDED_BY(mutex) ;
std::optional<GSSAcceptorContext::Params> kerberos_params TSA_GUARDED_BY(mutex) ;
void resetImpl() TSA_REQUIRES(mutex);
};
void parseLDAPRoleSearchParams(LDAPClient::RoleSearchParams & params, const Poco::Util::AbstractConfiguration & config, const String & prefix);

View File

@ -2,6 +2,8 @@
#include <Access/RolesOrUsersSet.h>
#include <boost/range/algorithm/set_algorithm.hpp>
#include <boost/range/algorithm_ext/erase.hpp>
#include <boost/range/algorithm/copy.hpp>
namespace DB
{
@ -166,4 +168,57 @@ void GrantedRoles::makeIntersection(const GrantedRoles & other)
return other.roles_with_admin_option.find(id) == other.roles_with_admin_option.end();
});
}
std::vector<UUID> GrantedRoles::findDependencies() const
{
std::vector<UUID> res;
boost::range::copy(roles, std::back_inserter(res));
return res;
}
void GrantedRoles::replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids)
{
std::vector<UUID> new_ids;
for (auto it = roles.begin(); it != roles.end();)
{
auto id = *it;
auto it_new_id = old_to_new_ids.find(id);
if (it_new_id != old_to_new_ids.end())
{
auto new_id = it_new_id->second;
new_ids.push_back(new_id);
it = roles.erase(it);
}
else
{
++it;
}
}
if (!new_ids.empty())
{
boost::range::copy(new_ids, std::inserter(roles, roles.end()));
new_ids.clear();
for (auto it = roles_with_admin_option.begin(); it != roles_with_admin_option.end();)
{
auto id = *it;
auto it_new_id = old_to_new_ids.find(id);
if (it_new_id != old_to_new_ids.end())
{
auto new_id = it_new_id->second;
new_ids.push_back(new_id);
it = roles_with_admin_option.erase(it);
}
else
{
++it;
}
}
boost::range::copy(new_ids, std::inserter(roles_with_admin_option, roles_with_admin_option.end()));
}
}
}

View File

@ -3,6 +3,7 @@
#include <Core/UUID.h>
#include <boost/container/flat_set.hpp>
#include <vector>
#include <unordered_map>
namespace DB
@ -24,6 +25,8 @@ public:
void revokeAdminOption(const UUID & role_);
void revokeAdminOption(const std::vector<UUID> & roles_);
bool isEmpty() const { return roles.empty(); }
bool isGranted(const UUID & role_) const;
bool isGrantedWithAdminOption(const UUID & role_) const;
@ -54,6 +57,9 @@ public:
friend bool operator ==(const GrantedRoles & left, const GrantedRoles & right) { return (left.roles == right.roles) && (left.roles_with_admin_option == right.roles_with_admin_option); }
friend bool operator !=(const GrantedRoles & left, const GrantedRoles & right) { return !(left == right); }
std::vector<UUID> findDependencies() const;
void replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids);
private:
boost::container::flat_set<UUID> roles;
boost::container::flat_set<UUID> roles_with_admin_option;

View File

@ -4,6 +4,7 @@
#include <Common/typeid_cast.h>
#include <base/types.h>
#include <memory>
#include <unordered_map>
namespace DB
@ -45,6 +46,15 @@ struct IAccessEntity
bool operator()(const std::shared_ptr<const IAccessEntity> & lhs, const std::shared_ptr<const IAccessEntity> & rhs) const { return operator()(*lhs, *rhs); }
};
/// Finds all dependencies.
virtual std::vector<UUID> findDependencies() const { return {}; }
/// Replaces dependencies according to a specified map.
virtual void replaceDependencies(const std::unordered_map<UUID, UUID> & /* old_to_new_ids */) {}
/// Whether this access entity should be written to a backup.
virtual bool isBackupAllowed() const { return false; }
protected:
String name;

View File

@ -10,6 +10,7 @@
#include <base/FnTraits.h>
#include <boost/algorithm/string/join.hpp>
#include <boost/algorithm/string/replace.hpp>
#include <boost/range/algorithm_ext/erase.hpp>
namespace DB
@ -19,6 +20,7 @@ namespace ErrorCodes
extern const int ACCESS_ENTITY_ALREADY_EXISTS;
extern const int ACCESS_ENTITY_NOT_FOUND;
extern const int ACCESS_STORAGE_READONLY;
extern const int ACCESS_STORAGE_DOESNT_ALLOW_BACKUP;
extern const int WRONG_PASSWORD;
extern const int IP_ADDRESS_NOT_ALLOWED;
extern const int LOGICAL_ERROR;
@ -83,13 +85,15 @@ std::vector<UUID> IAccessStorage::getIDs(AccessEntityType type, const Strings &
String IAccessStorage::readName(const UUID & id) const
{
return *readNameImpl(id, /* throw_if_not_exists = */ true);
return readNameWithType(id).first;
}
std::optional<String> IAccessStorage::readName(const UUID & id, bool throw_if_not_exists) const
{
return readNameImpl(id, throw_if_not_exists);
if (auto name_and_type = readNameWithType(id, throw_if_not_exists))
return name_and_type->first;
return std::nullopt;
}
@ -99,7 +103,7 @@ Strings IAccessStorage::readNames(const std::vector<UUID> & ids, bool throw_if_n
res.reserve(ids.size());
for (const auto & id : ids)
{
if (auto name = readNameImpl(id, throw_if_not_exists))
if (auto name = readName(id, throw_if_not_exists))
res.emplace_back(std::move(name).value());
}
return res;
@ -118,14 +122,42 @@ Strings IAccessStorage::tryReadNames(const std::vector<UUID> & ids) const
}
std::optional<String> IAccessStorage::readNameImpl(const UUID & id, bool throw_if_not_exists) const
/// Returns the name and the type of the entity with the specified ID.
/// Calls readNameWithTypeImpl() with throw_if_not_exists = true and dereferences its result.
std::pair<String, AccessEntityType> IAccessStorage::readNameWithType(const UUID & id) const
{
return *readNameWithTypeImpl(id, /* throw_if_not_exists = */ true);
}
/// Same as above, but the caller chooses `throw_if_not_exists`;
/// the result of readNameWithTypeImpl() is returned as is.
std::optional<std::pair<String, AccessEntityType>> IAccessStorage::readNameWithType(const UUID & id, bool throw_if_not_exists) const
{
return readNameWithTypeImpl(id, throw_if_not_exists);
}
/// Variant of readNameWithType() which always passes throw_if_not_exists = false to readNameWithTypeImpl().
std::optional<std::pair<String, AccessEntityType>> IAccessStorage::tryReadNameWithType(const UUID & id) const
{
return readNameWithTypeImpl(id, /* throw_if_not_exists = */ false);
}
std::optional<std::pair<String, AccessEntityType>> IAccessStorage::readNameWithTypeImpl(const UUID & id, bool throw_if_not_exists) const
{
if (auto entity = read(id, throw_if_not_exists))
return entity->getName();
return std::make_pair(entity->getName(), entity->getType());
return std::nullopt;
}
/// Reads all entities of the specified type and returns them paired with their IDs.
/// IDs come from findAll(type); an ID for which tryRead() returns null is skipped.
std::vector<std::pair<UUID, AccessEntityPtr>> IAccessStorage::readAllWithIDs(AccessEntityType type) const
{
    std::vector<std::pair<UUID, AccessEntityPtr>> ids_with_entities;

    for (const auto & id : findAll(type))
    {
        auto entity = tryRead(id);
        if (!entity)
            continue;
        ids_with_entities.emplace_back(id, entity);
    }

    return ids_with_entities;
}
UUID IAccessStorage::insert(const AccessEntityPtr & entity)
{
return *insert(entity, /* replace_if_exists = */ false, /* throw_if_exists = */ true);
@ -488,6 +520,29 @@ bool IAccessStorage::isAddressAllowed(const User & user, const Poco::Net::IPAddr
}
/// Restoring is allowed when isBackupAllowed() returns true and isReadOnly() returns false.
bool IAccessStorage::isRestoreAllowed() const
{
return isBackupAllowed() && !isReadOnly();
}
/// Collects the entities of the specified type which should be written to a backup.
/// Throws via throwBackupNotAllowed() if isBackupAllowed() is false for this storage.
/// Entities whose own isBackupAllowed() returns false are dropped from the result.
/// The backup settings argument is not used here.
std::vector<std::pair<UUID, AccessEntityPtr>> IAccessStorage::readAllForBackup(AccessEntityType type, const BackupSettings &) const
{
    if (!isBackupAllowed())
        throwBackupNotAllowed();

    auto entities = readAllWithIDs(type);

    const auto is_excluded = [](const std::pair<UUID, AccessEntityPtr> & id_and_entity)
    {
        return !id_and_entity.second->isBackupAllowed();
    };
    boost::range::remove_erase_if(entities, is_excluded);

    return entities;
}
/// Base implementation; it never inserts anything and always throws.
/// If isRestoreAllowed() is false it throws via throwRestoreNotAllowed(),
/// otherwise it throws NOT_IMPLEMENTED with the storage type in the message.
void IAccessStorage::insertFromBackup(const std::vector<std::pair<UUID, AccessEntityPtr>> &, const RestoreSettings &, std::shared_ptr<IRestoreCoordination>)
{
if (!isRestoreAllowed())
throwRestoreNotAllowed();
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "insertFromBackup() is not implemented in {}", getStorageType());
}
UUID IAccessStorage::generateRandomID()
{
static Poco::UUIDGenerator generator;
@ -577,6 +632,7 @@ void IAccessStorage::throwReadonlyCannotRemove(AccessEntityType type, const Stri
ErrorCodes::ACCESS_STORAGE_READONLY);
}
void IAccessStorage::throwAddressNotAllowed(const Poco::Net::IPAddress & address)
{
throw Exception("Connections from " + address.toString() + " are not allowed", ErrorCodes::IP_ADDRESS_NOT_ALLOWED);
@ -589,9 +645,20 @@ void IAccessStorage::throwAuthenticationTypeNotAllowed(AuthenticationType auth_t
"Authentication type {} is not allowed, check the setting allow_{} in the server configuration",
toString(auth_type), AuthenticationTypeInfo::get(auth_type).name);
}
void IAccessStorage::throwInvalidCredentials()
{
throw Exception("Invalid credentials", ErrorCodes::WRONG_PASSWORD);
}
/// Throws ACCESS_STORAGE_DOESNT_ALLOW_BACKUP with a message naming this storage (getStorageName()).
void IAccessStorage::throwBackupNotAllowed() const
{
throw Exception(ErrorCodes::ACCESS_STORAGE_DOESNT_ALLOW_BACKUP, "Backup of access entities is not allowed in {}", getStorageName());
}
/// Throws ACCESS_STORAGE_DOESNT_ALLOW_BACKUP (the same error code as for backups)
/// with a message saying that restore is not allowed in this storage (getStorageName()).
void IAccessStorage::throwRestoreNotAllowed() const
{
throw Exception(ErrorCodes::ACCESS_STORAGE_DOESNT_ALLOW_BACKUP, "Restore of access entities is not allowed in {}", getStorageName());
}
}

View File

@ -18,6 +18,9 @@ struct User;
class Credentials;
class ExternalAuthenticators;
enum class AuthenticationType;
struct BackupSettings;
struct RestoreSettings;
class IRestoreCoordination;
/// Contains entities, i.e. instances of classes derived from IAccessEntity.
/// The implementations of this class MUST be thread-safe.
@ -101,6 +104,16 @@ public:
std::optional<String> tryReadName(const UUID & id) const;
Strings tryReadNames(const std::vector<UUID> & ids) const;
std::pair<String, AccessEntityType> readNameWithType(const UUID & id) const;
std::optional<std::pair<String, AccessEntityType>> readNameWithType(const UUID & id, bool throw_if_not_exists) const;
std::optional<std::pair<String, AccessEntityType>> tryReadNameWithType(const UUID & id) const;
/// Reads all entities and returns them with their IDs.
template <typename EntityClassT>
std::vector<std::pair<UUID, std::shared_ptr<const EntityClassT>>> readAllWithIDs() const;
std::vector<std::pair<UUID, AccessEntityPtr>> readAllWithIDs(AccessEntityType type) const;
/// Inserts an entity to the storage. Returns ID of a new entry in the storage.
/// Throws an exception if the specified name already exists.
UUID insert(const AccessEntityPtr & entity);
@ -143,11 +156,19 @@ public:
UUID authenticate(const Credentials & credentials, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators, bool allow_no_password, bool allow_plaintext_password) const;
std::optional<UUID> authenticate(const Credentials & credentials, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators, bool throw_if_user_not_exists, bool allow_no_password, bool allow_plaintext_password) const;
/// Returns true if this storage can be stored to or restored from a backup.
virtual bool isBackupAllowed() const { return false; }
virtual bool isRestoreAllowed() const;
/// Makes a backup of this access storage.
virtual std::vector<std::pair<UUID, AccessEntityPtr>> readAllForBackup(AccessEntityType type, const BackupSettings & backup_settings) const;
virtual void insertFromBackup(const std::vector<std::pair<UUID, AccessEntityPtr>> & entities_from_backup, const RestoreSettings & restore_settings, std::shared_ptr<IRestoreCoordination> restore_coordination);
protected:
virtual std::optional<UUID> findImpl(AccessEntityType type, const String & name) const = 0;
virtual std::vector<UUID> findAllImpl(AccessEntityType type) const = 0;
virtual AccessEntityPtr readImpl(const UUID & id, bool throw_if_not_exists) const = 0;
virtual std::optional<String> readNameImpl(const UUID & id, bool throw_if_not_exists) const;
virtual std::optional<std::pair<String, AccessEntityType>> readNameWithTypeImpl(const UUID & id, bool throw_if_not_exists) const;
virtual std::optional<UUID> insertImpl(const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists);
virtual bool removeImpl(const UUID & id, bool throw_if_not_exists);
virtual bool updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists);
@ -170,6 +191,8 @@ protected:
[[noreturn]] static void throwAddressNotAllowed(const Poco::Net::IPAddress & address);
[[noreturn]] static void throwInvalidCredentials();
[[noreturn]] static void throwAuthenticationTypeNotAllowed(AuthenticationType auth_type);
[[noreturn]] void throwBackupNotAllowed() const;
[[noreturn]] void throwRestoreNotAllowed() const;
private:
const String storage_name;
@ -218,4 +241,17 @@ std::shared_ptr<const EntityClassT> IAccessStorage::tryRead(const String & name)
{
return read<EntityClassT>(name, false);
}
template <typename EntityClassT>
std::vector<std::pair<UUID, std::shared_ptr<const EntityClassT>>> IAccessStorage::readAllWithIDs() const
{
std::vector<std::pair<UUID, std::shared_ptr<const EntityClassT>>> entities;
for (const auto & id : findAll<EntityClassT>())
{
if (auto entity = tryRead<EntityClassT>(id))
entities.emplace_back(id, entity);
}
return entities;
}
}

View File

@ -28,7 +28,7 @@ namespace ErrorCodes
LDAPAccessStorage::LDAPAccessStorage(const String & storage_name_, AccessControl & access_control_, const Poco::Util::AbstractConfiguration & config, const String & prefix)
: IAccessStorage(storage_name_), access_control(access_control_), memory_storage(storage_name_, access_control.getChangesNotifier())
: IAccessStorage(storage_name_), access_control(access_control_), memory_storage(storage_name_, access_control.getChangesNotifier(), false)
{
setConfiguration(config, prefix);
}
@ -36,6 +36,7 @@ LDAPAccessStorage::LDAPAccessStorage(const String & storage_name_, AccessControl
/// Returns a copy of `ldap_server_name`; the value is read while `mutex` is held.
String LDAPAccessStorage::getLDAPServerName() const
{
std::scoped_lock lock(mutex);
return ldap_server_name;
}
@ -442,10 +443,10 @@ AccessEntityPtr LDAPAccessStorage::readImpl(const UUID & id, bool throw_if_not_e
}
std::optional<String> LDAPAccessStorage::readNameImpl(const UUID & id, bool throw_if_not_exists) const
std::optional<std::pair<String, AccessEntityType>> LDAPAccessStorage::readNameWithTypeImpl(const UUID & id, bool throw_if_not_exists) const
{
std::scoped_lock lock(mutex);
return memory_storage.readName(id, throw_if_not_exists);
return memory_storage.readNameWithType(id, throw_if_not_exists);
}
@ -504,4 +505,5 @@ std::optional<UUID> LDAPAccessStorage::authenticateImpl(
return id;
}
}

View File

@ -47,7 +47,7 @@ private: // IAccessStorage implementations.
virtual std::optional<UUID> findImpl(AccessEntityType type, const String & name) const override;
virtual std::vector<UUID> findAllImpl(AccessEntityType type) const override;
virtual AccessEntityPtr readImpl(const UUID & id, bool throw_if_not_exists) const override;
virtual std::optional<String> readNameImpl(const UUID & id, bool throw_if_not_exists) const override;
virtual std::optional<std::pair<String, AccessEntityType>> readNameWithTypeImpl(const UUID & id, bool throw_if_not_exists) const override;
virtual std::optional<UUID> authenticateImpl(const Credentials & credentials, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators, bool throw_if_user_not_exists, bool allow_no_password, bool allow_plaintext_password) const override;
void setConfiguration(const Poco::Util::AbstractConfiguration & config, const String & prefix);

View File

@ -1,5 +1,6 @@
#include <Access/MemoryAccessStorage.h>
#include <Access/AccessChangesNotifier.h>
#include <Backups/RestoreSettings.h>
#include <base/scope_guard.h>
#include <boost/container/flat_set.hpp>
#include <boost/range/adaptor/map.hpp>
@ -8,8 +9,8 @@
namespace DB
{
MemoryAccessStorage::MemoryAccessStorage(const String & storage_name_, AccessChangesNotifier & changes_notifier_)
: IAccessStorage(storage_name_), changes_notifier(changes_notifier_)
MemoryAccessStorage::MemoryAccessStorage(const String & storage_name_, AccessChangesNotifier & changes_notifier_, bool allow_backup_)
: IAccessStorage(storage_name_), changes_notifier(changes_notifier_), backup_allowed(allow_backup_)
{
}
@ -65,14 +66,20 @@ AccessEntityPtr MemoryAccessStorage::readImpl(const UUID & id, bool throw_if_not
std::optional<UUID> MemoryAccessStorage::insertImpl(const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists)
{
UUID id = generateRandomID();
std::lock_guard lock{mutex};
if (insertNoLock(id, new_entity, replace_if_exists, throw_if_exists))
if (insertWithID(id, new_entity, replace_if_exists, throw_if_exists))
return id;
return std::nullopt;
}
/// Inserts `new_entity` under the caller-supplied `id` instead of a freshly generated one.
/// Acquires `mutex` and forwards all arguments to insertNoLock(); its result is returned unchanged.
bool MemoryAccessStorage::insertWithID(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists)
{
std::lock_guard lock{mutex};
return insertNoLock(id, new_entity, replace_if_exists, throw_if_exists);
}
bool MemoryAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists)
{
const String & name = new_entity->getName();
@ -264,4 +271,20 @@ void MemoryAccessStorage::setAll(const std::vector<std::pair<UUID, AccessEntityP
}
}
void MemoryAccessStorage::insertFromBackup(
    const std::vector<std::pair<UUID, AccessEntityPtr>> & entities_from_backup,
    const RestoreSettings & restore_settings,
    std::shared_ptr<IRestoreCoordination>)
{
    /// Restores entities into this storage, keeping the IDs they had in the backup.
    /// The restore coordination object is not used here (the parameter is unnamed).
    if (!isRestoreAllowed())
        throwRestoreNotAllowed();

    /// The `create_access` restore setting decides what happens on a conflict with an existing entity:
    /// kReplace -> replace it, kCreate -> throw; for any other mode both flags stay false.
    const auto & creation_mode = restore_settings.create_access;
    const bool replace_if_exists = (creation_mode == RestoreAccessCreationMode::kReplace);
    const bool throw_if_exists = (creation_mode == RestoreAccessCreationMode::kCreate);

    for (const auto & id_and_entity : entities_from_backup)
        insertWithID(id_and_entity.first, id_and_entity.second, replace_if_exists, throw_if_exists);
}
}

View File

@ -1,6 +1,7 @@
#pragma once
#include <Access/IAccessStorage.h>
#include <base/defines.h>
#include <list>
#include <memory>
#include <mutex>
@ -17,7 +18,7 @@ class MemoryAccessStorage : public IAccessStorage
public:
static constexpr char STORAGE_TYPE[] = "memory";
explicit MemoryAccessStorage(const String & storage_name_, AccessChangesNotifier & changes_notifier_);
explicit MemoryAccessStorage(const String & storage_name_, AccessChangesNotifier & changes_notifier_, bool allow_backup_);
const char * getStorageType() const override { return STORAGE_TYPE; }
@ -27,6 +28,9 @@ public:
bool exists(const UUID & id) const override;
bool isBackupAllowed() const override { return backup_allowed; }
void insertFromBackup(const std::vector<std::pair<UUID, AccessEntityPtr>> & entities_from_backup, const RestoreSettings & restore_settings, std::shared_ptr<IRestoreCoordination> restore_coordination) override;
private:
std::optional<UUID> findImpl(AccessEntityType type, const String & name) const override;
std::vector<UUID> findAllImpl(AccessEntityType type) const override;
@ -35,9 +39,10 @@ private:
bool removeImpl(const UUID & id, bool throw_if_not_exists) override;
bool updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists) override;
bool insertNoLock(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists);
bool removeNoLock(const UUID & id, bool throw_if_not_exists);
bool updateNoLock(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists);
bool insertWithID(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists);
bool insertNoLock(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists) TSA_REQUIRES(mutex);
bool removeNoLock(const UUID & id, bool throw_if_not_exists) TSA_REQUIRES(mutex);
bool updateNoLock(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists) TSA_REQUIRES(mutex);
struct Entry
{
@ -46,8 +51,9 @@ private:
};
mutable std::mutex mutex;
std::unordered_map<UUID, Entry> entries_by_id; /// We want to search entries both by ID and by the pair of name and type.
std::unordered_map<String, Entry *> entries_by_name_and_type[static_cast<size_t>(AccessEntityType::MAX)];
std::unordered_map<UUID, Entry> entries_by_id TSA_GUARDED_BY(mutex); /// We want to search entries both by ID and by the pair of name and type.
std::unordered_map<String, Entry *> entries_by_name_and_type[static_cast<size_t>(AccessEntityType::MAX)] TSA_GUARDED_BY(mutex);
AccessChangesNotifier & changes_notifier;
bool backup_allowed = false;
};
}

View File

@ -3,6 +3,7 @@
#include <Common/Exception.h>
#include <Common/quoteString.h>
#include <base/range.h>
#include <base/insertAtEnd.h>
#include <boost/range/adaptor/map.hpp>
#include <boost/range/adaptor/reversed.hpp>
#include <boost/range/algorithm/copy.hpp>
@ -42,14 +43,14 @@ MultipleAccessStorage::~MultipleAccessStorage()
void MultipleAccessStorage::setStorages(const std::vector<StoragePtr> & storages)
{
std::unique_lock lock{mutex};
std::lock_guard lock{mutex};
nested_storages = std::make_shared<const Storages>(storages);
ids_cache.reset();
}
void MultipleAccessStorage::addStorage(const StoragePtr & new_storage)
{
std::unique_lock lock{mutex};
std::lock_guard lock{mutex};
if (boost::range::find(*nested_storages, new_storage) != nested_storages->end())
return;
auto new_storages = std::make_shared<Storages>(*nested_storages);
@ -59,7 +60,7 @@ void MultipleAccessStorage::addStorage(const StoragePtr & new_storage)
void MultipleAccessStorage::removeStorage(const StoragePtr & storage_to_remove)
{
std::unique_lock lock{mutex};
std::lock_guard lock{mutex};
auto it = boost::range::find(*nested_storages, storage_to_remove);
if (it == nested_storages->end())
return;
@ -189,10 +190,10 @@ AccessEntityPtr MultipleAccessStorage::readImpl(const UUID & id, bool throw_if_n
}
std::optional<String> MultipleAccessStorage::readNameImpl(const UUID & id, bool throw_if_not_exists) const
std::optional<std::pair<String, AccessEntityType>> MultipleAccessStorage::readNameWithTypeImpl(const UUID & id, bool throw_if_not_exists) const
{
if (auto storage = findStorage(id))
return storage->readName(id, throw_if_not_exists);
return storage->readNameWithType(id, throw_if_not_exists);
if (throw_if_not_exists)
throwNotFound(id);
@ -357,4 +358,65 @@ MultipleAccessStorage::authenticateImpl(const Credentials & credentials, const P
return std::nullopt;
}
bool MultipleAccessStorage::isBackupAllowed() const
{
    /// Backup is allowed as soon as at least one nested storage allows it.
    const auto storages = getStoragesInternal();
    return std::any_of(
        storages->begin(),
        storages->end(),
        [](const auto & storage) { return storage->isBackupAllowed(); });
}
bool MultipleAccessStorage::isRestoreAllowed() const
{
    /// Restore is allowed as soon as at least one nested storage allows it.
    const auto storages = getStoragesInternal();
    return std::any_of(
        storages->begin(),
        storages->end(),
        [](const auto & storage) { return storage->isRestoreAllowed(); });
}
std::vector<std::pair<UUID, AccessEntityPtr>> MultipleAccessStorage::readAllForBackup(AccessEntityType type, const BackupSettings & backup_settings) const
{
    /// Concatenates the entities of the given type from every nested storage that allows backup.
    /// Throws via throwBackupNotAllowed() if no nested storage allows backup at all.
    std::vector<std::pair<UUID, AccessEntityPtr>> entities;
    bool any_storage_allows_backup = false;

    const auto storages = getStoragesInternal();
    for (const auto & storage : *storages)
    {
        if (!storage->isBackupAllowed())
            continue;

        insertAtEnd(entities, storage->readAllForBackup(type, backup_settings));
        any_storage_allows_backup = true;
    }

    if (!any_storage_allows_backup)
        throwBackupNotAllowed();

    return entities;
}
void MultipleAccessStorage::insertFromBackup(const std::vector<std::pair<UUID, AccessEntityPtr>> & entities_from_backup, const RestoreSettings & restore_settings, std::shared_ptr<IRestoreCoordination> restore_coordination)
{
    /// All entities go to the first nested storage (in iteration order) that allows restore.
    const auto storages = getStoragesInternal();
    for (const auto & storage : *storages)
    {
        if (!storage->isRestoreAllowed())
            continue;

        storage->insertFromBackup(entities_from_backup, restore_settings, restore_coordination);
        return;
    }

    /// No nested storage accepted the restore.
    throwRestoreNotAllowed();
}
}

View File

@ -1,6 +1,7 @@
#pragma once
#include <Access/IAccessStorage.h>
#include <base/defines.h>
#include <Common/LRUCache.h>
#include <mutex>
@ -42,11 +43,16 @@ public:
bool exists(const UUID & id) const override;
bool isBackupAllowed() const override;
bool isRestoreAllowed() const override;
std::vector<std::pair<UUID, AccessEntityPtr>> readAllForBackup(AccessEntityType type, const BackupSettings & backup_settings) const override;
void insertFromBackup(const std::vector<std::pair<UUID, AccessEntityPtr>> & entities_from_backup, const RestoreSettings & restore_settings, std::shared_ptr<IRestoreCoordination> restore_coordination) override;
protected:
std::optional<UUID> findImpl(AccessEntityType type, const String & name) const override;
std::vector<UUID> findAllImpl(AccessEntityType type) const override;
AccessEntityPtr readImpl(const UUID & id, bool throw_if_not_exists) const override;
std::optional<String> readNameImpl(const UUID & id, bool throw_if_not_exists) const override;
std::optional<std::pair<String, AccessEntityType>> readNameWithTypeImpl(const UUID & id, bool throw_if_not_exists) const override;
std::optional<UUID> insertImpl(const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists) override;
bool removeImpl(const UUID & id, bool throw_if_not_exists) override;
bool updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists) override;
@ -56,8 +62,8 @@ private:
using Storages = std::vector<StoragePtr>;
std::shared_ptr<const Storages> getStoragesInternal() const;
std::shared_ptr<const Storages> nested_storages;
mutable LRUCache<UUID, Storage> ids_cache;
std::shared_ptr<const Storages> nested_storages TSA_GUARDED_BY(mutex);
mutable LRUCache<UUID, Storage> ids_cache TSA_GUARDED_BY(mutex);
mutable std::mutex mutex;
};

View File

@ -19,5 +19,14 @@ bool Quota::equal(const IAccessEntity & other) const
return (all_limits == other_quota.all_limits) && (key_type == other_quota.key_type) && (to_roles == other_quota.to_roles);
}
/// Returns the IDs this quota refers to; these are exactly the IDs reported by `to_roles`.
std::vector<UUID> Quota::findDependencies() const
{
return to_roles.findDependencies();
}
/// Forwards the old-ID -> new-ID mapping to `to_roles`, the only member of the quota updated here.
void Quota::replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids)
{
to_roles.replaceDependencies(old_to_new_ids);
}
}

View File

@ -45,6 +45,10 @@ struct Quota : public IAccessEntity
std::shared_ptr<IAccessEntity> clone() const override { return cloneImpl<Quota>(); }
static constexpr const auto TYPE = AccessEntityType::QUOTA;
AccessEntityType getType() const override { return TYPE; }
std::vector<UUID> findDependencies() const override;
void replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids) override;
bool isBackupAllowed() const override { return true; }
};
using QuotaPtr = std::shared_ptr<const Quota>;

View File

@ -2,6 +2,8 @@
#include <Access/MemoryAccessStorage.h>
#include <Access/ReplicatedAccessStorage.h>
#include <Access/AccessChangesNotifier.h>
#include <Backups/RestoreSettings.h>
#include <Backups/IRestoreCoordination.h>
#include <IO/ReadHelpers.h>
#include <boost/container/flat_set.hpp>
#include <Common/ZooKeeper/KeeperException.h>
@ -33,12 +35,14 @@ ReplicatedAccessStorage::ReplicatedAccessStorage(
const String & storage_name_,
const String & zookeeper_path_,
zkutil::GetZooKeeper get_zookeeper_,
AccessChangesNotifier & changes_notifier_)
AccessChangesNotifier & changes_notifier_,
bool allow_backup_)
: IAccessStorage(storage_name_)
, zookeeper_path(zookeeper_path_)
, get_zookeeper(get_zookeeper_)
, watched_queue(std::make_shared<ConcurrentBoundedQueue<UUID>>(std::numeric_limits<size_t>::max()))
, changes_notifier(changes_notifier_)
, backup_allowed(allow_backup_)
{
if (zookeeper_path.empty())
throw Exception("ZooKeeper path must be non-empty", ErrorCodes::BAD_ARGUMENTS);
@ -99,6 +103,15 @@ static void retryOnZooKeeperUserError(size_t attempts, Func && function)
std::optional<UUID> ReplicatedAccessStorage::insertImpl(const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists)
{
    /// Generates a fresh random ID and inserts the entity under it.
    /// Returns that ID on success, or nullopt if insertWithID() reports that nothing was inserted.
    const UUID id = generateRandomID();
    const bool inserted = insertWithID(id, new_entity, replace_if_exists, throw_if_exists);
    if (!inserted)
        return std::nullopt;
    return id;
}
bool ReplicatedAccessStorage::insertWithID(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists)
{
const AccessEntityTypeInfo type_info = AccessEntityTypeInfo::get(new_entity->getType());
const String & name = new_entity->getName();
LOG_DEBUG(getLogger(), "Inserting entity of type {} named {} with id {}", type_info.name, name, toString(id));
@ -108,11 +121,11 @@ std::optional<UUID> ReplicatedAccessStorage::insertImpl(const AccessEntityPtr &
retryOnZooKeeperUserError(10, [&]{ ok = insertZooKeeper(zookeeper, id, new_entity, replace_if_exists, throw_if_exists); });
if (!ok)
return std::nullopt;
return false;
std::lock_guard lock{mutex};
refreshEntityNoLock(zookeeper, id);
return id;
return true;
}
@ -600,4 +613,19 @@ AccessEntityPtr ReplicatedAccessStorage::readImpl(const UUID & id, bool throw_if
return entry.entity;
}
void ReplicatedAccessStorage::insertFromBackup(const std::vector<std::pair<UUID, AccessEntityPtr>> & entities_from_backup, const RestoreSettings & restore_settings, std::shared_ptr<IRestoreCoordination> restore_coordination)
{
    /// Restores entities into this storage, keeping the IDs they had in the backup.
    if (!isRestoreAllowed())
        throwRestoreNotAllowed();

    /// Nothing is inserted unless the coordination object grants this ZooKeeper path to us.
    /// NOTE(review): presumably this makes only one host restore into a shared path - confirm.
    const bool acquired = restore_coordination->acquireReplicatedAccessStorage(zookeeper_path);
    if (!acquired)
        return;

    /// The `create_access` restore setting decides what happens on a conflict with an existing entity:
    /// kReplace -> replace it, kCreate -> throw; for any other mode both flags stay false.
    const auto & creation_mode = restore_settings.create_access;
    const bool replace_if_exists = (creation_mode == RestoreAccessCreationMode::kReplace);
    const bool throw_if_exists = (creation_mode == RestoreAccessCreationMode::kCreate);

    for (const auto & id_and_entity : entities_from_backup)
        insertWithID(id_and_entity.first, id_and_entity.second, replace_if_exists, throw_if_exists);
}
}

View File

@ -6,6 +6,7 @@
#include <mutex>
#include <unordered_map>
#include <base/defines.h>
#include <base/scope_guard.h>
#include <Common/ThreadPool.h>
@ -26,7 +27,7 @@ class ReplicatedAccessStorage : public IAccessStorage
public:
static constexpr char STORAGE_TYPE[] = "replicated";
ReplicatedAccessStorage(const String & storage_name, const String & zookeeper_path, zkutil::GetZooKeeper get_zookeeper, AccessChangesNotifier & changes_notifier_);
ReplicatedAccessStorage(const String & storage_name, const String & zookeeper_path, zkutil::GetZooKeeper get_zookeeper, AccessChangesNotifier & changes_notifier_, bool allow_backup);
virtual ~ReplicatedAccessStorage() override;
const char * getStorageType() const override { return STORAGE_TYPE; }
@ -36,6 +37,9 @@ public:
bool exists(const UUID & id) const override;
bool isBackupAllowed() const override { return backup_allowed; }
void insertFromBackup(const std::vector<std::pair<UUID, AccessEntityPtr>> & entities_from_backup, const RestoreSettings & restore_settings, std::shared_ptr<IRestoreCoordination> restore_coordination) override;
private:
String zookeeper_path;
zkutil::GetZooKeeper get_zookeeper;
@ -50,6 +54,7 @@ private:
bool removeImpl(const UUID & id, bool throw_if_not_exists) override;
bool updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists) override;
bool insertWithID(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists);
bool insertZooKeeper(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists);
bool removeZooKeeper(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id, bool throw_if_not_exists);
bool updateZooKeeper(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists);
@ -66,10 +71,10 @@ private:
bool refresh();
void refreshEntities(const zkutil::ZooKeeperPtr & zookeeper);
void refreshEntity(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id);
void refreshEntityNoLock(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id);
void refreshEntityNoLock(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id) TSA_REQUIRES(mutex);
void setEntityNoLock(const UUID & id, const AccessEntityPtr & entity);
void removeEntityNoLock(const UUID & id);
void setEntityNoLock(const UUID & id, const AccessEntityPtr & entity) TSA_REQUIRES(mutex);
void removeEntityNoLock(const UUID & id) TSA_REQUIRES(mutex);
struct Entry
{
@ -82,8 +87,9 @@ private:
AccessEntityPtr readImpl(const UUID & id, bool throw_if_not_exists) const override;
mutable std::mutex mutex;
std::unordered_map<UUID, Entry> entries_by_id;
std::unordered_map<String, Entry *> entries_by_name_and_type[static_cast<size_t>(AccessEntityType::MAX)];
std::unordered_map<UUID, Entry> entries_by_id TSA_GUARDED_BY(mutex);
std::unordered_map<String, Entry *> entries_by_name_and_type[static_cast<size_t>(AccessEntityType::MAX)] TSA_GUARDED_BY(mutex);
AccessChangesNotifier & changes_notifier;
bool backup_allowed = false;
};
}

View File

@ -1,4 +1,5 @@
#include <Access/Role.h>
#include <base/insertAtEnd.h>
namespace DB
@ -12,4 +13,18 @@ bool Role::equal(const IAccessEntity & other) const
return (access == other_role.access) && (granted_roles == other_role.granted_roles) && (settings == other_role.settings);
}
std::vector<UUID> Role::findDependencies() const
{
    /// Dependencies of a role: those of its granted roles followed by those of its settings.
    std::vector<UUID> dependencies = granted_roles.findDependencies();
    insertAtEnd(dependencies, settings.findDependencies());
    return dependencies;
}
/// Forwards the old-ID -> new-ID mapping to `granted_roles` and `settings`,
/// the same two members that findDependencies() collects IDs from.
void Role::replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids)
{
granted_roles.replaceDependencies(old_to_new_ids);
settings.replaceDependencies(old_to_new_ids);
}
}

View File

@ -19,6 +19,10 @@ struct Role : public IAccessEntity
std::shared_ptr<IAccessEntity> clone() const override { return cloneImpl<Role>(); }
static constexpr const auto TYPE = AccessEntityType::ROLE;
AccessEntityType getType() const override { return TYPE; }
std::vector<UUID> findDependencies() const override;
void replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids) override;
bool isBackupAllowed() const override { return settings.isBackupAllowed(); }
};
using RolePtr = std::shared_ptr<const Role>;

View File

@ -7,6 +7,7 @@
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <boost/range/algorithm/set_algorithm.hpp>
#include <boost/range/algorithm/copy.hpp>
#include <boost/range/algorithm_ext/push_back.hpp>
#include <base/sort.h>
@ -286,4 +287,54 @@ bool operator ==(const RolesOrUsersSet & lhs, const RolesOrUsersSet & rhs)
return (lhs.all == rhs.all) && (lhs.ids == rhs.ids) && (lhs.except_ids == rhs.except_ids);
}
std::vector<UUID> RolesOrUsersSet::findDependencies() const
{
std::vector<UUID> res;
boost::range::copy(ids, std::back_inserter(res));
boost::range::copy(except_ids, std::back_inserter(res));
return res;
}
void RolesOrUsersSet::replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids)
{
std::vector<UUID> new_ids;
for (auto it = ids.begin(); it != ids.end();)
{
auto id = *it;
auto it_new_id = old_to_new_ids.find(id);
if (it_new_id != old_to_new_ids.end())
{
auto new_id = it_new_id->second;
new_ids.push_back(new_id);
it = ids.erase(it);
}
else
{
++it;
}
}
boost::range::copy(new_ids, std::inserter(ids, ids.end()));
new_ids.clear();
for (auto it = except_ids.begin(); it != except_ids.end();)
{
auto id = *it;
auto it_new_id = old_to_new_ids.find(id);
if (it_new_id != old_to_new_ids.end())
{
auto new_id = it_new_id->second;
new_ids.push_back(new_id);
it = except_ids.erase(it);
}
else
{
++it;
}
}
boost::range::copy(new_ids, std::inserter(except_ids, except_ids.end()));
}
}

View File

@ -5,6 +5,7 @@
#include <boost/container/flat_set.hpp>
#include <memory>
#include <optional>
#include <unordered_map>
namespace DB
@ -62,6 +63,9 @@ struct RolesOrUsersSet
friend bool operator ==(const RolesOrUsersSet & lhs, const RolesOrUsersSet & rhs);
friend bool operator !=(const RolesOrUsersSet & lhs, const RolesOrUsersSet & rhs) { return !(lhs == rhs); }
std::vector<UUID> findDependencies() const;
void replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids);
bool all = false;
boost::container::flat_set<UUID> ids;
boost::container::flat_set<UUID> except_ids;

View File

@ -58,4 +58,14 @@ bool RowPolicy::equal(const IAccessEntity & other) const
&& restrictive == other_policy.restrictive && (to_roles == other_policy.to_roles);
}
/// Returns the IDs this row policy refers to; these are exactly the IDs reported by `to_roles`.
std::vector<UUID> RowPolicy::findDependencies() const
{
return to_roles.findDependencies();
}
/// Forwards the old-ID -> new-ID mapping to `to_roles`, the only member of the row policy updated here.
void RowPolicy::replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids)
{
to_roles.replaceDependencies(old_to_new_ids);
}
}

View File

@ -46,6 +46,10 @@ struct RowPolicy : public IAccessEntity
static constexpr const auto TYPE = AccessEntityType::ROW_POLICY;
AccessEntityType getType() const override { return TYPE; }
std::vector<UUID> findDependencies() const override;
void replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids) override;
bool isBackupAllowed() const override { return true; }
/// Which roles or users should use this row policy.
RolesOrUsersSet to_roles;

View File

@ -1,4 +1,5 @@
#include <Access/SettingsProfile.h>
#include <base/insertAtEnd.h>
namespace DB
@ -12,4 +13,18 @@ bool SettingsProfile::equal(const IAccessEntity & other) const
return (elements == other_profile.elements) && (to_roles == other_profile.to_roles);
}
std::vector<UUID> SettingsProfile::findDependencies() const
{
    /// Dependencies of a profile: those of its elements followed by those of `to_roles`.
    std::vector<UUID> dependencies = elements.findDependencies();
    insertAtEnd(dependencies, to_roles.findDependencies());
    return dependencies;
}
/// Forwards the old-ID -> new-ID mapping to `elements` and `to_roles`,
/// the same two members that findDependencies() collects IDs from.
void SettingsProfile::replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids)
{
elements.replaceDependencies(old_to_new_ids);
to_roles.replaceDependencies(old_to_new_ids);
}
}

View File

@ -20,6 +20,10 @@ struct SettingsProfile : public IAccessEntity
std::shared_ptr<IAccessEntity> clone() const override { return cloneImpl<SettingsProfile>(); }
static constexpr const auto TYPE = AccessEntityType::SETTINGS_PROFILE;
AccessEntityType getType() const override { return TYPE; }
std::vector<UUID> findDependencies() const override;
void replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids) override;
bool isBackupAllowed() const override { return elements.isBackupAllowed(); }
};
using SettingsProfilePtr = std::shared_ptr<const SettingsProfile>;

View File

@ -12,6 +12,13 @@
namespace DB
{
namespace
{
constexpr const char ALLOW_BACKUP_SETTING_NAME[] = "allow_backup";
}
SettingsProfileElement::SettingsProfileElement(const ASTSettingsProfileElement & ast)
{
init(ast, nullptr);
@ -41,7 +48,10 @@ void SettingsProfileElement::init(const ASTSettingsProfileElement & ast, const A
/// Optionally check if a setting with that name is allowed.
if (access_control)
access_control->checkSettingNameIsAllowed(setting_name);
{
if (setting_name != ALLOW_BACKUP_SETTING_NAME)
access_control->checkSettingNameIsAllowed(setting_name);
}
value = ast.value;
min_value = ast.min_value;
@ -127,6 +137,36 @@ std::shared_ptr<ASTSettingsProfileElements> SettingsProfileElements::toASTWithNa
}
std::vector<UUID> SettingsProfileElements::findDependencies() const
{
std::vector<UUID> res;
for (const auto & element : *this)
{
if (element.parent_profile)
res.push_back(*element.parent_profile);
}
return res;
}
void SettingsProfileElements::replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids)
{
    /// Remaps the parent profile of every element that has one and whose ID is a key of the mapping.
    /// Elements without a parent profile, or with an unmapped one, are left as they are.
    for (auto & element : *this)
    {
        if (!element.parent_profile)
            continue;

        const auto old_id = *element.parent_profile;
        if (auto mapping = old_to_new_ids.find(old_id); mapping != old_to_new_ids.end())
            element.parent_profile = mapping->second;
    }
}
void SettingsProfileElements::merge(const SettingsProfileElements & other)
{
insert(end(), other.begin(), other.end());
@ -138,8 +178,11 @@ Settings SettingsProfileElements::toSettings() const
Settings res;
for (const auto & elem : *this)
{
if (!elem.setting_name.empty() && !elem.value.isNull())
res.set(elem.setting_name, elem.value);
if (!elem.setting_name.empty() && (elem.setting_name != ALLOW_BACKUP_SETTING_NAME))
{
if (!elem.value.isNull())
res.set(elem.setting_name, elem.value);
}
}
return res;
}
@ -149,8 +192,11 @@ SettingsChanges SettingsProfileElements::toSettingsChanges() const
SettingsChanges res;
for (const auto & elem : *this)
{
if (!elem.setting_name.empty() && !elem.value.isNull())
res.push_back({elem.setting_name, elem.value});
if (!elem.setting_name.empty() && (elem.setting_name != ALLOW_BACKUP_SETTING_NAME))
{
if (!elem.value.isNull())
res.push_back({elem.setting_name, elem.value});
}
}
return res;
}
@ -160,7 +206,7 @@ SettingsConstraints SettingsProfileElements::toSettingsConstraints(const AccessC
SettingsConstraints res{access_control};
for (const auto & elem : *this)
{
if (!elem.setting_name.empty())
if (!elem.setting_name.empty() && (elem.setting_name != ALLOW_BACKUP_SETTING_NAME))
{
if (!elem.min_value.isNull())
res.setMinValue(elem.setting_name, elem.min_value);
@ -189,5 +235,14 @@ std::vector<UUID> SettingsProfileElements::toProfileIDs() const
return res;
}
bool SettingsProfileElements::isBackupAllowed() const
{
    /// The first element named ALLOW_BACKUP_SETTING_NAME decides: its value is interpreted as a boolean.
    /// If there is no such element, backup is allowed.
    for (const auto & element : *this)
    {
        const bool is_allow_backup_element = (element.setting_name == ALLOW_BACKUP_SETTING_NAME);
        if (is_allow_backup_element)
            return static_cast<bool>(SettingFieldBool{element.value});
    }
    return true;
}
}

View File

@ -3,6 +3,7 @@
#include <Core/Field.h>
#include <Core/UUID.h>
#include <optional>
#include <unordered_map>
#include <vector>
@ -57,12 +58,17 @@ public:
std::shared_ptr<ASTSettingsProfileElements> toAST() const;
std::shared_ptr<ASTSettingsProfileElements> toASTWithNames(const AccessControl & access_control) const;
std::vector<UUID> findDependencies() const;
void replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids);
void merge(const SettingsProfileElements & other);
Settings toSettings() const;
SettingsChanges toSettingsChanges() const;
SettingsConstraints toSettingsConstraints(const AccessControl & access_control) const;
std::vector<UUID> toProfileIDs() const;
bool isBackupAllowed() const;
};
}

View File

@ -1,5 +1,6 @@
#include <Access/User.h>
#include <Core/Protocol.h>
#include <base/insertAtEnd.h>
namespace DB
@ -31,4 +32,22 @@ void User::setName(const String & name_)
name = name_;
}
std::vector<UUID> User::findDependencies() const
{
    /// Dependencies of a user, concatenated in this order:
    /// default roles, granted roles, grantees, settings.
    std::vector<UUID> dependencies = default_roles.findDependencies();
    insertAtEnd(dependencies, granted_roles.findDependencies());
    insertAtEnd(dependencies, grantees.findDependencies());
    insertAtEnd(dependencies, settings.findDependencies());
    return dependencies;
}
/// Forwards the old-ID -> new-ID mapping to `default_roles`, `granted_roles`, `grantees` and `settings`,
/// the same four members that findDependencies() collects IDs from.
void User::replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids)
{
default_roles.replaceDependencies(old_to_new_ids);
granted_roles.replaceDependencies(old_to_new_ids);
grantees.replaceDependencies(old_to_new_ids);
settings.replaceDependencies(old_to_new_ids);
}
}

View File

@ -29,6 +29,10 @@ struct User : public IAccessEntity
static constexpr const auto TYPE = AccessEntityType::USER;
AccessEntityType getType() const override { return TYPE; }
void setName(const String & name_) override;
std::vector<UUID> findDependencies() const override;
void replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids) override;
bool isBackupAllowed() const override { return settings.isBackupAllowed(); }
};
using UserPtr = std::shared_ptr<const User>;

View File

@ -523,8 +523,11 @@ namespace
}
}
UsersConfigAccessStorage::UsersConfigAccessStorage(const String & storage_name_, AccessControl & access_control_)
: IAccessStorage(storage_name_), access_control(access_control_), memory_storage(storage_name_, access_control.getChangesNotifier())
UsersConfigAccessStorage::UsersConfigAccessStorage(const String & storage_name_, AccessControl & access_control_, bool allow_backup_)
: IAccessStorage(storage_name_)
, access_control(access_control_)
, memory_storage(storage_name_, access_control.getChangesNotifier(), false)
, backup_allowed(allow_backup_)
{
}
@ -655,9 +658,9 @@ AccessEntityPtr UsersConfigAccessStorage::readImpl(const UUID & id, bool throw_i
}
std::optional<String> UsersConfigAccessStorage::readNameImpl(const UUID & id, bool throw_if_not_exists) const
std::optional<std::pair<String, AccessEntityType>> UsersConfigAccessStorage::readNameWithTypeImpl(const UUID & id, bool throw_if_not_exists) const
{
return memory_storage.readName(id, throw_if_not_exists);
return memory_storage.readNameWithType(id, throw_if_not_exists);
}
}

View File

@ -22,7 +22,7 @@ public:
static constexpr char STORAGE_TYPE[] = "users.xml";
UsersConfigAccessStorage(const String & storage_name_, AccessControl & access_control_);
UsersConfigAccessStorage(const String & storage_name_, AccessControl & access_control_, bool allow_backup_);
~UsersConfigAccessStorage() override;
const char * getStorageType() const override { return STORAGE_TYPE; }
@ -44,17 +44,20 @@ public:
bool exists(const UUID & id) const override;
bool isBackupAllowed() const override { return backup_allowed; }
private:
void parseFromConfig(const Poco::Util::AbstractConfiguration & config);
std::optional<UUID> findImpl(AccessEntityType type, const String & name) const override;
std::vector<UUID> findAllImpl(AccessEntityType type) const override;
AccessEntityPtr readImpl(const UUID & id, bool throw_if_not_exists) const override;
std::optional<String> readNameImpl(const UUID & id, bool throw_if_not_exists) const override;
std::optional<std::pair<String, AccessEntityType>> readNameWithTypeImpl(const UUID & id, bool throw_if_not_exists) const override;
AccessControl & access_control;
MemoryAccessStorage memory_storage;
String path;
std::unique_ptr<ConfigReloader> config_reloader;
bool backup_allowed = false;
mutable std::mutex load_mutex;
};
}

View File

@ -49,7 +49,7 @@ TEST(AccessRights, Union)
"GRANT INSERT ON *.*, "
"GRANT SHOW, SELECT, ALTER, CREATE DATABASE, CREATE TABLE, CREATE VIEW, "
"CREATE DICTIONARY, DROP DATABASE, DROP TABLE, DROP VIEW, DROP DICTIONARY, "
"TRUNCATE, OPTIMIZE, CREATE ROW POLICY, ALTER ROW POLICY, DROP ROW POLICY, "
"TRUNCATE, OPTIMIZE, BACKUP, CREATE ROW POLICY, ALTER ROW POLICY, DROP ROW POLICY, "
"SHOW ROW POLICIES, SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, "
"SYSTEM MOVES, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, "
"SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, SYSTEM RESTART REPLICA, "

View File

@ -24,7 +24,7 @@ TEST(ReplicatedAccessStorage, ShutdownWithFailedStartup)
try
{
auto storage = ReplicatedAccessStorage("replicated", "/clickhouse/access", get_zk, changes_notifier);
auto storage = ReplicatedAccessStorage("replicated", "/clickhouse/access", get_zk, changes_notifier, false);
}
catch (Exception & e)
{

View File

@ -2,6 +2,7 @@
#include <AggregateFunctions/IAggregateFunction.h>
#include <Common/IFactoryWithAliases.h>
#include <Parsers/ASTFunction.h>
#include <functional>
@ -105,4 +106,12 @@ private:
};
/// Small collection of static helpers for working with aggregate functions in the AST.
struct AggregateUtils
{
/// Returns the result of AggregateFunctionFactory::isAggregateFunctionName() for the name of the passed function node.
static bool isAggregateFunction(const ASTFunction & node)
{
return AggregateFunctionFactory::instance().isAggregateFunctionName(node.name);
}
};
}

View File

@ -14,6 +14,7 @@ namespace DB
namespace ErrorCodes
{
extern const int UNEXPECTED_NODE_IN_ZOOKEEPER;
extern const int LOGICAL_ERROR;
}
/// zookeeper_path/file_names/file_name->checksum_and_size
@ -27,32 +28,40 @@ namespace
using FileInfo = IBackupCoordination::FileInfo;
using PartNameAndChecksum = IBackupCoordination::PartNameAndChecksum;
String serializePartNamesAndChecksums(const std::vector<PartNameAndChecksum> & part_names_and_checksums)
struct ReplicatedPartNames
{
WriteBufferFromOwnString out;
writeBinary(part_names_and_checksums.size(), out);
for (const auto & part_name_and_checksum : part_names_and_checksums)
{
writeBinary(part_name_and_checksum.part_name, out);
writeBinary(part_name_and_checksum.checksum, out);
}
return out.str();
}
std::vector<PartNameAndChecksum> deserializePartNamesAndChecksums(const String & str)
{
ReadBufferFromString in{str};
std::vector<PartNameAndChecksum> part_names_and_checksums;
size_t num;
readBinary(num, in);
part_names_and_checksums.resize(num);
for (size_t i = 0; i != num; ++i)
String table_name_for_logs;
static String serialize(const std::vector<PartNameAndChecksum> & part_names_and_checksums_, const String & table_name_for_logs_)
{
readBinary(part_names_and_checksums[i].part_name, in);
readBinary(part_names_and_checksums[i].checksum, in);
WriteBufferFromOwnString out;
writeBinary(part_names_and_checksums_.size(), out);
for (const auto & part_name_and_checksum : part_names_and_checksums_)
{
writeBinary(part_name_and_checksum.part_name, out);
writeBinary(part_name_and_checksum.checksum, out);
}
writeBinary(table_name_for_logs_, out);
return out.str();
}
return part_names_and_checksums;
}
static ReplicatedPartNames deserialize(const String & str)
{
ReadBufferFromString in{str};
ReplicatedPartNames res;
size_t num;
readBinary(num, in);
res.part_names_and_checksums.resize(num);
for (size_t i = 0; i != num; ++i)
{
readBinary(res.part_names_and_checksums[i].part_name, in);
readBinary(res.part_names_and_checksums[i].checksum, in);
}
readBinary(res.table_name_for_logs, in);
return res;
}
};
String serializeFileInfo(const FileInfo & info)
{
@ -122,7 +131,7 @@ namespace
BackupCoordinationDistributed::BackupCoordinationDistributed(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_)
: zookeeper_path(zookeeper_path_)
, get_zookeeper(get_zookeeper_)
, preparing_barrier(zookeeper_path_ + "/preparing", get_zookeeper_, "BackupCoordination", "preparing")
, stage_sync(zookeeper_path_ + "/stage", get_zookeeper_, &Poco::Logger::get("BackupCoordination"))
{
createRootNodes();
}
@ -134,8 +143,8 @@ void BackupCoordinationDistributed::createRootNodes()
auto zookeeper = get_zookeeper();
zookeeper->createAncestors(zookeeper_path);
zookeeper->createIfNotExists(zookeeper_path, "");
zookeeper->createIfNotExists(zookeeper_path + "/repl_tables_paths", "");
zookeeper->createIfNotExists(zookeeper_path + "/repl_tables_parts", "");
zookeeper->createIfNotExists(zookeeper_path + "/repl_part_names", "");
zookeeper->createIfNotExists(zookeeper_path + "/repl_data_paths", "");
zookeeper->createIfNotExists(zookeeper_path + "/file_names", "");
zookeeper->createIfNotExists(zookeeper_path + "/file_infos", "");
zookeeper->createIfNotExists(zookeeper_path + "/archive_suffixes", "");
@ -147,101 +156,89 @@ void BackupCoordinationDistributed::removeAllNodes()
zookeeper->removeRecursive(zookeeper_path);
}
void BackupCoordinationDistributed::addReplicatedTableDataPath(const String & table_zk_path, const String & table_data_path)
void BackupCoordinationDistributed::syncStage(const String & current_host, int new_stage, const Strings & wait_hosts, std::chrono::seconds timeout)
{
auto zookeeper = get_zookeeper();
String path = zookeeper_path + "/repl_tables_paths/" + escapeForFileName(table_zk_path);
zookeeper->createIfNotExists(path, "");
path += "/" + escapeForFileName(table_data_path);
zookeeper->createIfNotExists(path, "");
stage_sync.syncStage(current_host, new_stage, wait_hosts, timeout);
}
void BackupCoordinationDistributed::addReplicatedTablePartNames(
const String & host_id,
const DatabaseAndTableName & table_name,
/// Reports an error which happened on `current_host`; the call is forwarded to `stage_sync` as is.
void BackupCoordinationDistributed::syncStageError(const String & current_host, const String & error_message)
{
stage_sync.syncStageError(current_host, error_message);
}
void BackupCoordinationDistributed::addReplicatedPartNames(
const String & table_zk_path,
const String & table_name_for_logs,
const String & replica_name,
const std::vector<PartNameAndChecksum> & part_names_and_checksums)
{
auto zookeeper = get_zookeeper();
String path = zookeeper_path + "/repl_tables_parts/" + escapeForFileName(table_zk_path);
zookeeper->createIfNotExists(path, "");
path += "/" + escapeForFileName(host_id);
zookeeper->createIfNotExists(path, "");
path += "/" + escapeForFileName(table_name.first);
zookeeper->createIfNotExists(path, "");
path += "/" + escapeForFileName(table_name.second);
zookeeper->create(path, serializePartNamesAndChecksums(part_names_and_checksums), zkutil::CreateMode::Persistent);
}
void BackupCoordinationDistributed::finishPreparing(const String & host_id, const String & error_message)
{
preparing_barrier.finish(host_id, error_message);
}
void BackupCoordinationDistributed::waitForAllHostsPrepared(const Strings & host_ids, std::chrono::seconds timeout) const
{
preparing_barrier.waitForAllHostsToFinish(host_ids, timeout);
prepareReplicatedTablesInfo();
}
void BackupCoordinationDistributed::prepareReplicatedTablesInfo() const
{
replicated_tables.emplace();
auto zookeeper = get_zookeeper();
String path = zookeeper_path + "/repl_tables_paths";
for (const String & escaped_table_zk_path : zookeeper->getChildren(path))
{
String table_zk_path = unescapeForFileName(escaped_table_zk_path);
for (const String & escaped_data_path : zookeeper->getChildren(path + "/" + escaped_table_zk_path))
{
String data_path = unescapeForFileName(escaped_data_path);
replicated_tables->addDataPath(table_zk_path, data_path);
}
std::lock_guard lock{mutex};
if (replicated_part_names)
throw Exception(ErrorCodes::LOGICAL_ERROR, "addPartNames() must not be called after getPartNames()");
}
path = zookeeper_path + "/repl_tables_parts";
auto zookeeper = get_zookeeper();
String path = zookeeper_path + "/repl_part_names/" + escapeForFileName(table_zk_path);
zookeeper->createIfNotExists(path, "");
path += "/" + escapeForFileName(replica_name);
zookeeper->create(path, ReplicatedPartNames::serialize(part_names_and_checksums, table_name_for_logs), zkutil::CreateMode::Persistent);
}
/// Returns the part names stored in `replicated_part_names` for the specified table path and replica.
/// The whole operation is done under `mutex`; prepareReplicatedPartNames() is called first
/// so that `replicated_part_names` is filled before it is queried.
Strings BackupCoordinationDistributed::getReplicatedPartNames(const String & table_zk_path, const String & replica_name) const
{
std::lock_guard lock{mutex};
prepareReplicatedPartNames();
return replicated_part_names->getPartNames(table_zk_path, replica_name);
}
void BackupCoordinationDistributed::addReplicatedDataPath(
const String & table_zk_path, const String & data_path)
{
auto zookeeper = get_zookeeper();
String path = zookeeper_path + "/repl_data_paths/" + escapeForFileName(table_zk_path);
zookeeper->createIfNotExists(path, "");
path += "/";
zookeeper->create(path, data_path, zkutil::CreateMode::PersistentSequential);
}
/// Collects the values of all child nodes of "repl_data_paths/<escaped table_zk_path>" in ZooKeeper,
/// in the order the children are returned by getChildren().
Strings BackupCoordinationDistributed::getReplicatedDataPaths(const String & table_zk_path) const
{
    auto zk = get_zookeeper();
    const String table_node = zookeeper_path + "/repl_data_paths/" + escapeForFileName(table_zk_path);
    const Strings child_nodes = zk->getChildren(table_node);

    Strings result;
    result.reserve(child_nodes.size());
    for (const String & child_node : child_nodes)
    {
        /// The value of each child node is a data path.
        result.push_back(zk->get(table_node + "/" + child_node));
    }
    return result;
}
void BackupCoordinationDistributed::prepareReplicatedPartNames() const
{
if (replicated_part_names)
return;
replicated_part_names.emplace();
auto zookeeper = get_zookeeper();
String path = zookeeper_path + "/repl_part_names";
for (const String & escaped_table_zk_path : zookeeper->getChildren(path))
{
String table_zk_path = unescapeForFileName(escaped_table_zk_path);
String path2 = path + "/" + escaped_table_zk_path;
for (const String & escaped_host_id : zookeeper->getChildren(path2))
for (const String & escaped_replica_name : zookeeper->getChildren(path2))
{
String host_id = unescapeForFileName(escaped_host_id);
String path3 = path2 + "/" + escaped_host_id;
for (const String & escaped_database_name : zookeeper->getChildren(path3))
{
String database_name = unescapeForFileName(escaped_database_name);
String path4 = path3 + "/" + escaped_database_name;
for (const String & escaped_table_name : zookeeper->getChildren(path4))
{
String table_name = unescapeForFileName(escaped_table_name);
String path5 = path4 + "/" + escaped_table_name;
auto part_names_and_checksums = deserializePartNamesAndChecksums(zookeeper->get(path5));
replicated_tables->addPartNames(host_id, {database_name, table_name}, table_zk_path, part_names_and_checksums);
}
}
String replica_name = unescapeForFileName(escaped_replica_name);
auto part_names = ReplicatedPartNames::deserialize(zookeeper->get(path2 + "/" + escaped_replica_name));
replicated_part_names->addPartNames(table_zk_path, part_names.table_name_for_logs, replica_name, part_names.part_names_and_checksums);
}
}
replicated_tables->preparePartNamesByLocations();
}
Strings BackupCoordinationDistributed::getReplicatedTableDataPaths(const String & table_zk_path) const
{
return replicated_tables->getDataPaths(table_zk_path);
}
Strings BackupCoordinationDistributed::getReplicatedTablePartNames(const String & host_id, const DatabaseAndTableName & table_name, const String & table_zk_path) const
{
return replicated_tables->getPartNames(host_id, table_name, table_zk_path);
}
void BackupCoordinationDistributed::addFileInfo(const FileInfo & file_info, bool & is_data_file_required)
{
@ -305,12 +302,19 @@ std::vector<FileInfo> BackupCoordinationDistributed::getAllFileInfos() const
return file_infos;
}
Strings BackupCoordinationDistributed::listFiles(const String & prefix, const String & terminator) const
Strings BackupCoordinationDistributed::listFiles(const String & directory, bool recursive) const
{
auto zookeeper = get_zookeeper();
Strings escaped_names = zookeeper->getChildren(zookeeper_path + "/file_names");
String prefix = directory;
if (!prefix.empty() && !prefix.ends_with('/'))
prefix += '/';
String terminator = recursive ? "" : "/";
Strings elements;
std::unordered_set<std::string_view> unique_elements;
for (const String & escaped_name : escaped_names)
{
String name = unescapeForFileName(escaped_name);
@ -321,15 +325,35 @@ Strings BackupCoordinationDistributed::listFiles(const String & prefix, const St
if (!terminator.empty())
end_pos = name.find(terminator, start_pos);
std::string_view new_element = std::string_view{name}.substr(start_pos, end_pos - start_pos);
if (!elements.empty() && (elements.back() == new_element))
if (unique_elements.contains(new_element))
continue;
elements.push_back(String{new_element});
unique_elements.emplace(new_element);
}
::sort(elements.begin(), elements.end());
return elements;
}
bool BackupCoordinationDistributed::hasFiles(const String & directory) const
{
auto zookeeper = get_zookeeper();
Strings escaped_names = zookeeper->getChildren(zookeeper_path + "/file_names");
String prefix = directory;
if (!prefix.empty() && !prefix.ends_with('/'))
prefix += '/';
for (const String & escaped_name : escaped_names)
{
String name = unescapeForFileName(escaped_name);
if (name.starts_with(prefix))
return true;
}
return false;
}
std::optional<FileInfo> BackupCoordinationDistributed::getFileInfo(const String & file_name) const
{
auto zookeeper = get_zookeeper();

View File

@ -2,9 +2,6 @@
#include <Backups/IBackupCoordination.h>
#include <Backups/BackupCoordinationHelpers.h>
#include <Common/ZooKeeper/Common.h>
#include <map>
#include <unordered_map>
namespace DB
@ -17,24 +14,26 @@ public:
BackupCoordinationDistributed(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_);
~BackupCoordinationDistributed() override;
void addReplicatedTableDataPath(const String & table_zk_path, const String & table_data_path) override;
void addReplicatedTablePartNames(
const String & host_id,
const DatabaseAndTableName & table_name,
void syncStage(const String & current_host, int new_stage, const Strings & wait_hosts, std::chrono::seconds timeout) override;
void syncStageError(const String & current_host, const String & error_message) override;
void addReplicatedPartNames(
const String & table_zk_path,
const String & table_name_for_logs,
const String & replica_name,
const std::vector<PartNameAndChecksum> & part_names_and_checksums) override;
void finishPreparing(const String & host_id, const String & error_message) override;
void waitForAllHostsPrepared(const Strings & host_ids, std::chrono::seconds timeout) const override;
Strings getReplicatedPartNames(const String & table_zk_path, const String & replica_name) const override;
Strings getReplicatedTableDataPaths(const String & table_zk_path) const override;
Strings getReplicatedTablePartNames(const String & host_id, const DatabaseAndTableName & table_name, const String & table_zk_path) const override;
void addReplicatedDataPath(const String & table_zk_path, const String & data_path) override;
Strings getReplicatedDataPaths(const String & table_zk_path) const override;
void addFileInfo(const FileInfo & file_info, bool & is_data_file_required) override;
void updateFileInfo(const FileInfo & file_info) override;
std::vector<FileInfo> getAllFileInfos() const override;
Strings listFiles(const String & prefix, const String & terminator) const override;
Strings listFiles(const String & directory, bool recursive) const override;
bool hasFiles(const String & directory) const override;
std::optional<FileInfo> getFileInfo(const String & file_name) const override;
std::optional<FileInfo> getFileInfo(const SizeAndChecksum & size_and_checksum) const override;
std::optional<SizeAndChecksum> getFileSizeAndChecksum(const String & file_name) const override;
@ -47,12 +46,15 @@ public:
private:
void createRootNodes();
void removeAllNodes();
void prepareReplicatedTablesInfo() const;
void prepareReplicatedPartNames() const;
const String zookeeper_path;
const zkutil::GetZooKeeper get_zookeeper;
BackupCoordinationDistributedBarrier preparing_barrier;
mutable std::optional<BackupCoordinationReplicatedTablesInfo> replicated_tables;
BackupCoordinationStageSync stage_sync;
mutable std::mutex mutex;
mutable std::optional<BackupCoordinationReplicatedPartNames> replicated_part_names;
};
}

View File

@ -1,6 +1,8 @@
#include <Backups/BackupCoordinationHelpers.h>
#include <Storages/MergeTree/MergeTreePartInfo.h>
#include <Common/Exception.h>
#include <Common/escapeForFileName.h>
#include <IO/ReadHelpers.h>
#include <base/chrono_io.h>
#include <boost/range/adaptor/map.hpp>
@ -16,37 +18,26 @@ namespace ErrorCodes
}
struct BackupCoordinationReplicatedTablesInfo::HostAndTableName
namespace
{
String host_id;
DatabaseAndTableName table_name;
struct Less
struct LessReplicaName
{
bool operator()(const HostAndTableName & lhs, const HostAndTableName & rhs) const
{
return (lhs.host_id < rhs.host_id) || ((lhs.host_id == rhs.host_id) && (lhs.table_name < rhs.table_name));
}
bool operator()(const std::shared_ptr<const HostAndTableName> & lhs, const std::shared_ptr<const HostAndTableName> & rhs) const
{
return operator()(*lhs, *rhs);
}
bool operator()(const std::shared_ptr<const String> & left, const std::shared_ptr<const String> & right) { return *left < *right; }
};
};
}
class BackupCoordinationReplicatedTablesInfo::CoveredPartsFinder
class BackupCoordinationReplicatedPartNames::CoveredPartsFinder
{
public:
CoveredPartsFinder() = default;
explicit CoveredPartsFinder(const String & table_name_for_logs_) : table_name_for_logs(table_name_for_logs_) {}
void addPart(const String & new_part_name, const std::shared_ptr<const HostAndTableName> & host_and_table_name)
void addPartName(const String & new_part_name, const std::shared_ptr<const String> & replica_name)
{
addPart(MergeTreePartInfo::fromPartName(new_part_name, MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING), host_and_table_name);
addPartName(MergeTreePartInfo::fromPartName(new_part_name, MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING), replica_name);
}
void addPart(MergeTreePartInfo && new_part_info, const std::shared_ptr<const HostAndTableName> & host_and_table_name)
void addPartName(MergeTreePartInfo && new_part_info, const std::shared_ptr<const String> & replica_name)
{
auto new_min_block = new_part_info.min_block;
auto new_max_block = new_part_info.max_block;
@ -57,7 +48,7 @@ public:
if (first_it == parts.end())
{
/// All max_blocks < part_info.min_block, so we can safely add the `part_info` to the list of parts.
parts.emplace(new_max_block, PartInfo{std::move(new_part_info), host_and_table_name});
parts.emplace(new_max_block, PartInfo{std::move(new_part_info), replica_name});
return;
}
@ -68,7 +59,7 @@ public:
{
/// (prev_info.max_block < part_info.min_block) AND (part_info.max_block < current_info.min_block),
/// so we can safely add the `part_info` to the list of parts.
parts.emplace(new_max_block, PartInfo{std::move(new_part_info), host_and_table_name});
parts.emplace(new_max_block, PartInfo{std::move(new_part_info), replica_name});
return;
}
@ -92,22 +83,19 @@ public:
{
throw Exception(
ErrorCodes::CANNOT_BACKUP_TABLE,
"Intersected parts detected: {} in the table {}.{}{} and {} in the table {}.{}{}. It should be investigated",
"Intersected parts detected in the table {}: {} on replica {} and {} on replica {}. It should be investigated",
table_name_for_logs,
part.info.getPartName(),
part.host_and_table_name->table_name.first,
part.host_and_table_name->table_name.second,
part.host_and_table_name->host_id.empty() ? "" : (" on the host " + part.host_and_table_name->host_id),
*part.replica_name,
new_part_info.getPartName(),
host_and_table_name->table_name.first,
host_and_table_name->table_name.second,
host_and_table_name->host_id.empty() ? "" : (" on the host " + host_and_table_name->host_id));
*replica_name);
}
++last_it;
}
/// `part_info` will replace multiple parts [first_it..last_it)
parts.erase(first_it, last_it);
parts.emplace(new_max_block, PartInfo{std::move(new_part_info), host_and_table_name});
parts.emplace(new_max_block, PartInfo{std::move(new_part_info), replica_name});
}
bool isCoveredByAnotherPart(const String & part_name) const
@ -156,185 +144,175 @@ private:
struct PartInfo
{
MergeTreePartInfo info;
std::shared_ptr<const HostAndTableName> host_and_table_name;
std::shared_ptr<const String> replica_name;
};
using Parts = std::map<Int64 /* max_block */, PartInfo>;
std::unordered_map<String, Parts> partitions;
const String table_name_for_logs;
};
void BackupCoordinationReplicatedTablesInfo::addDataPath(const String & table_zk_path, const String & table_data_path)
{
tables[table_zk_path].data_paths.push_back(table_data_path);
}
BackupCoordinationReplicatedPartNames::BackupCoordinationReplicatedPartNames() = default;
BackupCoordinationReplicatedPartNames::~BackupCoordinationReplicatedPartNames() = default;
Strings BackupCoordinationReplicatedTablesInfo::getDataPaths(const String & table_zk_path) const
{
auto it = tables.find(table_zk_path);
if (it == tables.end())
throw Exception(ErrorCodes::LOGICAL_ERROR, "getDataPaths() called for unknown table_zk_path: {}", table_zk_path);
const auto & replicated_table = it->second;
return replicated_table.data_paths;
}
void BackupCoordinationReplicatedTablesInfo::addPartNames(
const String & host_id,
const DatabaseAndTableName & table_name,
void BackupCoordinationReplicatedPartNames::addPartNames(
const String & table_zk_path,
const String & table_name_for_logs,
const String & replica_name,
const std::vector<PartNameAndChecksum> & part_names_and_checksums)
{
auto & table = tables[table_zk_path];
auto & part_locations_by_names = table.part_locations_by_names;
auto host_and_table_name = std::make_shared<HostAndTableName>();
host_and_table_name->host_id = host_id;
host_and_table_name->table_name = table_name;
if (part_names_prepared)
throw Exception(ErrorCodes::LOGICAL_ERROR, "addPartNames() must not be called after getPartNames()");
auto & table_info = table_infos[table_zk_path];
if (!table_info.covered_parts_finder)
table_info.covered_parts_finder = std::make_unique<CoveredPartsFinder>(table_name_for_logs);
auto replica_name_ptr = std::make_shared<String>(replica_name);
for (const auto & part_name_and_checksum : part_names_and_checksums)
{
const auto & part_name = part_name_and_checksum.part_name;
const auto & checksum = part_name_and_checksum.checksum;
auto it = part_locations_by_names.find(part_name);
if (it == part_locations_by_names.end())
auto it = table_info.parts_replicas.find(part_name);
if (it == table_info.parts_replicas.end())
{
it = part_locations_by_names.emplace(part_name, PartLocations{}).first;
it = table_info.parts_replicas.emplace(part_name, PartReplicas{}).first;
it->second.checksum = checksum;
}
else
{
const auto & existing = it->second;
if (existing.checksum != checksum)
const auto & other = it->second;
if (other.checksum != checksum)
{
const auto & existing_host_and_table_name = **existing.host_and_table_names.begin();
const String & other_replica_name = **other.replica_names.begin();
throw Exception(
ErrorCodes::CANNOT_BACKUP_TABLE,
"Table {}.{} has part {} which is different from the part of table {}.{}. Must be the same",
table_name.first,
table_name.second,
"Table {} on replica {} has part {} which is different from the part on replica {}. Must be the same",
table_name_for_logs,
replica_name,
part_name,
existing_host_and_table_name.table_name.first,
existing_host_and_table_name.table_name.second);
other_replica_name);
}
}
auto & host_and_table_names = it->second.host_and_table_names;
auto & replica_names = it->second.replica_names;
/// `host_and_table_names` should be ordered because we need this vector to be in the same order on every replica.
host_and_table_names.insert(
std::upper_bound(host_and_table_names.begin(), host_and_table_names.end(), host_and_table_name, HostAndTableName::Less{}),
host_and_table_name);
/// `replica_names` should be ordered because we need this vector to be in the same order on every replica.
replica_names.insert(
std::upper_bound(replica_names.begin(), replica_names.end(), replica_name_ptr, LessReplicaName{}), replica_name_ptr);
table_info.covered_parts_finder->addPartName(part_name, replica_name_ptr);
}
}
Strings BackupCoordinationReplicatedTablesInfo::getPartNames(const String & host_id, const DatabaseAndTableName & table_name, const String & table_zk_path) const
Strings BackupCoordinationReplicatedPartNames::getPartNames(const String & table_zk_path, const String & replica_name) const
{
if (!part_names_by_locations_prepared)
throw Exception(ErrorCodes::LOGICAL_ERROR, "preparePartNamesByLocations() was not called before getPartNames()");
auto it = tables.find(table_zk_path);
if (it == tables.end())
throw Exception(ErrorCodes::LOGICAL_ERROR, "getPartNames() called for unknown table_zk_path: {}", table_zk_path);
const auto & table = it->second;
auto it2 = table.part_names_by_locations.find(host_id);
if (it2 == table.part_names_by_locations.end())
preparePartNames();
auto it = table_infos.find(table_zk_path);
if (it == table_infos.end())
return {};
const auto & part_names_by_host_id = it2->second;
auto it3 = part_names_by_host_id.find(table_name);
if (it3 == part_names_by_host_id.end())
const auto & replicas_parts = it->second.replicas_parts;
auto it2 = replicas_parts.find(replica_name);
if (it2 == replicas_parts.end())
return {};
return it3->second;
return it2->second;
}
void BackupCoordinationReplicatedTablesInfo::preparePartNamesByLocations()
void BackupCoordinationReplicatedPartNames::preparePartNames() const
{
if (part_names_by_locations_prepared)
if (part_names_prepared)
return;
part_names_by_locations_prepared = true;
size_t counter = 0;
for (auto & table : tables | boost::adaptors::map_values)
for (const auto & table_info : table_infos | boost::adaptors::map_values)
{
CoveredPartsFinder covered_parts_finder;
for (const auto & [part_name, part_locations] : table.part_locations_by_names)
covered_parts_finder.addPart(part_name, *part_locations.host_and_table_names.begin());
table.part_names_by_locations.clear();
for (const auto & [part_name, part_locations] : table.part_locations_by_names)
for (const auto & [part_name, part_replicas] : table_info.parts_replicas)
{
if (covered_parts_finder.isCoveredByAnotherPart(part_name))
if (table_info.covered_parts_finder->isCoveredByAnotherPart(part_name))
continue;
size_t chosen_index = (counter++) % part_locations.host_and_table_names.size();
const auto & chosen_host_id = part_locations.host_and_table_names[chosen_index]->host_id;
const auto & chosen_table_name = part_locations.host_and_table_names[chosen_index]->table_name;
table.part_names_by_locations[chosen_host_id][chosen_table_name].push_back(part_name);
size_t chosen_index = (counter++) % part_replicas.replica_names.size();
const auto & chosen_replica_name = *part_replicas.replica_names[chosen_index];
table_info.replicas_parts[chosen_replica_name].push_back(part_name);
}
}
part_names_prepared = true;
}
BackupCoordinationDistributedBarrier::BackupCoordinationDistributedBarrier(
const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_, const String & logger_name_, const String & operation_name_)
/// Helps to wait until all hosts come to a specified stage.
BackupCoordinationStageSync::BackupCoordinationStageSync(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_, Poco::Logger * log_)
: zookeeper_path(zookeeper_path_)
, get_zookeeper(get_zookeeper_)
, log(&Poco::Logger::get(logger_name_))
, operation_name(operation_name_)
, log(log_)
{
createRootNodes();
}
void BackupCoordinationDistributedBarrier::createRootNodes()
void BackupCoordinationStageSync::createRootNodes()
{
auto zookeeper = get_zookeeper();
zookeeper->createAncestors(zookeeper_path);
zookeeper->createIfNotExists(zookeeper_path, "");
}
void BackupCoordinationDistributedBarrier::finish(const String & host_id, const String & error_message)
void BackupCoordinationStageSync::syncStage(const String & current_host, int new_stage, const Strings & wait_hosts, std::chrono::seconds timeout)
{
if (error_message.empty())
LOG_TRACE(log, "Host {} has finished {}", host_id, operation_name);
else
LOG_ERROR(log, "Host {} has failed {} with message: {}", host_id, operation_name, error_message);
/// Put new stage to ZooKeeper.
auto zookeeper = get_zookeeper();
if (error_message.empty())
zookeeper->create(zookeeper_path + "/" + host_id + ":ready", "", zkutil::CreateMode::Persistent);
else
zookeeper->create(zookeeper_path + "/" + host_id + ":error", error_message, zkutil::CreateMode::Persistent);
}
zookeeper->createIfNotExists(zookeeper_path + "/" + current_host + "|" + std::to_string(new_stage), "");
void BackupCoordinationDistributedBarrier::waitForAllHostsToFinish(const Strings & host_ids, const std::chrono::seconds timeout) const
{
auto zookeeper = get_zookeeper();
if (wait_hosts.empty() || ((wait_hosts.size() == 1) && (wait_hosts.front() == current_host)))
return;
bool all_hosts_ready = false;
String not_ready_host_id;
String error_host_id;
String error_message;
/// Wait for other hosts.
/// Returns true of everything's ready, or false if we need to wait more.
auto process_nodes = [&](const Strings & nodes)
/// Current stages of all hosts.
std::optional<String> host_with_error;
std::optional<String> error_message;
std::map<String, std::optional<int>> unready_hosts;
for (const String & host : wait_hosts)
unready_hosts.emplace(host, std::optional<int>{});
/// Process ZooKeeper's nodes and set `all_hosts_ready` or `unready_host` or `error_message`.
auto process_zk_nodes = [&](const Strings & zk_nodes)
{
std::unordered_set<std::string_view> set{nodes.begin(), nodes.end()};
for (const String & host_id : host_ids)
for (const String & zk_node : zk_nodes)
{
if (set.contains(host_id + ":error"))
if (zk_node == "error")
{
error_host_id = host_id;
error_message = zookeeper->get(zookeeper_path + "/" + host_id + ":error");
String str = zookeeper->get(zookeeper_path + "/" + zk_node);
size_t separator_pos = str.find('|');
if (separator_pos == String::npos)
throw Exception(ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE, "Unexpected value of zk node {}: {}", zookeeper_path + "/" + zk_node, str);
host_with_error = str.substr(0, separator_pos);
error_message = str.substr(separator_pos + 1);
return;
}
if (!set.contains(host_id + ":ready"))
else if (!zk_node.starts_with("remove_watch-"))
{
LOG_TRACE(log, "Waiting for host {} {}", host_id, operation_name);
not_ready_host_id = host_id;
return;
size_t separator_pos = zk_node.find('|');
if (separator_pos == String::npos)
throw Exception(ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE, "Unexpected zk node {}", zookeeper_path + "/" + zk_node);
String host = zk_node.substr(0, separator_pos);
int found_stage = parseFromString<int>(zk_node.substr(separator_pos + 1));
auto it = unready_hosts.find(host);
if (it != unready_hosts.end())
{
auto & stage = it->second;
if (!stage || (stage < found_stage))
stage = found_stage;
if (stage >= new_stage)
unready_hosts.erase(it);
}
}
}
all_hosts_ready = true;
};
/// Wait until all hosts are ready or an error happens or time is out.
std::atomic<bool> watch_set = false;
std::condition_variable watch_triggered_event;
@ -347,33 +325,25 @@ void BackupCoordinationDistributedBarrier::waitForAllHostsToFinish(const Strings
auto watch_triggered = [&] { return !watch_set; };
bool use_timeout = (timeout.count() >= 0);
std::chrono::steady_clock::duration time_left = timeout;
std::chrono::steady_clock::time_point start_time = std::chrono::steady_clock::now();
std::chrono::steady_clock::duration elapsed;
std::mutex dummy_mutex;
while (true)
while (!unready_hosts.empty() && !error_message)
{
if (use_timeout && (time_left.count() <= 0))
{
Strings children = zookeeper->getChildren(zookeeper_path);
process_nodes(children);
break;
}
watch_set = true;
Strings children = zookeeper->getChildrenWatch(zookeeper_path, nullptr, watch_callback);
process_nodes(children);
if (!error_message.empty() || all_hosts_ready)
break;
Strings nodes = zookeeper->getChildrenWatch(zookeeper_path, nullptr, watch_callback);
process_zk_nodes(nodes);
if (!unready_hosts.empty() && !error_message)
{
LOG_TRACE(log, "Waiting for host {}", unready_hosts.begin()->first);
std::unique_lock dummy_lock{dummy_mutex};
if (use_timeout)
{
std::chrono::steady_clock::time_point start_time = std::chrono::steady_clock::now();
if (!watch_triggered_event.wait_for(dummy_lock, time_left, watch_triggered))
elapsed = std::chrono::steady_clock::now() - start_time;
if ((elapsed > timeout) || !watch_triggered_event.wait_for(dummy_lock, timeout - elapsed, watch_triggered))
break;
time_left -= (std::chrono::steady_clock::now() - start_time);
}
else
watch_triggered_event.wait(dummy_lock, watch_triggered);
@ -385,32 +355,26 @@ void BackupCoordinationDistributedBarrier::waitForAllHostsToFinish(const Strings
/// Remove watch by triggering it.
zookeeper->create(zookeeper_path + "/remove_watch-", "", zkutil::CreateMode::EphemeralSequential);
std::unique_lock dummy_lock{dummy_mutex};
watch_triggered_event.wait_for(dummy_lock, timeout, watch_triggered);
watch_triggered_event.wait(dummy_lock, watch_triggered);
}
if (!error_message.empty())
if (error_message)
throw Exception(ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE, "Error occurred on host {}: {}", *host_with_error, *error_message);
if (!unready_hosts.empty())
{
throw Exception(
ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE,
"Host {} failed {} with message: {}",
error_host_id,
operation_name,
error_message);
"Waited for host {} too long ({})",
unready_hosts.begin()->first,
to_string(elapsed));
}
}
if (all_hosts_ready)
{
LOG_TRACE(log, "All hosts have finished {}", operation_name);
return;
}
throw Exception(
ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE,
"Host {} has failed {}: Time ({}) is out",
not_ready_host_id,
operation_name,
to_string(timeout));
void BackupCoordinationStageSync::syncStageError(const String & current_host, const String & error_message)
{
auto zookeeper = get_zookeeper();
zookeeper->createIfNotExists(zookeeper_path + "/error", current_host + "|" + error_message);
}
}

View File

@ -1,6 +1,7 @@
#pragma once
#include <Backups/IBackupCoordination.h>
#include <Backups/IRestoreCoordination.h>
#include <Common/ZooKeeper/Common.h>
#include <map>
#include <unordered_map>
@ -10,81 +11,67 @@ namespace DB
{
/// Helper designed to be used in an implementation of the IBackupCoordination interface in the part related to replicated tables.
class BackupCoordinationReplicatedTablesInfo
class BackupCoordinationReplicatedPartNames
{
public:
BackupCoordinationReplicatedTablesInfo() = default;
/// Adds a data path in backup for a replicated table.
/// Multiple replicas of the replicated table call this function and then all the added paths can be returned by call of the function
/// getReplicatedTableDataPaths().
void addDataPath(const String & table_zk_path, const String & table_data_path);
/// Returns all the data paths in backup added for a replicated table (see also addReplicatedTableDataPath()).
Strings getDataPaths(const String & table_zk_path) const;
BackupCoordinationReplicatedPartNames();
~BackupCoordinationReplicatedPartNames();
using PartNameAndChecksum = IBackupCoordination::PartNameAndChecksum;
/// Adds part names which a specified replica of a replicated table is going to put to the backup.
/// Multiple replicas of the replicated table call this function and then the added part names can be returned by call of the function
/// getReplicatedTablePartNames().
/// getPartNames().
/// Checksums are used only to control that parts under the same names on different replicas are the same.
void addPartNames(
const String & host_id,
const DatabaseAndTableName & table_name,
const String & table_zk_path,
const String & table_name_for_logs,
const String & replica_name,
const std::vector<PartNameAndChecksum> & part_names_and_checksums);
void preparePartNamesByLocations();
/// Returns the names of the parts which a specified replica of a replicated table should put to the backup.
/// This is the same list as it was added by call of the function addReplicatedTablePartNames() but without duplications and without
/// This is the same list as it was added by call of the function addPartNames() but without duplications and without
/// parts covered by another parts.
Strings getPartNames(const String & host_id, const DatabaseAndTableName & table_name, const String & table_zk_path) const;
Strings getPartNames(const String & table_zk_path, const String & replica_name) const;
private:
class CoveredPartsFinder;
struct HostAndTableName;
void preparePartNames() const;
struct PartLocations
class CoveredPartsFinder;
struct PartReplicas
{
std::vector<std::shared_ptr<const HostAndTableName>> host_and_table_names;
std::vector<std::shared_ptr<const String>> replica_names;
UInt128 checksum;
};
struct TableInfo
{
Strings data_paths;
std::map<String /* part_name */, PartLocations> part_locations_by_names; /// Should be ordered because we need this map to be in the same order on every replica.
std::unordered_map<String /* host_id */, std::map<DatabaseAndTableName, Strings /* part_names */>> part_names_by_locations;
std::map<String /* part_name */, PartReplicas> parts_replicas; /// Should be ordered because we need this map to be in the same order on every replica.
mutable std::unordered_map<String /* replica_name> */, Strings> replicas_parts;
std::unique_ptr<CoveredPartsFinder> covered_parts_finder;
};
std::unordered_map<String /* zk_path */, TableInfo> tables;
bool part_names_by_locations_prepared = false;
std::map<String /* table_zk_path */, TableInfo> table_infos; /// Should be ordered because we need this map to be in the same order on every replica.
mutable bool part_names_prepared = false;
};
/// Helper designed to be used in the implementation of the BackupCoordinationDistributed and RestoreCoordinationDistributed classes
/// to implement synchronization when we need all hosts to finish a specific task and then continue.
class BackupCoordinationDistributedBarrier
/// Helps to wait until all hosts come to a specified stage.
class BackupCoordinationStageSync
{
public:
BackupCoordinationDistributedBarrier(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_, const String & logger_name_, const String & operation_name_);
BackupCoordinationStageSync(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_, Poco::Logger * log_);
/// Sets that a specified host has finished the specific task, successfully or with an error.
/// In the latter case `error_message` should be set.
void finish(const String & host_id, const String & error_message = {});
/// Waits for a specified list of hosts to finish the specific task.
void waitForAllHostsToFinish(const Strings & host_ids, const std::chrono::seconds timeout = std::chrono::seconds(-1) /* no timeout */) const;
void syncStage(const String & current_host, int stage, const Strings & wait_hosts, std::chrono::seconds timeout);
void syncStageError(const String & current_host, const String & error_message);
private:
void createRootNodes();
String zookeeper_path;
zkutil::GetZooKeeper get_zookeeper;
const Poco::Logger * log;
String operation_name;
Poco::Logger * log;
};
}

View File

@ -10,47 +10,43 @@ namespace DB
using SizeAndChecksum = IBackupCoordination::SizeAndChecksum;
using FileInfo = IBackupCoordination::FileInfo;
BackupCoordinationLocal::BackupCoordinationLocal() : log(&Poco::Logger::get("BackupCoordination"))
{
}
BackupCoordinationLocal::BackupCoordinationLocal() = default;
BackupCoordinationLocal::~BackupCoordinationLocal() = default;
void BackupCoordinationLocal::addReplicatedTableDataPath(const String & table_zk_path, const String & table_data_path)
void BackupCoordinationLocal::syncStage(const String &, int, const Strings &, std::chrono::seconds)
{
/// Intentionally a no-op: all arguments are ignored and nothing is waited for.
/// NOTE(review): presumably because the local coordination serves a single host - confirm.
}
void BackupCoordinationLocal::syncStageError(const String &, const String &)
{
/// Intentionally a no-op: the host and the error message are ignored.
/// NOTE(review): presumably there are no other hosts to notify in the local case - confirm.
}
void BackupCoordinationLocal::addReplicatedPartNames(const String & table_zk_path, const String & table_name_for_logs, const String & replica_name, const std::vector<PartNameAndChecksum> & part_names_and_checksums)
{
std::lock_guard lock{mutex};
replicated_tables.addDataPath(table_zk_path, table_data_path);
replicated_part_names.addPartNames(table_zk_path, table_name_for_logs, replica_name, part_names_and_checksums);
}
void BackupCoordinationLocal::addReplicatedTablePartNames(const String & /* host_id */, const DatabaseAndTableName & table_name, const String & table_zk_path, const std::vector<PartNameAndChecksum> & part_names_and_checksums)
Strings BackupCoordinationLocal::getReplicatedPartNames(const String & table_zk_path, const String & replica_name) const
{
std::lock_guard lock{mutex};
replicated_tables.addPartNames("", table_name, table_zk_path, part_names_and_checksums);
return replicated_part_names.getPartNames(table_zk_path, replica_name);
}
void BackupCoordinationLocal::finishPreparing(const String & /* host_id */, const String & error_message)
{
LOG_TRACE(log, "Finished preparing{}", (error_message.empty() ? "" : (" with error " + error_message)));
if (!error_message.empty())
return;
replicated_tables.preparePartNamesByLocations();
}
void BackupCoordinationLocal::waitForAllHostsPrepared(const Strings & /* host_ids */, std::chrono::seconds /* timeout */) const
{
}
Strings BackupCoordinationLocal::getReplicatedTableDataPaths(const String & table_zk_path) const
void BackupCoordinationLocal::addReplicatedDataPath(const String & table_zk_path, const String & data_path)
{
std::lock_guard lock{mutex};
return replicated_tables.getDataPaths(table_zk_path);
replicated_data_paths[table_zk_path].push_back(data_path);
}
Strings BackupCoordinationLocal::getReplicatedTablePartNames(const String & /* host_id */, const DatabaseAndTableName & table_name, const String & table_zk_path) const
Strings BackupCoordinationLocal::getReplicatedDataPaths(const String & table_zk_path) const
{
std::lock_guard lock{mutex};
return replicated_tables.getPartNames("", table_name, table_zk_path);
auto it = replicated_data_paths.find(table_zk_path);
if (it == replicated_data_paths.end())
return {};
return it->second;
}
@ -93,9 +89,14 @@ std::vector<FileInfo> BackupCoordinationLocal::getAllFileInfos() const
return res;
}
Strings BackupCoordinationLocal::listFiles(const String & prefix, const String & terminator) const
Strings BackupCoordinationLocal::listFiles(const String & directory, bool recursive) const
{
std::lock_guard lock{mutex};
String prefix = directory;
if (!prefix.empty() && !prefix.ends_with('/'))
prefix += '/';
String terminator = recursive ? "" : "/";
Strings elements;
for (auto it = file_names.lower_bound(prefix); it != file_names.end(); ++it)
{
@ -111,9 +112,25 @@ Strings BackupCoordinationLocal::listFiles(const String & prefix, const String &
continue;
elements.push_back(String{new_element});
}
return elements;
}
bool BackupCoordinationLocal::hasFiles(const String & directory) const
{
std::lock_guard lock{mutex};
String prefix = directory;
if (!prefix.empty() && !prefix.ends_with('/'))
prefix += '/';
auto it = file_names.lower_bound(prefix);
if (it == file_names.end())
return false;
const String & name = it->first;
return name.starts_with(prefix);
}
std::optional<FileInfo> BackupCoordinationLocal::getFileInfo(const String & file_name) const
{
std::lock_guard lock{mutex};

View File

@ -2,6 +2,7 @@
#include <Backups/IBackupCoordination.h>
#include <Backups/BackupCoordinationHelpers.h>
#include <base/defines.h>
#include <map>
#include <mutex>
@ -18,24 +19,22 @@ public:
BackupCoordinationLocal();
~BackupCoordinationLocal() override;
void addReplicatedTableDataPath(const String & table_zk_path, const String & table_data_path) override;
void addReplicatedTablePartNames(
const String & host_id,
const DatabaseAndTableName & table_name,
const String & table_zk_path,
const std::vector<PartNameAndChecksum> & part_names_and_checksums) override;
void syncStage(const String & current_host, int stage, const Strings & wait_hosts, std::chrono::seconds timeout) override;
void syncStageError(const String & current_host, const String & error_message) override;
void finishPreparing(const String & host_id, const String & error_message) override;
void waitForAllHostsPrepared(const Strings & host_ids, std::chrono::seconds timeout) const override;
void addReplicatedPartNames(const String & table_zk_path, const String & table_name_for_logs, const String & replica_name,
const std::vector<PartNameAndChecksum> & part_names_and_checksums) override;
Strings getReplicatedPartNames(const String & table_zk_path, const String & replica_name) const override;
Strings getReplicatedTableDataPaths(const String & table_zk_path) const override;
Strings getReplicatedTablePartNames(const String & host_id, const DatabaseAndTableName & table_name, const String & table_zk_path) const override;
void addReplicatedDataPath(const String & table_zk_path, const String & data_path) override;
Strings getReplicatedDataPaths(const String & table_zk_path) const override;
void addFileInfo(const FileInfo & file_info, bool & is_data_file_required) override;
void updateFileInfo(const FileInfo & file_info) override;
std::vector<FileInfo> getAllFileInfos() const override;
Strings listFiles(const String & prefix, const String & terminator) const override;
Strings listFiles(const String & directory, bool recursive) const override;
bool hasFiles(const String & directory) const override;
std::optional<FileInfo> getFileInfo(const String & file_name) const override;
std::optional<FileInfo> getFileInfo(const SizeAndChecksum & size_and_checksum) const override;
@ -46,13 +45,12 @@ public:
private:
mutable std::mutex mutex;
BackupCoordinationReplicatedTablesInfo replicated_tables;
std::map<String /* file_name */, SizeAndChecksum> file_names; /// Should be ordered alphabetically, see listFiles(). For empty files we assume checksum = 0.
std::map<SizeAndChecksum, FileInfo> file_infos; /// Information about files. Without empty files.
Strings archive_suffixes;
size_t current_archive_suffix = 0;
const Poco::Logger * log;
BackupCoordinationReplicatedPartNames replicated_part_names TSA_GUARDED_BY(mutex);
std::unordered_map<String, Strings> replicated_data_paths TSA_GUARDED_BY(mutex);
std::map<String /* file_name */, SizeAndChecksum> file_names TSA_GUARDED_BY(mutex); /// Should be ordered alphabetically, see listFiles(). For empty files we assume checksum = 0.
std::map<SizeAndChecksum, FileInfo> file_infos TSA_GUARDED_BY(mutex); /// Information about files. Without empty files.
Strings archive_suffixes TSA_GUARDED_BY(mutex);
size_t current_archive_suffix TSA_GUARDED_BY(mutex) = 0;
};

View File

@ -0,0 +1,550 @@
#include <Backups/BackupEntriesCollector.h>
#include <Backups/BackupEntryFromMemory.h>
#include <Backups/IBackupCoordination.h>
#include <Backups/BackupUtils.h>
#include <Databases/IDatabase.h>
#include <Interpreters/Context.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/formatAST.h>
#include <Storages/IStorage.h>
#include <base/chrono_io.h>
#include <base/insertAtEnd.h>
#include <Common/escapeForFileName.h>
#include <boost/range/algorithm/copy.hpp>
#include <filesystem>
namespace fs = std::filesystem;
namespace DB
{
namespace ErrorCodes
{
extern const int CANNOT_COLLECT_OBJECTS_FOR_BACKUP;
extern const int CANNOT_BACKUP_TABLE;
extern const int TABLE_IS_DROPPED;
extern const int LOGICAL_ERROR;
}
/// Two keys are equal only when both the qualified name and the "temporary" flag match.
bool BackupEntriesCollector::TableKey::operator ==(const TableKey & right) const
{
    if (is_temporary != right.is_temporary)
        return false;
    return name == right.name;
}
/// Orders keys by name first; for equal names a non-temporary key goes before a temporary one.
bool BackupEntriesCollector::TableKey::operator <(const TableKey & right) const
{
    if (name < right.name)
        return true;
    if (!(name == right.name))
        return false;
    return is_temporary < right.is_temporary;
}
/// Returns a human-readable name of a stage (used in log messages).
/// Throws LOGICAL_ERROR if `stage` holds a value which is not one of the enumerators handled below.
std::string_view BackupEntriesCollector::toString(Stage stage)
{
    switch (stage)
    {
        case Stage::kPreparing:
            return "Preparing";
        case Stage::kFindingTables:
            return "Finding tables";
        case Stage::kExtractingDataFromTables:
            return "Extracting data from tables";
        case Stage::kRunningPostTasks:
            return "Running post tasks";
        case Stage::kWritingBackup:
            return "Writing backup";
        case Stage::kError:
            return "Error";
    }

    /// Reached only if none of the cases above matched.
    throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown backup stage: {}", static_cast<int>(stage));
}
/// Stores everything needed to collect backup entries later; no collecting is done here.
/// `timeout_` is stored as is; it is used by collectDatabasesAndTablesInfo() and passed to syncStage() by setStage().
BackupEntriesCollector::BackupEntriesCollector(
    const ASTBackupQuery::Elements & backup_query_elements_,
    const BackupSettings & backup_settings_,
    std::shared_ptr<IBackupCoordination> backup_coordination_,
    const ContextPtr & context_,
    std::chrono::seconds timeout_)
    : backup_query_elements(backup_query_elements_)
    , backup_settings(backup_settings_)
    , backup_coordination(std::move(backup_coordination_)) /// Passed by value, so move it instead of copying the shared_ptr once more.
    , context(context_)
    , timeout(timeout_)
    , log(&Poco::Logger::get("BackupEntriesCollector"))
{
}

BackupEntriesCollector::~BackupEntriesCollector() = default;
/// Collects all backup entries and returns them (the internal list is moved out).
/// The work is done stage by stage; every stage change goes through setStage().
/// If anything throws, the error is reported with setStage(Stage::kError, ...) and the original exception is rethrown.
BackupEntries BackupEntriesCollector::getBackupEntries()
{
try
{
/// getBackupEntries() must not be called multiple times.
if (current_stage != Stage::kPreparing)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Already making backup entries");
/// Calculate the root path for collecting backup entries, it's either "/" or has the format "/shards/<shard_num>/replicas/<replica_num>".
calculateRootPathInBackup();
/// Build the renaming map from the BACKUP query; it's applied later to create queries and to paths in the backup.
renaming_map = makeRenamingMapFromBackupQuery(backup_query_elements);
/// Find databases and tables which we're going to put to the backup.
setStage(Stage::kFindingTables);
collectDatabasesAndTablesInfo();
/// Make backup entries for the definitions of the found databases.
makeBackupEntriesForDatabasesDefs();
/// Make backup entries for the definitions of the found tables.
makeBackupEntriesForTablesDefs();
/// Make backup entries for the data of the found tables.
setStage(Stage::kExtractingDataFromTables);
makeBackupEntriesForTablesData();
/// Run all the tasks added with addPostCollectingTask().
setStage(Stage::kRunningPostTasks);
runPostCollectingTasks();
/// No more backup entries or tasks are allowed after this point.
setStage(Stage::kWritingBackup);
/// `backup_entries` is a member, so std::move() is required here to avoid copying the whole list.
return std::move(backup_entries);
}
catch (...)
{
try
{
setStage(Stage::kError, getCurrentExceptionMessage(false));
}
catch (...)
{
/// Best effort: a failure to report the error must not replace the original exception rethrown below.
}
throw;
}
}
/// Switches the collector to `new_stage`, writes a log message and passes the change to the backup coordination.
/// For Stage::kError the error is sent with syncStageError() (`error_message` is used only in this case);
/// for any other stage syncStage() is called with the list of hosts and the timeout.
void BackupEntriesCollector::setStage(Stage new_stage, const String & error_message)
{
    const bool failed = (new_stage == Stage::kError);

    /// The error message names the stage which failed, so it must be logged before `current_stage` is overwritten.
    if (failed)
    {
        LOG_ERROR(log, "{} failed with error: {}", toString(current_stage), error_message);
    }
    else
    {
        LOG_TRACE(log, "{}", toString(new_stage));
    }

    current_stage = new_stage;

    if (failed)
    {
        backup_coordination->syncStageError(backup_settings.host_id, error_message);
        return;
    }

    auto hosts_to_wait
        = BackupSettings::Util::filterHostIDs(backup_settings.cluster_host_ids, backup_settings.shard_num, backup_settings.replica_num);
    backup_coordination->syncStage(backup_settings.host_id, static_cast<int>(new_stage), hosts_to_wait, timeout);
}
/// Calculates the root path for collecting backup entries:
/// it's "/" if `backup_settings.host_id` is empty, otherwise "/shards/<shard_num>/replicas/<replica_num>"
/// where the numbers are found by the host id in `backup_settings.cluster_host_ids`.
void BackupEntriesCollector::calculateRootPathInBackup()
{
    root_path_in_backup = "/";

    const auto & host_id = backup_settings.host_id;
    if (!host_id.empty())
    {
        auto [shard_num, replica_num] = BackupSettings::Util::findShardNumAndReplicaNum(backup_settings.cluster_host_ids, host_id);
        const auto shard_dir = std::to_string(shard_num);
        const auto replica_dir = std::to_string(replica_num);
        root_path_in_backup = root_path_in_backup / fs::path{"shards"} / shard_dir / "replicas" / replica_dir;
    }

    LOG_TRACE(log, "Will use path in backup: {}", doubleQuoteString(String{root_path_in_backup}));
}
/// Finds databases and tables which we will put to the backup.
/// The collecting is repeated (pass after pass) until a pass is found consistent by checkConsistency(),
/// i.e. nothing was renamed or dropped while we were scanning and the result matches the previous pass.
/// If the timeout is set (non-negative) and exceeded while the result is still inconsistent after the pass #2
/// or later, CANNOT_COLLECT_OBJECTS_FOR_BACKUP is thrown. Without a timeout the passes are repeated until success.
void BackupEntriesCollector::collectDatabasesAndTablesInfo()
{
    bool use_timeout = (timeout.count() >= 0);
    auto start_time = std::chrono::steady_clock::now();

    int pass = 0;
    do
    {
        /// Every pass starts from scratch.
        database_infos.clear();
        table_infos.clear();
        consistent = true;

        /// Collect information about databases and tables specified in the BACKUP query.
        for (const auto & element : backup_query_elements)
        {
            switch (element.type)
            {
                case ASTBackupQuery::ElementType::TABLE:
                {
                    collectTableInfo({element.database_name, element.table_name}, false, element.partitions, true);
                    break;
                }

                case ASTBackupQuery::ElementType::TEMPORARY_TABLE:
                {
                    collectTableInfo({"", element.table_name}, true, element.partitions, true);
                    break;
                }

                case ASTBackupQuery::ElementType::DATABASE:
                {
                    collectDatabaseInfo(element.database_name, element.except_tables, true);
                    break;
                }

                case ASTBackupQuery::ElementType::ALL:
                {
                    collectAllDatabasesInfo(element.except_databases, element.except_tables);
                    break;
                }
            }
        }

        /// We have to check consistency of collected information to protect from the case when some table or database is
        /// renamed during this collecting making the collected information invalid.
        checkConsistency();

        /// Two passes is absolute minimum (see `previous_table_names` & `previous_database_names`),
        /// so a failure is reported only starting from the pass #2, and only if that pass is really inconsistent
        /// (a successful pass must not produce a "Couldn't collect" warning).
        if (!consistent && (pass >= 2))
        {
            auto elapsed = std::chrono::steady_clock::now() - start_time;
            if (use_timeout && (elapsed > timeout))
                throw Exception(
                    ErrorCodes::CANNOT_COLLECT_OBJECTS_FOR_BACKUP,
                    "Couldn't collect tables and databases to make a backup (pass #{}, elapsed {})",
                    pass,
                    to_string(elapsed));

            LOG_WARNING(log, "Couldn't collect tables and databases to make a backup (pass #{}, elapsed {})", pass, to_string(elapsed));
        }

        ++pass;
    } while (!consistent);

    LOG_INFO(log, "Will backup {} databases and {} tables", database_infos.size(), table_infos.size());
}
/// Gathers information about one table (its database, storage, table lock, create query and data path in the backup)
/// and stores it in `table_infos`. If something looks inconsistent (e.g. the table was renamed or dropped while
/// we were collecting) the function sets `consistent` to false and returns without adding anything.
/// `table_name` - the name of the table; for a temporary table only `table_name.table` is used for resolving.
/// `is_temporary_table` - whether the table must be resolved as a temporary (external) one.
/// `partitions` - if set, these partitions are appended to the partitions already stored for this table.
/// `throw_if_not_found` - if true the table is resolved with resolveStorageID()/getDatabaseAndTable() and any exception propagates;
/// if false the "try" variants are used and a missing table is skipped.
void BackupEntriesCollector::collectTableInfo(
const QualifiedTableName & table_name, bool is_temporary_table, const std::optional<ASTs> & partitions, bool throw_if_not_found)
{
/// Gather information about the table.
DatabasePtr database;
StoragePtr storage;
TableLockHolder table_lock;
ASTPtr create_table_query;
TableKey table_key{table_name, is_temporary_table};
if (throw_if_not_found)
{
/// Temporary tables are resolved with an empty database name and Context::ResolveExternal.
auto resolved_id = is_temporary_table
? context->resolveStorageID(StorageID{"", table_name.table}, Context::ResolveExternal)
: context->resolveStorageID(StorageID{table_name.database, table_name.table}, Context::ResolveGlobal);
std::tie(database, storage) = DatabaseCatalog::instance().getDatabaseAndTable(resolved_id, context);
table_lock = storage->lockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout);
create_table_query = storage->getCreateQueryForBackup(*this);
}
else
{
auto resolved_id = is_temporary_table
? context->tryResolveStorageID(StorageID{"", table_name.table}, Context::ResolveExternal)
: context->tryResolveStorageID(StorageID{table_name.database, table_name.table}, Context::ResolveGlobal);
if (!resolved_id.empty())
std::tie(database, storage) = DatabaseCatalog::instance().tryGetDatabaseAndTable(resolved_id, context);
if (storage)
{
try
{
table_lock = storage->lockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout);
create_table_query = storage->getCreateQueryForBackup(*this);
}
catch (Exception & e)
{
/// Only TABLE_IS_DROPPED is tolerated here (`create_table_query` stays empty), anything else is rethrown.
if (e.code() != ErrorCodes::TABLE_IS_DROPPED)
throw;
}
}
if (!create_table_query)
{
/// The table wasn't found. That's inconsistent only if the same table has already been collected during this pass.
consistent &= !table_infos.contains(table_key);
return;
}
}
/// Calculate where the table's data will be located in the backup (the renaming map is applied to the table's name).
fs::path data_path_in_backup;
if (is_temporary_table)
{
auto table_name_in_backup = renaming_map.getNewTemporaryTableName(table_name.table);
data_path_in_backup = root_path_in_backup / "temporary_tables" / "data" / escapeForFileName(table_name_in_backup);
}
else
{
auto table_name_in_backup = renaming_map.getNewTableName(table_name);
data_path_in_backup
= root_path_in_backup / "data" / escapeForFileName(table_name_in_backup.database) / escapeForFileName(table_name_in_backup.table);
}
/// Check that information is consistent: the create query must describe exactly the table we asked for.
const auto & create = create_table_query->as<const ASTCreateQuery &>();
if ((create.getTable() != table_name.table) || (is_temporary_table != create.temporary) || (create.getDatabase() != table_name.database))
{
/// Table was renamed recently.
consistent = false;
return;
}
/// If the table has already been collected during this pass it must refer to the same database and storage objects.
if (auto it = table_infos.find(table_key); it != table_infos.end())
{
const auto & table_info = it->second;
if ((table_info.database != database) || (table_info.storage != storage))
{
/// Table was renamed recently.
consistent = false;
return;
}
}
/// Add information to `table_infos`.
auto & res_table_info = table_infos[table_key];
res_table_info.database = database;
res_table_info.storage = storage;
res_table_info.table_lock = table_lock;
res_table_info.create_table_query = create_table_query;
res_table_info.data_path_in_backup = data_path_in_backup;
if (partitions)
{
/// Partitions are accumulated: the same table can be collected more than once with different partitions.
if (!res_table_info.partitions)
res_table_info.partitions.emplace();
insertAtEnd(*res_table_info.partitions, *partitions);
}
}
/// Gathers information about one database (the database object and its create query), stores it in `database_infos`
/// and then collects information about its tables with collectTableInfo().
/// If something looks inconsistent (e.g. the database was renamed or dropped while we were collecting)
/// the function sets `consistent` to false and returns.
/// `except_table_names` - tables of this database which must be skipped.
/// `throw_if_not_found` - if true getDatabase() is used and any exception propagates;
/// if false a missing database is skipped.
void BackupEntriesCollector::collectDatabaseInfo(const String & database_name, const std::set<DatabaseAndTableName> & except_table_names, bool throw_if_not_found)
{
/// Gather information about the database.
DatabasePtr database;
ASTPtr create_database_query;
if (throw_if_not_found)
{
database = DatabaseCatalog::instance().getDatabase(database_name);
create_database_query = database->getCreateDatabaseQueryForBackup();
}
else
{
database = DatabaseCatalog::instance().tryGetDatabase(database_name);
if (!database)
{
/// The database wasn't found. That's inconsistent only if it has already been collected during this pass.
consistent &= !database_infos.contains(database_name);
return;
}
try
{
create_database_query = database->getCreateDatabaseQueryForBackup();
}
catch (...)
{
/// The database has been dropped recently.
/// NOTE(review): any exception is treated this way here, not only the one meaning "dropped" - confirm it's intended.
consistent &= !database_infos.contains(database_name);
return;
}
}
/// Check that information is consistent: the create query must describe exactly the database we asked for.
const auto & create = create_database_query->as<const ASTCreateQuery &>();
if (create.getDatabase() != database_name)
{
/// Database was renamed recently.
consistent = false;
return;
}
/// If the database has already been collected during this pass it must refer to the same database object.
if (auto it = database_infos.find(database_name); it != database_infos.end())
{
const auto & database_info = it->second;
if (database_info.database != database)
{
/// Database was renamed recently.
consistent = false;
return;
}
}
/// Add information to `database_infos`.
auto & res_database_info = database_infos[database_name];
res_database_info.database = database;
res_database_info.create_database_query = create_database_query;
/// Add information about tables too.
for (auto it = database->getTablesIteratorForBackup(*this); it->isValid(); it->next())
{
if (except_table_names.contains({database_name, it->name()}))
continue;
collectTableInfo({database_name, it->name()}, /* is_temporary_table= */ false, {}, /* throw_if_not_found= */ false);
/// No reason to continue this pass after an inconsistency is found.
if (!consistent)
return;
}
}
/// Collects information about every database returned by DatabaseCatalog::getDatabases(),
/// except the databases listed in `except_database_names`. `except_table_names` is passed on to collectDatabaseInfo().
/// Stops as soon as the collected information is found inconsistent.
void BackupEntriesCollector::collectAllDatabasesInfo(const std::set<String> & except_database_names, const std::set<DatabaseAndTableName> & except_table_names)
{
    for (const auto & [database_name, database] : DatabaseCatalog::instance().getDatabases())
    {
        const bool excluded = except_database_names.contains(database_name);
        if (!excluded)
        {
            /// A database can disappear while we're iterating, so it's not an error if it's not found.
            collectDatabaseInfo(database_name, except_table_names, /* throw_if_not_found= */ false);
            if (!consistent)
                return;
        }
    }
}
/// Check for consistency of collected information about databases and tables.
void BackupEntriesCollector::checkConsistency()
{
if (!consistent)
return; /// Already inconsistent, no more checks necessary
/// Databases found while we were scanning tables and while we were scanning databases - must be the same.
for (const auto & [key, table_info] : table_infos)
{
auto it = database_infos.find(key.name.database);
if (it != database_infos.end())
{
const auto & database_info = it->second;
if (database_info.database != table_info.database)
{
consistent = false;
return;
}
}
}
/// We need to scan tables at least twice to be sure that we haven't missed any table which could be renamed
/// while we were scanning.
std::set<String> database_names;
std::set<TableKey> table_names;
boost::range::copy(database_infos | boost::adaptors::map_keys, std::inserter(database_names, database_names.end()));
boost::range::copy(table_infos | boost::adaptors::map_keys, std::inserter(table_names, table_names.end()));
if (!previous_database_names || !previous_table_names || (*previous_database_names != database_names)
|| (*previous_table_names != table_names))
{
previous_database_names = std::move(database_names);
previous_table_names = std::move(table_names);
consistent = false;
}
}
/// Make backup entries for all the definitions of all the databases found.
void BackupEntriesCollector::makeBackupEntriesForDatabasesDefs()
{
for (const auto & [database_name, database_info] : database_infos)
{
LOG_TRACE(log, "Adding definition of database {}", backQuoteIfNeed(database_name));
ASTPtr new_create_query = database_info.create_database_query;
renameDatabaseAndTableNameInCreateQuery(context->getGlobalContext(), renaming_map, new_create_query);
String new_database_name = renaming_map.getNewDatabaseName(database_name);
auto metadata_path_in_backup = root_path_in_backup / "metadata" / (escapeForFileName(new_database_name) + ".sql");
backup_entries.emplace_back(metadata_path_in_backup, std::make_shared<BackupEntryFromMemory>(serializeAST(*new_create_query)));
}
}
/// Calls IDatabase::backupTable() for all the tables found to make backup entries for tables.
void BackupEntriesCollector::makeBackupEntriesForTablesDefs()
{
for (const auto & [key, table_info] : table_infos)
{
LOG_TRACE(log, "Adding definition of {}table {}", (key.is_temporary ? "temporary " : ""), key.name.getFullName());
ASTPtr new_create_query = table_info.create_table_query;
renameDatabaseAndTableNameInCreateQuery(context->getGlobalContext(), renaming_map, new_create_query);
fs::path metadata_path_in_backup;
if (key.is_temporary)
{
auto new_name = renaming_map.getNewTemporaryTableName(key.name.table);
metadata_path_in_backup = root_path_in_backup / "temporary_tables" / "metadata" / (escapeForFileName(new_name) + ".sql");
}
else
{
auto new_name = renaming_map.getNewTableName(key.name);
metadata_path_in_backup
= root_path_in_backup / "metadata" / escapeForFileName(new_name.database) / (escapeForFileName(new_name.table) + ".sql");
}
backup_entries.emplace_back(metadata_path_in_backup, std::make_shared<BackupEntryFromMemory>(serializeAST(*new_create_query)));
}
}
/// Calls IStorage::backupData() for every table found, passing the table's data path in the backup
/// and its partitions (if any). Does nothing if the `structure_only` backup setting is set.
void BackupEntriesCollector::makeBackupEntriesForTablesData()
{
    if (!backup_settings.structure_only)
    {
        for (const auto & [key, table_info] : table_infos)
        {
            LOG_TRACE(log, "Adding data of {}table {}", (key.is_temporary ? "temporary " : ""), key.name.getFullName());
            table_info.storage->backupData(*this, table_info.data_path_in_backup, table_info.partitions);
        }
    }
}
/// Adds a single backup entry named `file_name` to the list returned later by getBackupEntries().
/// Throws LOGICAL_ERROR if the collector has already switched to the stage kWritingBackup.
void BackupEntriesCollector::addBackupEntry(const String & file_name, BackupEntryPtr backup_entry)
{
    if (current_stage == Stage::kWritingBackup)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Adding backup entries is not allowed");

    /// `backup_entry` is passed by value, so it can be moved into the list without one more shared_ptr copy.
    backup_entries.emplace_back(file_name, std::move(backup_entry));
}
/// Appends copies of `backup_entries_` to the list returned later by getBackupEntries().
/// Throws LOGICAL_ERROR if the collector has already switched to the stage kWritingBackup.
void BackupEntriesCollector::addBackupEntries(const BackupEntries & backup_entries_)
{
    const bool collecting_finished = (current_stage == Stage::kWritingBackup);
    if (collecting_finished)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Adding backup entries is not allowed");

    insertAtEnd(backup_entries, backup_entries_);
}
/// Appends `backup_entries_` to the list returned later by getBackupEntries(); the argument is passed on as an rvalue.
/// Throws LOGICAL_ERROR if the collector has already switched to the stage kWritingBackup.
void BackupEntriesCollector::addBackupEntries(BackupEntries && backup_entries_)
{
    const bool collecting_finished = (current_stage == Stage::kWritingBackup);
    if (collecting_finished)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Adding backup entries is not allowed");

    insertAtEnd(backup_entries, std::move(backup_entries_));
}
/// Puts `task` to the end of the queue processed by runPostCollectingTasks().
/// Throws LOGICAL_ERROR if the collector has already switched to the stage kWritingBackup.
void BackupEntriesCollector::addPostCollectingTask(std::function<void()> task)
{
    const bool collecting_finished = (current_stage == Stage::kWritingBackup);
    if (collecting_finished)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Adding post tasks is not allowed");

    post_collecting_tasks.push(std::move(task));
}
/// Runs all the tasks added with addPostCollectingTask(), in the order they were added.
void BackupEntriesCollector::runPostCollectingTasks()
{
    /// Post collecting tasks can add other post collecting tasks, our code is fine with that:
    /// a task is removed from the queue before it's run and the queue is checked again after each task.
    while (!post_collecting_tasks.empty())
    {
        auto next_task = std::move(post_collecting_tasks.front());
        post_collecting_tasks.pop();
        std::move(next_task)();
    }
}
/// Always throws CANNOT_BACKUP_TABLE saying that the table engine `table_engine` doesn't support partitions,
/// so the table `storage_id` cannot be backed up.
void BackupEntriesCollector::throwPartitionsNotSupported(const StorageID & storage_id, const String & table_engine)
{
    throw Exception(
        ErrorCodes::CANNOT_BACKUP_TABLE, "Table engine {} doesn't support partitions, cannot backup table {}", table_engine, storage_id.getFullTableName());
}
}

View File

@ -0,0 +1,138 @@
#pragma once
#include <Backups/BackupSettings.h>
#include <Databases/DDLRenamingVisitor.h>
#include <Core/QualifiedTableName.h>
#include <Parsers/ASTBackupQuery.h>
#include <Storages/IStorage_fwd.h>
#include <Storages/TableLockHolder.h>
#include <filesystem>
namespace DB
{
class IBackupEntry;
using BackupEntryPtr = std::shared_ptr<const IBackupEntry>;
using BackupEntries = std::vector<std::pair<String, BackupEntryPtr>>;
class IBackupCoordination;
class IDatabase;
using DatabasePtr = std::shared_ptr<IDatabase>;
struct StorageID;
/// Collects backup entries for all databases and tables which should be put to a backup.
/// Entries are accumulated through addBackupEntry() / addBackupEntries() and handed out by getBackupEntries().
/// The class is non-copyable.
class BackupEntriesCollector : private boost::noncopyable
{
public:
/// `timeout_` defaults to -1 seconds, which means "no timeout".
BackupEntriesCollector(const ASTBackupQuery::Elements & backup_query_elements_,
const BackupSettings & backup_settings_,
std::shared_ptr<IBackupCoordination> backup_coordination_,
const ContextPtr & context_,
std::chrono::seconds timeout_ = std::chrono::seconds(-1) /* no timeout */);
~BackupEntriesCollector();
/// Collects backup entries and returns the result.
/// This function first generates a list of databases and then calls IDatabase::backup() for each database from this list.
/// At this moment IDatabase::backup() calls IStorage::backup() and they both call addBackupEntry() to build a list of backup entries.
BackupEntries getBackupEntries();
/// Simple accessors for the settings, coordination object and context stored in this collector.
const BackupSettings & getBackupSettings() const { return backup_settings; }
std::shared_ptr<IBackupCoordination> getBackupCoordination() const { return backup_coordination; }
ContextPtr getContext() const { return context; }
/// Adds a backup entry which will be later returned by getBackupEntries().
/// These functions can be called by implementations of IStorage::backup() in inherited storage classes.
/// addBackupEntries() throws LOGICAL_ERROR if the current stage is already kWritingBackup.
void addBackupEntry(const String & file_name, BackupEntryPtr backup_entry);
void addBackupEntries(const BackupEntries & backup_entries_);
void addBackupEntries(BackupEntries && backup_entries_);
/// Adds a function which must be called after all IStorage::backup() have finished their work on all hosts.
/// This function is designed to help making a consistent backup in some complex cases like
/// 1) we need to join (in a backup) the data of replicated tables gathered on different hosts.
/// Throws LOGICAL_ERROR if the current stage is already kWritingBackup.
void addPostCollectingTask(std::function<void()> task);
/// Writing a backup includes a few stages:
enum class Stage
{
/// Initial stage.
kPreparing,
/// Finding all tables and databases which we're going to put to the backup.
kFindingTables,
/// Making temporary hard links and preparing backup entries.
kExtractingDataFromTables,
/// Running special tasks for replicated databases or tables which can also prepare some backup entries.
kRunningPostTasks,
/// Writing backup entries to the backup and removing temporary hard links.
kWritingBackup,
/// An error happened during any of the stages above, the backup won't be written.
kError,
};
/// Returns a textual representation of a stage.
static std::string_view toString(Stage stage);
/// Throws an exception that a specified table engine doesn't support partitions.
[[noreturn]] static void throwPartitionsNotSupported(const StorageID & storage_id, const String & table_engine);
private:
void setStage(Stage new_stage, const String & error_message = {});
void calculateRootPathInBackup();
void collectDatabasesAndTablesInfo();
void collectTableInfo(const QualifiedTableName & table_name, bool is_temporary_table, const std::optional<ASTs> & partitions, bool throw_if_not_found);
void collectDatabaseInfo(const String & database_name, const std::set<DatabaseAndTableName> & except_table_names, bool throw_if_not_found);
void collectAllDatabasesInfo(const std::set<String> & except_database_names, const std::set<DatabaseAndTableName> & except_table_names);
void checkConsistency();
void makeBackupEntriesForDatabasesDefs();
void makeBackupEntriesForTablesDefs();
void makeBackupEntriesForTablesData();
/// Runs the tasks added with addPostCollectingTask() until the queue is empty.
void runPostCollectingTasks();
const ASTBackupQuery::Elements backup_query_elements;
const BackupSettings backup_settings;
std::shared_ptr<IBackupCoordination> backup_coordination;
ContextPtr context;
std::chrono::seconds timeout;
Poco::Logger * log;
/// The stage this collector is currently in; a new collector starts at kPreparing.
Stage current_stage = Stage::kPreparing;
std::filesystem::path root_path_in_backup;
DDLRenamingMap renaming_map;
/// What is stored per database in `database_infos`.
struct DatabaseInfo
{
DatabasePtr database;
ASTPtr create_database_query;
};
/// What is stored per table in `table_infos`.
struct TableInfo
{
DatabasePtr database;
StoragePtr storage;
TableLockHolder table_lock;
ASTPtr create_table_query;
std::filesystem::path data_path_in_backup;
std::optional<ASTs> partitions;
};
/// Identifies a table by its qualified name and a "temporary" flag.
/// Used as the key of `table_infos` and as the element of `previous_table_names`,
/// which is why it provides both equality and ordering.
struct TableKey
{
QualifiedTableName name;
bool is_temporary = false;
bool operator ==(const TableKey & right) const;
bool operator <(const TableKey & right) const;
};
std::unordered_map<String, DatabaseInfo> database_infos;
std::map<TableKey, TableInfo> table_infos;
std::optional<std::set<String>> previous_database_names;
std::optional<std::set<TableKey>> previous_table_names;
bool consistent = false;
/// Backup entries collected so far; addBackupEntries() appends to the end of this list.
BackupEntries backup_entries;
/// FIFO queue of tasks added by addPostCollectingTask() and drained by runPostCollectingTasks().
std::queue<std::function<void()>> post_collecting_tasks;
};
}

View File

@ -1,6 +1,7 @@
#pragma once
#include <Backups/IBackupEntry.h>
#include <base/defines.h>
#include <mutex>
namespace Poco { class TemporaryFile; }
@ -41,7 +42,7 @@ public:
private:
const DiskPtr disk;
const String file_path;
mutable std::optional<UInt64> file_size;
mutable std::optional<UInt64> file_size TSA_GUARDED_BY(get_file_size_mutex);
mutable std::mutex get_file_size_mutex;
const std::optional<UInt128> checksum;
const std::shared_ptr<Poco::TemporaryFile> temporary_file;

View File

@ -36,7 +36,7 @@ namespace ErrorCodes
extern const int WRONG_BASE_BACKUP;
extern const int BACKUP_ENTRY_ALREADY_EXISTS;
extern const int BACKUP_ENTRY_NOT_FOUND;
extern const int BAD_ARGUMENTS;
extern const int BACKUP_IS_EMPTY;
extern const int LOGICAL_ERROR;
}
@ -65,6 +65,14 @@ namespace
{
return hexChecksum(size_and_checksum.second) + std::to_string(size_and_checksum.first);
}
/// Entries' file names are stored in the backup without a leading slash,
/// so a single '/' at the very beginning of `path` (if there is one) is dropped.
String removeLeadingSlash(const String & path)
{
    const bool has_leading_slash = !path.empty() && path.front() == '/';
    return has_leading_slash ? path.substr(1) : path;
}
}
@ -151,7 +159,7 @@ BackupImpl::BackupImpl(
, uuid(backup_uuid_)
, version(CURRENT_BACKUP_VERSION)
, base_backup_info(base_backup_info_)
, log(&Poco::Logger::get("Backup"))
, log(&Poco::Logger::get("BackupImpl"))
{
open(context_);
}
@ -218,13 +226,6 @@ void BackupImpl::close()
{
std::lock_guard lock{mutex};
if (!is_internal_backup && writing_finalized)
{
LOG_TRACE(log, "Finalizing backup {}", backup_name);
writeBackupMetadata();
LOG_INFO(log, "Finalized backup {}", backup_name);
}
archive_readers.clear();
for (auto & archive_writer : archive_writers)
archive_writer = {"", nullptr};
@ -249,10 +250,12 @@ void BackupImpl::writeBackupMetadata()
config->setString("timestamp", toString(LocalDateTime{timestamp}));
config->setString("uuid", toString(*uuid));
auto all_file_infos = coordination->getAllFileInfos();
if (base_backup_info)
{
bool base_backup_in_use = false;
for (const auto & info : coordination->getAllFileInfos())
for (const auto & info : all_file_infos)
{
if (info.base_size)
base_backup_in_use = true;
@ -266,13 +269,13 @@ void BackupImpl::writeBackupMetadata()
}
size_t index = 0;
for (const auto & info : coordination->getAllFileInfos())
for (const auto & info : all_file_infos)
{
String prefix = index ? "contents.file[" + std::to_string(index) + "]." : "contents.file.";
config->setString(prefix + "name", info.file_name);
config->setUInt(prefix + "size", info.size);
if (info.size)
{
config->setString(prefix + "name", info.file_name);
config->setString(prefix + "checksum", hexChecksum(info.checksum));
if (info.base_size)
{
@ -303,6 +306,7 @@ void BackupImpl::writeBackupMetadata()
else
out = writer->writeFile(".backup");
out->write(str.data(), str.size());
out->finalize();
}
void BackupImpl::readBackupMetadata()
@ -375,18 +379,25 @@ void BackupImpl::readBackupMetadata()
}
}
Strings BackupImpl::listFiles(const String & prefix, const String & terminator) const
Strings BackupImpl::listFiles(const String & directory, bool recursive) const
{
std::lock_guard lock{mutex};
if (!prefix.ends_with('/') && !prefix.empty())
throw Exception("prefix should end with '/'", ErrorCodes::BAD_ARGUMENTS);
return coordination->listFiles(prefix, terminator);
auto adjusted_dir = removeLeadingSlash(directory);
return coordination->listFiles(adjusted_dir, recursive);
}
/// Returns what the coordination reports from hasFiles() for `directory`
/// after a leading slash (if any) has been removed from it. The backup's mutex is held during the call.
bool BackupImpl::hasFiles(const String & directory) const
{
    std::lock_guard lock{mutex};
    return coordination->hasFiles(removeLeadingSlash(directory));
}
bool BackupImpl::fileExists(const String & file_name) const
{
std::lock_guard lock{mutex};
return coordination->getFileInfo(file_name).has_value();
auto adjusted_path = removeLeadingSlash(file_name);
return coordination->getFileInfo(adjusted_path).has_value();
}
bool BackupImpl::fileExists(const SizeAndChecksum & size_and_checksum) const
@ -398,7 +409,8 @@ bool BackupImpl::fileExists(const SizeAndChecksum & size_and_checksum) const
UInt64 BackupImpl::getFileSize(const String & file_name) const
{
std::lock_guard lock{mutex};
auto info = coordination->getFileInfo(file_name);
auto adjusted_path = removeLeadingSlash(file_name);
auto info = coordination->getFileInfo(adjusted_path);
if (!info)
throw Exception(
ErrorCodes::BACKUP_ENTRY_NOT_FOUND, "Backup {}: Entry {} not found in the backup", backup_name, quoteString(file_name));
@ -408,7 +420,8 @@ UInt64 BackupImpl::getFileSize(const String & file_name) const
UInt128 BackupImpl::getFileChecksum(const String & file_name) const
{
std::lock_guard lock{mutex};
auto info = coordination->getFileInfo(file_name);
auto adjusted_path = removeLeadingSlash(file_name);
auto info = coordination->getFileInfo(adjusted_path);
if (!info)
throw Exception(
ErrorCodes::BACKUP_ENTRY_NOT_FOUND, "Backup {}: Entry {} not found in the backup", backup_name, quoteString(file_name));
@ -418,7 +431,8 @@ UInt128 BackupImpl::getFileChecksum(const String & file_name) const
SizeAndChecksum BackupImpl::getFileSizeAndChecksum(const String & file_name) const
{
std::lock_guard lock{mutex};
auto info = coordination->getFileInfo(file_name);
auto adjusted_path = removeLeadingSlash(file_name);
auto info = coordination->getFileInfo(adjusted_path);
if (!info)
throw Exception(
ErrorCodes::BACKUP_ENTRY_NOT_FOUND, "Backup {}: Entry {} not found in the backup", backup_name, quoteString(file_name));
@ -436,17 +450,18 @@ BackupEntryPtr BackupImpl::readFile(const SizeAndChecksum & size_and_checksum) c
if (open_mode != OpenMode::READ)
throw Exception("Backup is not opened for reading", ErrorCodes::LOGICAL_ERROR);
if (!size_and_checksum.first)
{
/// Entry's data is empty.
return std::make_unique<BackupEntryFromMemory>(nullptr, 0, UInt128{0, 0});
}
auto info_opt = coordination->getFileInfo(size_and_checksum);
if (!info_opt)
throw Exception(
ErrorCodes::BACKUP_ENTRY_NOT_FOUND, "Backup {}: Entry {} not found in the backup", backup_name, formatSizeAndChecksum(size_and_checksum));
const auto & info = *info_opt;
if (!info.size)
{
/// Entry's data is empty.
return std::make_unique<BackupEntryFromMemory>(nullptr, 0, UInt128{0, 0});
}
if (!info.base_size)
{
@ -494,12 +509,16 @@ void BackupImpl::writeFile(const String & file_name, BackupEntryPtr entry)
if (open_mode != OpenMode::WRITE)
throw Exception("Backup is not opened for writing", ErrorCodes::LOGICAL_ERROR);
if (coordination->getFileInfo(file_name))
if (writing_finalized)
throw Exception("Backup is already finalized", ErrorCodes::LOGICAL_ERROR);
auto adjusted_path = removeLeadingSlash(file_name);
if (coordination->getFileInfo(adjusted_path))
throw Exception(
ErrorCodes::BACKUP_ENTRY_ALREADY_EXISTS, "Backup {}: Entry {} already exists", backup_name, quoteString(file_name));
FileInfo info;
info.file_name = file_name;
info.file_name = adjusted_path;
size_t size = entry->getSize();
info.size = size;
@ -520,13 +539,13 @@ void BackupImpl::writeFile(const String & file_name, BackupEntryPtr entry)
}
/// Check if an entry with such name exists in the base backup.
bool base_exists = (base_backup && base_backup->fileExists(file_name));
bool base_exists = (base_backup && base_backup->fileExists(adjusted_path));
UInt64 base_size = 0;
UInt128 base_checksum{0, 0};
if (base_exists)
{
base_size = base_backup->getFileSize(file_name);
base_checksum = base_backup->getFileChecksum(file_name);
base_size = base_backup->getFileSize(adjusted_path);
base_checksum = base_backup->getFileChecksum(adjusted_path);
}
std::unique_ptr<SeekableReadBuffer> read_buffer; /// We'll set that later.
@ -647,6 +666,7 @@ void BackupImpl::writeFile(const String & file_name, BackupEntryPtr entry)
}
copyData(*read_buffer, *out);
out->finalize();
}
@ -656,6 +676,19 @@ void BackupImpl::finalizeWriting()
if (open_mode != OpenMode::WRITE)
throw Exception("Backup is not opened for writing", ErrorCodes::LOGICAL_ERROR);
if (writing_finalized)
throw Exception("Backup is already finalized", ErrorCodes::LOGICAL_ERROR);
if (!coordination->hasFiles(""))
throw Exception("Backup must not be empty", ErrorCodes::BACKUP_IS_EMPTY);
if (!is_internal_backup)
{
LOG_TRACE(log, "Finalizing backup {}", backup_name);
writeBackupMetadata();
LOG_TRACE(log, "Finalized backup {}", backup_name);
}
writing_finalized = true;
}

View File

@ -57,7 +57,8 @@ public:
OpenMode getOpenMode() const override { return open_mode; }
time_t getTimestamp() const override;
UUID getUUID() const override { return *uuid; }
Strings listFiles(const String & prefix, const String & terminator) const override;
Strings listFiles(const String & directory, bool recursive) const override;
bool hasFiles(const String & directory) const override;
bool fileExists(const String & file_name) const override;
bool fileExists(const SizeAndChecksum & size_and_checksum) const override;
UInt64 getFileSize(const String & file_name) const override;

View File

@ -1,437 +1,60 @@
#include <Backups/BackupUtils.h>
#include <Backups/BackupEntryFromMemory.h>
#include <Backups/BackupSettings.h>
#include <Backups/DDLCompareUtils.h>
#include <Backups/DDLRenamingVisitor.h>
#include <Backups/IBackup.h>
#include <Backups/IBackupCoordination.h>
#include <Backups/formatTableNameOrTemporaryTableName.h>
#include <Backups/replaceTableUUIDWithMacroInReplicatedTableDef.h>
#include <Common/escapeForFileName.h>
#include <Backups/RestoreSettings.h>
#include <Access/Common/AccessRightsElement.h>
#include <Databases/IDatabase.h>
#include <Interpreters/Context.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/formatAST.h>
#include <Storages/IStorage.h>
#include <Storages/StorageReplicatedMergeTree.h>
#include <Databases/DDLRenamingVisitor.h>
#include <Interpreters/DatabaseCatalog.h>
namespace DB
{
namespace ErrorCodes
{
extern const int CANNOT_BACKUP_TABLE;
extern const int CANNOT_BACKUP_DATABASE;
extern const int BACKUP_IS_EMPTY;
extern const int LOGICAL_ERROR;
}
namespace
DDLRenamingMap makeRenamingMapFromBackupQuery(const ASTBackupQuery::Elements & elements)
{
/// Helper to calculate paths inside a backup.
class PathsInBackup
DDLRenamingMap map;
for (const auto & element : elements)
{
public:
/// Returns the path to metadata in backup.
static String getMetadataPath(const DatabaseAndTableName & table_name, size_t shard_index, size_t replica_index)
switch (element.type)
{
if (table_name.first.empty() || table_name.second.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Database name and table name must not be empty");
return getPathForShardAndReplica(shard_index, replica_index) + String{"metadata/"} + escapeForFileName(table_name.first) + "/"
+ escapeForFileName(table_name.second) + ".sql";
}
static String getMetadataPath(const String & database_name, size_t shard_index, size_t replica_index)
{
if (database_name.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Database name must not be empty");
return getPathForShardAndReplica(shard_index, replica_index) + String{"metadata/"} + escapeForFileName(database_name) + ".sql";
}
static String getMetadataPath(const IAST & create_query, size_t shard_index, size_t replica_index)
{
const auto & create = create_query.as<const ASTCreateQuery &>();
if (!create.table)
return getMetadataPath(create.getDatabase(), shard_index, replica_index);
if (create.temporary)
return getMetadataPath({DatabaseCatalog::TEMPORARY_DATABASE, create.getTable()}, shard_index, replica_index);
return getMetadataPath({create.getDatabase(), create.getTable()}, shard_index, replica_index);
}
/// Returns the path to table's data in backup.
static String getDataPath(const DatabaseAndTableName & table_name, size_t shard_index, size_t replica_index)
{
if (table_name.first.empty() || table_name.second.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Database name and table name must not be empty");
assert(!table_name.first.empty() && !table_name.second.empty());
return getPathForShardAndReplica(shard_index, replica_index) + String{"data/"} + escapeForFileName(table_name.first) + "/"
+ escapeForFileName(table_name.second) + "/";
}
static String getDataPath(const IAST & create_query, size_t shard_index, size_t replica_index)
{
const auto & create = create_query.as<const ASTCreateQuery &>();
if (!create.table)
return {};
if (create.temporary)
return getDataPath({DatabaseCatalog::TEMPORARY_DATABASE, create.getTable()}, shard_index, replica_index);
return getDataPath({create.getDatabase(), create.getTable()}, shard_index, replica_index);
}
private:
static String getPathForShardAndReplica(size_t shard_index, size_t replica_index)
{
if (shard_index || replica_index)
return fmt::format("shards/{}/replicas/{}/", shard_index, replica_index);
else
return "";
}
};
using Kind = ASTBackupQuery::Kind;
using Element = ASTBackupQuery::Element;
using Elements = ASTBackupQuery::Elements;
using ElementType = ASTBackupQuery::ElementType;
/// Makes backup entries to backup databases and tables according to the elements of ASTBackupQuery.
/// Keep this class consistent with RestoreTasksBuilder.
class BackupEntriesBuilder
{
public:
BackupEntriesBuilder(const ContextPtr & context_, const BackupSettings & backup_settings_, std::shared_ptr<IBackupCoordination> backup_coordination_)
: context(context_), backup_settings(backup_settings_), backup_coordination(backup_coordination_)
{
}
/// Prepares internal structures for making backup entries.
void prepare(const ASTBackupQuery::Elements & elements, std::chrono::seconds timeout_for_other_nodes_to_prepare)
{
try
case ASTBackupQuery::TABLE:
{
prepareImpl(elements);
}
catch (...)
{
backup_coordination->finishPreparing(backup_settings.host_id, getCurrentExceptionMessage(false));
throw;
const String & table_name = element.table_name;
const String & database_name = element.database_name;
const String & new_table_name = element.new_table_name;
const String & new_database_name = element.new_database_name;
assert(!table_name.empty());
assert(!new_table_name.empty());
assert(!database_name.empty());
assert(!new_database_name.empty());
map.setNewTableName({database_name, table_name}, {new_database_name, new_table_name});
break;
}
/// We've finished restoring metadata, now we will wait for other replicas and shards to finish too.
/// We need this waiting because we're going to call some functions which requires data collected from other nodes too,
/// see IRestoreCoordination::checkTablesNotExistedInReplicatedDBs(), IRestoreCoordination::getReplicatedTableDataPath().
backup_coordination->finishPreparing(backup_settings.host_id);
backup_coordination->waitForAllHostsPrepared(
BackupSettings::Util::filterHostIDs(
backup_settings.cluster_host_ids, backup_settings.shard_num, backup_settings.replica_num),
timeout_for_other_nodes_to_prepare);
}
/// Makes backup entries, should be called after prepare().
BackupEntries makeBackupEntries() const
{
BackupEntries res;
for (const auto & info : databases | boost::adaptors::map_values)
res.push_back(makeBackupEntryForMetadata(*info.create_query));
for (const auto & info : tables | boost::adaptors::map_values)
case ASTBackupQuery::TEMPORARY_TABLE:
{
res.push_back(makeBackupEntryForMetadata(*info.create_query));
appendBackupEntriesForData(res, info);
const String & table_name = element.table_name;
const String & new_table_name = element.new_table_name;
assert(!table_name.empty());
assert(!new_table_name.empty());
map.setNewTemporaryTableName(table_name, new_table_name);
break;
}
/// A backup cannot be empty.
if (res.empty())
throw Exception("Backup must not be empty", ErrorCodes::BACKUP_IS_EMPTY);
return res;
}
private:
void prepareImpl(const ASTBackupQuery::Elements & elements)
{
calculateShardNumAndReplicaNumInBackup();
renaming_settings.setFromBackupQuery(elements);
for (const auto & element : elements)
case ASTBackupQuery::DATABASE:
{
switch (element.type)
{
case ElementType::TABLE:
{
prepareToBackupTable(element.name, element.partitions);
break;
}
case ElementType::DATABASE:
{
const String & database_name = element.name.first;
prepareToBackupDatabase(database_name, element.except_list);
break;
}
case ElementType::ALL_DATABASES:
{
prepareToBackupAllDatabases(element.except_list);
break;
}
}
}
}
void calculateShardNumAndReplicaNumInBackup()
{
size_t shard_num = 0;
size_t replica_num = 0;
if (!backup_settings.host_id.empty())
{
std::tie(shard_num, replica_num)
= BackupSettings::Util::findShardNumAndReplicaNum(backup_settings.cluster_host_ids, backup_settings.host_id);
}
shard_num_in_backup = shard_num;
replica_num_in_backup = replica_num;
}
/// Prepares to backup a single table and probably its database's definition.
void prepareToBackupTable(const DatabaseAndTableName & table_name_, const ASTs & partitions_)
{
auto [database, storage] = DatabaseCatalog::instance().getDatabaseAndTable({table_name_.first, table_name_.second}, context);
prepareToBackupTable(table_name_, {database, storage}, partitions_);
}
void prepareToBackupTable(const DatabaseAndTableName & table_name_, const DatabaseAndTable & table_, const ASTs & partitions_)
{
const auto & database = table_.first;
const auto & storage = table_.second;
if (!database->hasTablesToBackup())
throw Exception(
ErrorCodes::CANNOT_BACKUP_TABLE,
"Cannot backup the {} because it's contained in a hollow database (engine: {})",
formatTableNameOrTemporaryTableName(table_name_),
database->getEngineName());
/// Check that we are not trying to backup the same table again.
DatabaseAndTableName name_in_backup = renaming_settings.getNewTableName(table_name_);
if (tables.contains(name_in_backup))
throw Exception(ErrorCodes::CANNOT_BACKUP_TABLE, "Cannot backup the {} twice", formatTableNameOrTemporaryTableName(name_in_backup));
/// Make a create query for this table.
auto create_query = prepareCreateQueryForBackup(database->getCreateTableQuery(table_name_.second, context));
String data_path = PathsInBackup::getDataPath(*create_query, shard_num_in_backup, replica_num_in_backup);
String zk_path;
BackupEntries data = prepareToBackupTableData(table_name_, storage, partitions_, data_path, zk_path);
TableInfo info;
info.table_name = table_name_;
info.create_query = create_query;
info.storage = storage;
info.data = std::move(data);
info.data_path = std::move(data_path);
info.zk_path = std::move(zk_path);
tables[name_in_backup] = std::move(info);
}
BackupEntries prepareToBackupTableData(const DatabaseAndTableName & table_name_, const StoragePtr & storage_, const ASTs & partitions_, const String & data_path, String & zk_path)
{
zk_path.clear();
const StorageReplicatedMergeTree * replicated_table = typeid_cast<const StorageReplicatedMergeTree *>(storage_.get());
bool has_data = (storage_->hasDataToBackup() || replicated_table) && !backup_settings.structure_only;
if (!has_data)
return {};
BackupEntries data = storage_->backupData(context, partitions_);
if (!replicated_table)
return data;
zk_path = replicated_table->getZooKeeperName() + replicated_table->getZooKeeperPath();
backup_coordination->addReplicatedTableDataPath(zk_path, data_path);
std::unordered_map<String, SipHash> parts;
for (const auto & [relative_path, backup_entry] : data)
{
size_t slash_pos = relative_path.find('/');
if (slash_pos != String::npos)
{
String part_name = relative_path.substr(0, slash_pos);
if (MergeTreePartInfo::tryParsePartName(part_name, MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING))
{
auto & hash = parts[part_name];
if (relative_path.ends_with(".bin"))
{
auto checksum = backup_entry->getChecksum();
hash.update(relative_path);
hash.update(backup_entry->getSize());
hash.update(*checksum);
}
}
}
const String & database_name = element.database_name;
const String & new_database_name = element.new_database_name;
assert(!database_name.empty());
assert(!new_database_name.empty());
map.setNewDatabaseName(database_name, new_database_name);
break;
}
std::vector<IBackupCoordination::PartNameAndChecksum> part_names_and_checksums;
part_names_and_checksums.reserve(parts.size());
for (auto & [part_name, hash] : parts)
{
UInt128 checksum;
hash.get128(checksum);
auto & part_name_and_checksum = part_names_and_checksums.emplace_back();
part_name_and_checksum.part_name = part_name;
part_name_and_checksum.checksum = checksum;
}
backup_coordination->addReplicatedTablePartNames(backup_settings.host_id, table_name_, zk_path, part_names_and_checksums);
return data;
case ASTBackupQuery::ALL: break;
}
/// Prepares to restore a database and all tables in it.
void prepareToBackupDatabase(const String & database_name_, const std::set<String> & except_list_)
{
auto database = DatabaseCatalog::instance().getDatabase(database_name_, context);
prepareToBackupDatabase(database_name_, database, except_list_);
}
void prepareToBackupDatabase(const String & database_name_, const DatabasePtr & database_, const std::set<String> & except_list_)
{
/// Check that we are not trying to restore the same database again.
String name_in_backup = renaming_settings.getNewDatabaseName(database_name_);
if (databases.contains(name_in_backup))
throw Exception(ErrorCodes::CANNOT_BACKUP_DATABASE, "Cannot backup the database {} twice", backQuoteIfNeed(name_in_backup));
/// Of course we're not going to backup the definition of the system or the temporary database.
if (!isSystemOrTemporaryDatabase(database_name_))
{
/// Make a create query for this database.
auto create_query = prepareCreateQueryForBackup(database_->getCreateDatabaseQuery());
DatabaseInfo info;
info.create_query = create_query;
databases[name_in_backup] = std::move(info);
}
/// Backup tables in this database.
if (database_->hasTablesToBackup())
{
for (auto it = database_->getTablesIterator(context); it->isValid(); it->next())
{
if (except_list_.contains(it->name()))
continue;
prepareToBackupTable({database_name_, it->name()}, {database_, it->table()}, {});
}
}
}
/// Prepares to backup all the databases contained in the backup.
void prepareToBackupAllDatabases(const std::set<String> & except_list_)
{
for (const auto & [database_name, database] : DatabaseCatalog::instance().getDatabases())
{
if (except_list_.contains(database_name))
continue;
if (isSystemOrTemporaryDatabase(database_name))
continue;
prepareToBackupDatabase(database_name, database, {});
}
}
/// Do renaming in the create query according to the renaming config.
std::shared_ptr<ASTCreateQuery> prepareCreateQueryForBackup(const ASTPtr & ast) const
{
ASTPtr query = ast;
::DB::renameInCreateQuery(query, context, renaming_settings);
auto create_query = typeid_cast<std::shared_ptr<ASTCreateQuery>>(query);
replaceTableUUIDWithMacroInReplicatedTableDef(*create_query, create_query->uuid);
create_query->uuid = UUIDHelpers::Nil;
create_query->to_inner_uuid = UUIDHelpers::Nil;
return create_query;
}
static bool isSystemOrTemporaryDatabase(const String & database_name)
{
return (database_name == DatabaseCatalog::SYSTEM_DATABASE) || (database_name == DatabaseCatalog::TEMPORARY_DATABASE);
}
std::pair<String, BackupEntryPtr> makeBackupEntryForMetadata(const IAST & create_query) const
{
auto metadata_entry = std::make_unique<BackupEntryFromMemory>(serializeAST(create_query));
String metadata_path = PathsInBackup::getMetadataPath(create_query, shard_num_in_backup, replica_num_in_backup);
return {metadata_path, std::move(metadata_entry)};
}
struct TableInfo;
void appendBackupEntriesForData(BackupEntries & res, const TableInfo & info) const
{
if (info.zk_path.empty())
{
for (const auto & [relative_path, backup_entry] : info.data)
res.emplace_back(info.data_path + relative_path, backup_entry);
return;
}
Strings data_paths = backup_coordination->getReplicatedTableDataPaths(info.zk_path);
Strings part_names = backup_coordination->getReplicatedTablePartNames(backup_settings.host_id, info.table_name, info.zk_path);
std::unordered_set<std::string_view> part_names_set{part_names.begin(), part_names.end()};
for (const auto & [relative_path, backup_entry] : info.data)
{
size_t slash_pos = relative_path.find('/');
if (slash_pos != String::npos)
{
String part_name = relative_path.substr(0, slash_pos);
if (MergeTreePartInfo::tryParsePartName(part_name, MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING))
{
if (!part_names_set.contains(part_name))
continue;
for (const auto & data_path : data_paths)
res.emplace_back(data_path + relative_path, backup_entry);
continue;
}
}
res.emplace_back(info.data_path + relative_path, backup_entry);
}
}
/// Information which is used to make an instance of RestoreTableFromBackupTask.
struct TableInfo
{
DatabaseAndTableName table_name;
ASTPtr create_query;
StoragePtr storage;
BackupEntries data;
String data_path;
String zk_path;
};
/// Information which is used to make an instance of RestoreDatabaseFromBackupTask.
struct DatabaseInfo
{
ASTPtr create_query;
};
ContextPtr context;
BackupSettings backup_settings;
std::shared_ptr<IBackupCoordination> backup_coordination;
size_t shard_num_in_backup = 0;
size_t replica_num_in_backup = 0;
DDLRenamingSettings renaming_settings;
std::unordered_map<String /* db_name_in_backup */, DatabaseInfo> databases;
std::map<DatabaseAndTableName /* table_name_in_backup */, TableInfo> tables;
};
}
BackupEntries makeBackupEntries(
const ContextPtr & context,
const Elements & elements,
const BackupSettings & backup_settings,
std::shared_ptr<IBackupCoordination> backup_coordination,
std::chrono::seconds timeout_for_other_nodes_to_prepare)
{
BackupEntriesBuilder builder{context, backup_settings, backup_coordination};
builder.prepare(elements, timeout_for_other_nodes_to_prepare);
return builder.makeBackupEntries();
}
return map;
}
@ -499,13 +122,73 @@ void writeBackupEntries(BackupMutablePtr backup, BackupEntries && backup_entries
/// And IBackup's implementation should remove the backup in its destructor if finalizeWriting() hasn't called before.
std::rethrow_exception(exception);
}
}
backup->finalizeWriting();
/// Runs all data restoring tasks.
/// Each task is scheduled on `thread_pool` if trySchedule() accepts it, otherwise it is run on the calling thread.
/// The function waits until every started task has finished, then clears `tasks`
/// and rethrows the first exception captured from a task (if any).
void restoreTablesData(DataRestoreTasks && tasks, ThreadPool & thread_pool)
{
/// State shared between the calling thread and the jobs; `mutex` protects `num_active_jobs` and `exception`.
size_t num_active_jobs = 0;
std::mutex mutex;
std::condition_variable event;
std::exception_ptr exception;
for (auto & task : tasks)
{
{
std::unique_lock lock{mutex};
/// Don't start any more tasks once a failure has been recorded.
if (exception)
break;
++num_active_jobs;
}
/// The job captures everything by reference, so this function must not return
/// before all started jobs have finished (see the wait below).
auto job = [&]()
{
/// Decrement the counter of active jobs and wake up the waiting thread when it drops to zero.
SCOPE_EXIT({
std::lock_guard lock{mutex};
if (!--num_active_jobs)
event.notify_all();
});
{
/// Skip the task if another one has already failed.
std::lock_guard lock{mutex};
if (exception)
return;
}
try
{
std::move(task)();
}
catch (...)
{
/// Only the first exception is kept.
std::lock_guard lock{mutex};
if (!exception)
exception = std::current_exception();
}
};
/// If the pool couldn't schedule the job, run it right here.
if (!thread_pool.trySchedule(job))
job();
}
{
/// Wait until all started jobs have finished.
std::unique_lock lock{mutex};
event.wait(lock, [&] { return !num_active_jobs; });
}
tasks.clear();
if (exception)
{
/// Propagate the first failure of a restoring task to the caller.
std::rethrow_exception(exception);
}
}
/// Returns access required to execute BACKUP query.
AccessRightsElements getRequiredAccessToBackup(const ASTBackupQuery::Elements & elements, const BackupSettings & backup_settings)
AccessRightsElements getRequiredAccessToBackup(const ASTBackupQuery::Elements & elements)
{
AccessRightsElements required_access;
for (const auto & element : elements)
@ -514,32 +197,27 @@ AccessRightsElements getRequiredAccessToBackup(const ASTBackupQuery::Elements &
{
case ASTBackupQuery::TABLE:
{
if (element.is_temp_db)
break;
AccessFlags flags = AccessType::SHOW_TABLES;
if (!backup_settings.structure_only)
flags |= AccessType::SELECT;
required_access.emplace_back(flags, element.name.first, element.name.second);
required_access.emplace_back(AccessType::BACKUP, element.database_name, element.table_name);
break;
}
case ASTBackupQuery::TEMPORARY_TABLE:
{
/// It's always allowed to backup temporary tables.
break;
}
case ASTBackupQuery::DATABASE:
{
if (element.is_temp_db)
break;
AccessFlags flags = AccessType::SHOW_TABLES | AccessType::SHOW_DATABASES;
if (!backup_settings.structure_only)
flags |= AccessType::SELECT;
required_access.emplace_back(flags, element.name.first);
/// TODO: It's better to process `element.except_list` somehow.
/// TODO: It's better to process `element.except_tables` somehow.
required_access.emplace_back(AccessType::BACKUP, element.database_name);
break;
}
case ASTBackupQuery::ALL_DATABASES:
case ASTBackupQuery::ALL:
{
AccessFlags flags = AccessType::SHOW_TABLES | AccessType::SHOW_DATABASES;
if (!backup_settings.structure_only)
flags |= AccessType::SELECT;
required_access.emplace_back(flags);
/// TODO: It's better to process `element.except_list` somehow.
/// TODO: It's better to process `element.except_databases` & `element.except_tables` somehow.
required_access.emplace_back(AccessType::BACKUP);
break;
}
}

View File

@ -7,29 +7,23 @@
namespace DB
{
/// Forward declarations and aliases for the types that appear in the signatures below.
class IBackup;
using BackupPtr = std::shared_ptr<const IBackup>;
using BackupMutablePtr = std::shared_ptr<IBackup>;
class IBackupEntry;
using BackupEntryPtr = std::shared_ptr<const IBackupEntry>;
/// Each element pairs a String with a backup entry (presumably the entry's name inside the backup — TODO confirm).
using BackupEntries = std::vector<std::pair<String, BackupEntryPtr>>;
struct BackupSettings;
class IBackupCoordination;
/// NOTE(review): BackupEntries is aliased a second time here with BackupEntryPtr spelled out; both aliases name the same type.
using BackupEntries = std::vector<std::pair<String, std::shared_ptr<const IBackupEntry>>>;
/// A list of callables taking no arguments and returning nothing.
using DataRestoreTasks = std::vector<std::function<void()>>;
class AccessRightsElements;
class Context;
using ContextPtr = std::shared_ptr<const Context>;
class DDLRenamingMap;
/// Prepares backup entries.
/// `timeout_for_other_nodes_to_prepare` defaults to zero seconds.
BackupEntries makeBackupEntries(
const ContextPtr & context,
const ASTBackupQuery::Elements & elements,
const BackupSettings & backup_settings,
std::shared_ptr<IBackupCoordination> backup_coordination,
std::chrono::seconds timeout_for_other_nodes_to_prepare = std::chrono::seconds::zero());
/// Initializes a DDLRenamingMap from a BACKUP or RESTORE query.
DDLRenamingMap makeRenamingMapFromBackupQuery(const ASTBackupQuery::Elements & elements);
/// Write backup entries to an opened backup.
/// `backup_entries` is taken by rvalue reference, so the caller hands the entries over.
void writeBackupEntries(BackupMutablePtr backup, BackupEntries && backup_entries, ThreadPool & thread_pool);
/// Run data restoring tasks which insert data to tables.
/// `tasks` is taken by rvalue reference, so the caller hands the tasks over.
void restoreTablesData(DataRestoreTasks && tasks, ThreadPool & thread_pool);
/// Returns access required to execute BACKUP query.
/// NOTE(review): two overloads are declared here, one taking BackupSettings and one taking only the query elements.
AccessRightsElements getRequiredAccessToBackup(const ASTBackupQuery::Elements & elements, const BackupSettings & backup_settings);
AccessRightsElements getRequiredAccessToBackup(const ASTBackupQuery::Elements & elements);
}

View File

@ -4,13 +4,13 @@
#include <Backups/BackupSettings.h>
#include <Backups/BackupUtils.h>
#include <Backups/IBackupEntry.h>
#include <Backups/BackupEntriesCollector.h>
#include <Backups/BackupCoordinationDistributed.h>
#include <Backups/BackupCoordinationLocal.h>
#include <Backups/IRestoreTask.h>
#include <Backups/RestoreCoordinationDistributed.h>
#include <Backups/RestoreCoordinationLocal.h>
#include <Backups/RestoreSettings.h>
#include <Backups/RestoreUtils.h>
#include <Backups/RestorerFromBackup.h>
#include <Interpreters/Cluster.h>
#include <Interpreters/Context.h>
#include <Interpreters/executeDDLQueryOnCluster.h>
@ -51,119 +51,121 @@ UUID BackupsWorker::startMakingBackup(const ASTPtr & query, const ContextPtr & c
{
UUID backup_uuid = UUIDHelpers::generateV4();
auto backup_query = std::static_pointer_cast<ASTBackupQuery>(query->clone());
auto backup_info = BackupInfo::fromAST(*backup_query->backup_name);
auto backup_settings = BackupSettings::fromBackupQuery(*backup_query);
auto backup_info = BackupInfo::fromAST(*backup_query->backup_name);
bool on_cluster = !backup_query->cluster.empty();
ContextPtr context_in_use = context;
ContextMutablePtr mutable_context;
if (on_cluster || backup_settings.async)
context_in_use = mutable_context = Context::createCopy(context);
addInfo(backup_uuid, backup_info.toString(), BackupStatus::MAKING_BACKUP, backup_settings.internal);
std::shared_ptr<IBackupCoordination> backup_coordination;
SCOPE_EXIT({
if (backup_coordination && !backup_settings.internal)
backup_coordination->drop();
});
BackupMutablePtr backup;
ContextPtr cloned_context;
bool on_cluster = !backup_query->cluster.empty();
std::shared_ptr<BlockIO> on_cluster_io;
try
{
auto access_to_check = getRequiredAccessToBackup(backup_query->elements, backup_settings);
if (!on_cluster)
context->checkAccess(access_to_check);
ClusterPtr cluster;
if (on_cluster)
{
backup_query->cluster = context->getMacros()->expand(backup_query->cluster);
cluster = context->getCluster(backup_query->cluster);
backup_settings.cluster_host_ids = cluster->getHostIDs();
if (backup_settings.coordination_zk_path.empty())
{
String root_zk_path = context->getConfigRef().getString("backups.zookeeper_path", "/clickhouse/backups");
backup_settings.coordination_zk_path = root_zk_path + "/backup-" + toString(backup_uuid);
}
backup_settings.copySettingsToQuery(*backup_query);
}
if (!backup_settings.coordination_zk_path.empty())
backup_coordination = std::make_shared<BackupCoordinationDistributed>(
backup_settings.coordination_zk_path,
[global_context = context->getGlobalContext()] { return global_context->getZooKeeper(); });
else
backup_coordination = std::make_shared<BackupCoordinationLocal>();
BackupFactory::CreateParams backup_create_params;
backup_create_params.open_mode = IBackup::OpenMode::WRITE;
backup_create_params.context = context;
backup_create_params.backup_info = backup_info;
backup_create_params.base_backup_info = backup_settings.base_backup_info;
backup_create_params.compression_method = backup_settings.compression_method;
backup_create_params.compression_level = backup_settings.compression_level;
backup_create_params.password = backup_settings.password;
backup_create_params.backup_uuid = backup_uuid;
backup_create_params.is_internal_backup = backup_settings.internal;
backup_create_params.backup_coordination = backup_coordination;
backup = BackupFactory::instance().createBackup(backup_create_params);
ContextMutablePtr mutable_context;
if (on_cluster || backup_settings.async)
cloned_context = mutable_context = Context::createCopy(context);
else
cloned_context = context; /// No need to clone context
if (on_cluster)
{
DDLQueryOnClusterParams params;
params.cluster = cluster;
params.only_shard_num = backup_settings.shard_num;
params.only_replica_num = backup_settings.replica_num;
params.access_to_check = access_to_check;
mutable_context->setSetting("distributed_ddl_task_timeout", -1); // No timeout
mutable_context->setSetting("distributed_ddl_output_mode", Field{"throw"});
auto res = executeDDLQueryOnCluster(backup_query, mutable_context, params);
on_cluster_io = std::make_shared<BlockIO>(std::move(res));
}
}
catch (...)
{
setStatus(backup_uuid, BackupStatus::FAILED_TO_BACKUP);
throw;
}
auto job = [this,
backup,
backup_uuid,
backup_query,
backup_settings,
backup_coordination,
on_cluster_io,
cloned_context](bool in_separate_thread)
backup_info,
on_cluster,
context_in_use,
mutable_context](bool in_separate_thread) mutable
{
try
{
if (on_cluster_io)
/// Checks access rights if this is not ON CLUSTER query.
/// (If this is ON CLUSTER query executeDDLQueryOnCluster() will check access rights later.)
auto required_access = getRequiredAccessToBackup(backup_query->elements);
if (!on_cluster)
context_in_use->checkAccess(required_access);
/// Make a backup coordination.
std::shared_ptr<IBackupCoordination> backup_coordination;
SCOPE_EXIT({
if (backup_coordination && !backup_settings.internal)
backup_coordination->drop();
});
ClusterPtr cluster;
if (on_cluster)
{
backup_query->cluster = context_in_use->getMacros()->expand(backup_query->cluster);
cluster = context_in_use->getCluster(backup_query->cluster);
backup_settings.cluster_host_ids = cluster->getHostIDs();
if (backup_settings.coordination_zk_path.empty())
{
String root_zk_path = context_in_use->getConfigRef().getString("backups.zookeeper_path", "/clickhouse/backups");
backup_settings.coordination_zk_path = root_zk_path + "/backup-" + toString(backup_uuid);
}
}
if (!backup_settings.coordination_zk_path.empty())
{
backup_coordination = std::make_shared<BackupCoordinationDistributed>(
backup_settings.coordination_zk_path,
[global_context = context_in_use->getGlobalContext()] { return global_context->getZooKeeper(); });
}
else
{
backup_coordination = std::make_shared<BackupCoordinationLocal>();
}
/// Opens a backup for writing.
BackupFactory::CreateParams backup_create_params;
backup_create_params.open_mode = IBackup::OpenMode::WRITE;
backup_create_params.context = context_in_use;
backup_create_params.backup_info = backup_info;
backup_create_params.base_backup_info = backup_settings.base_backup_info;
backup_create_params.compression_method = backup_settings.compression_method;
backup_create_params.compression_level = backup_settings.compression_level;
backup_create_params.password = backup_settings.password;
backup_create_params.backup_uuid = backup_uuid;
backup_create_params.is_internal_backup = backup_settings.internal;
backup_create_params.backup_coordination = backup_coordination;
BackupMutablePtr backup = BackupFactory::instance().createBackup(backup_create_params);
/// Write the backup.
if (on_cluster)
{
DDLQueryOnClusterParams params;
params.cluster = cluster;
params.only_shard_num = backup_settings.shard_num;
params.only_replica_num = backup_settings.replica_num;
params.access_to_check = required_access;
mutable_context->setSetting("distributed_ddl_task_timeout", -1); // No timeout
mutable_context->setSetting("distributed_ddl_output_mode", Field{"throw"});
backup_settings.copySettingsToQuery(*backup_query);
auto res = executeDDLQueryOnCluster(backup_query, mutable_context, params);
auto on_cluster_io = std::make_shared<BlockIO>(std::move(res));
PullingPipelineExecutor executor(on_cluster_io->pipeline);
Block block;
while (executor.pull(block))
;
backup->finalizeWriting();
while (executor.pull(block));
}
else
{
std::optional<CurrentThread::QueryScope> query_scope;
if (in_separate_thread)
query_scope.emplace(cloned_context);
query_scope.emplace(context_in_use);
backup_query->setDatabase(cloned_context->getCurrentDatabase());
backup_query->setCurrentDatabase(context_in_use->getCurrentDatabase());
BackupEntries backup_entries;
{
auto timeout = std::chrono::seconds{context_in_use->getConfigRef().getInt("backups.backup_prepare_timeout", -1)};
BackupEntriesCollector backup_entries_collector{backup_query->elements, backup_settings, backup_coordination, context_in_use, timeout};
backup_entries = backup_entries_collector.getBackupEntries();
}
auto timeout_for_preparing = std::chrono::seconds{cloned_context->getConfigRef().getInt("backups.backup_prepare_timeout", -1)};
auto backup_entries
= makeBackupEntries(cloned_context, backup_query->elements, backup_settings, backup_coordination, timeout_for_preparing);
writeBackupEntries(backup, std::move(backup_entries), backups_thread_pool);
}
/// Finalize backup (write its metadata).
if (!backup_settings.internal)
backup->finalizeWriting();
/// Close the backup.
backup.reset();
setStatus(backup_uuid, BackupStatus::BACKUP_COMPLETE);
}
catch (...)
@ -175,7 +177,7 @@ UUID BackupsWorker::startMakingBackup(const ASTPtr & query, const ContextPtr & c
};
if (backup_settings.async)
backups_thread_pool.scheduleOrThrowOnError([job] { job(true); });
backups_thread_pool.scheduleOrThrowOnError([job]() mutable { job(true); });
else
job(false);
@ -187,85 +189,99 @@ UUID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePtr conte
{
UUID restore_uuid = UUIDHelpers::generateV4();
auto restore_query = std::static_pointer_cast<ASTBackupQuery>(query->clone());
auto backup_info = BackupInfo::fromAST(*restore_query->backup_name);
auto restore_settings = RestoreSettings::fromRestoreQuery(*restore_query);
auto backup_info = BackupInfo::fromAST(*restore_query->backup_name);
bool on_cluster = !restore_query->cluster.empty();
ContextMutablePtr context_in_use = context;
if (restore_settings.async || on_cluster)
context_in_use = Context::createCopy(context);
addInfo(restore_uuid, backup_info.toString(), BackupStatus::RESTORING, restore_settings.internal);
std::shared_ptr<IRestoreCoordination> restore_coordination;
SCOPE_EXIT({
if (restore_coordination && !restore_settings.internal)
restore_coordination->drop();
});
ContextMutablePtr cloned_context;
std::shared_ptr<BlockIO> on_cluster_io;
bool on_cluster = !restore_query->cluster.empty();
try
{
auto access_to_check = getRequiredAccessToRestore(restore_query->elements, restore_settings);
if (!on_cluster)
context->checkAccess(access_to_check);
ClusterPtr cluster;
if (on_cluster)
{
restore_query->cluster = context->getMacros()->expand(restore_query->cluster);
cluster = context->getCluster(restore_query->cluster);
restore_settings.cluster_host_ids = cluster->getHostIDs();
if (restore_settings.coordination_zk_path.empty())
{
String root_zk_path = context->getConfigRef().getString("backups.zookeeper_path", "/clickhouse/backups");
restore_settings.coordination_zk_path = root_zk_path + "/restore-" + toString(restore_uuid);
}
restore_settings.copySettingsToQuery(*restore_query);
}
if (!restore_settings.coordination_zk_path.empty())
restore_coordination = std::make_shared<RestoreCoordinationDistributed>(
restore_settings.coordination_zk_path,
[global_context = context->getGlobalContext()] { return global_context->getZooKeeper(); });
else
restore_coordination = std::make_shared<RestoreCoordinationLocal>();
if (on_cluster || restore_settings.async)
cloned_context = Context::createCopy(context);
else
cloned_context = context; /// No need to clone context
if (on_cluster)
{
DDLQueryOnClusterParams params;
params.cluster = cluster;
params.only_shard_num = restore_settings.shard_num;
params.only_replica_num = restore_settings.replica_num;
params.access_to_check = access_to_check;
cloned_context->setSetting("distributed_ddl_task_timeout", -1); // No timeout
cloned_context->setSetting("distributed_ddl_output_mode", Field{"throw"});
auto res = executeDDLQueryOnCluster(restore_query, cloned_context, params);
on_cluster_io = std::make_shared<BlockIO>(std::move(res));
}
}
catch (...)
{
setStatus(restore_uuid, BackupStatus::FAILED_TO_RESTORE);
throw;
}
auto job = [this,
backup_info,
restore_uuid,
restore_query,
restore_settings,
restore_coordination,
on_cluster_io,
cloned_context](bool in_separate_thread)
backup_info,
on_cluster,
context_in_use](bool in_separate_thread) mutable
{
try
{
if (on_cluster_io)
/// Open the backup for reading.
BackupFactory::CreateParams backup_open_params;
backup_open_params.open_mode = IBackup::OpenMode::READ;
backup_open_params.context = context_in_use;
backup_open_params.backup_info = backup_info;
backup_open_params.base_backup_info = restore_settings.base_backup_info;
backup_open_params.password = restore_settings.password;
BackupPtr backup = BackupFactory::instance().createBackup(backup_open_params);
String current_database = context_in_use->getCurrentDatabase();
/// Checks access rights if this is ON CLUSTER query.
/// (If this isn't ON CLUSTER query RestorerFromBackup will check access rights later.)
ClusterPtr cluster;
if (on_cluster)
{
restore_query->cluster = context_in_use->getMacros()->expand(restore_query->cluster);
cluster = context_in_use->getCluster(restore_query->cluster);
restore_settings.cluster_host_ids = cluster->getHostIDs();
/// We cannot just use access checking provided by the function executeDDLQueryOnCluster(): it would be incorrect
/// because different replicas can contain different set of tables and so the required access rights can differ too.
/// So the right way is pass through the entire cluster and check access for each host.
auto addresses = cluster->filterAddressesByShardOrReplica(restore_settings.shard_num, restore_settings.replica_num);
for (const auto * address : addresses)
{
restore_settings.host_id = address->toString();
auto restore_elements = restore_query->elements;
String addr_database = address->default_database.empty() ? current_database : address->default_database;
for (auto & element : restore_elements)
element.setCurrentDatabase(addr_database);
RestorerFromBackup dummy_restorer{restore_elements, restore_settings, nullptr, backup, context_in_use, {}};
dummy_restorer.checkAccessOnly();
}
}
/// Make a restore coordination.
std::shared_ptr<IRestoreCoordination> restore_coordination;
SCOPE_EXIT({
if (restore_coordination && !restore_settings.internal)
restore_coordination->drop();
});
if (on_cluster && restore_settings.coordination_zk_path.empty())
{
String root_zk_path = context_in_use->getConfigRef().getString("backups.zookeeper_path", "/clickhouse/backups");
restore_settings.coordination_zk_path = root_zk_path + "/restore-" + toString(restore_uuid);
}
if (!restore_settings.coordination_zk_path.empty())
{
restore_coordination = std::make_shared<RestoreCoordinationDistributed>(
restore_settings.coordination_zk_path,
[global_context = context_in_use->getGlobalContext()] { return global_context->getZooKeeper(); });
}
else
{
restore_coordination = std::make_shared<RestoreCoordinationLocal>();
}
/// Do RESTORE.
if (on_cluster)
{
DDLQueryOnClusterParams params;
params.cluster = cluster;
params.only_shard_num = restore_settings.shard_num;
params.only_replica_num = restore_settings.replica_num;
context_in_use->setSetting("distributed_ddl_task_timeout", -1); // No timeout
context_in_use->setSetting("distributed_ddl_output_mode", Field{"throw"});
restore_settings.copySettingsToQuery(*restore_query);
auto res = executeDDLQueryOnCluster(restore_query, context_in_use, params);
auto on_cluster_io = std::make_shared<BlockIO>(std::move(res));
PullingPipelineExecutor executor(on_cluster_io->pipeline);
Block block;
while (executor.pull(block))
@ -275,24 +291,20 @@ UUID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePtr conte
{
std::optional<CurrentThread::QueryScope> query_scope;
if (in_separate_thread)
query_scope.emplace(cloned_context);
query_scope.emplace(context_in_use);
restore_query->setDatabase(cloned_context->getCurrentDatabase());
restore_query->setCurrentDatabase(current_database);
BackupFactory::CreateParams backup_open_params;
backup_open_params.open_mode = IBackup::OpenMode::READ;
backup_open_params.context = cloned_context;
backup_open_params.backup_info = backup_info;
backup_open_params.base_backup_info = restore_settings.base_backup_info;
backup_open_params.password = restore_settings.password;
BackupPtr backup = BackupFactory::instance().createBackup(backup_open_params);
DataRestoreTasks data_restore_tasks;
{
auto timeout = std::chrono::seconds{context_in_use->getConfigRef().getInt("backups.restore_metadata_timeout", -1)};
RestorerFromBackup restorer{restore_query->elements, restore_settings, restore_coordination,
backup, context_in_use, timeout};
restorer.restoreMetadata();
data_restore_tasks = restorer.getDataRestoreTasks();
}
auto timeout_for_restoring_metadata
= std::chrono::seconds{cloned_context->getConfigRef().getInt("backups.restore_metadata_timeout", -1)};
auto restore_tasks = makeRestoreTasks(
cloned_context, backup, restore_query->elements, restore_settings, restore_coordination, timeout_for_restoring_metadata);
restoreMetadata(restore_tasks, restore_settings, restore_coordination, timeout_for_restoring_metadata);
restoreData(restore_tasks, restores_thread_pool);
restoreTablesData(std::move(data_restore_tasks), restores_thread_pool);
}
setStatus(restore_uuid, BackupStatus::RESTORED);
@ -306,7 +318,7 @@ UUID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePtr conte
};
if (restore_settings.async)
backups_thread_pool.scheduleOrThrowOnError([job] { job(true); });
backups_thread_pool.scheduleOrThrowOnError([job]() mutable { job(true); });
else
job(false);

View File

@ -1,87 +0,0 @@
#include <Backups/DDLCompareUtils.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/formatAST.h>
namespace DB
{
namespace
{
/// Returns a version of `ast` suitable for comparing definitions:
/// both UUIDs are reset to Nil and the IF NOT EXISTS flag is cleared.
/// The query is cloned only if at least one of those fields really has to change,
/// otherwise the original object is returned untouched.
std::shared_ptr<const ASTCreateQuery> prepareDDLToCompare(const ASTCreateQuery & ast)
{
    auto original = typeid_cast<std::shared_ptr<const ASTCreateQuery>>(ast.shared_from_this());

    const bool has_uuid = (original->uuid != UUIDHelpers::Nil);
    const bool has_inner_uuid = (original->to_inner_uuid != UUIDHelpers::Nil);
    const bool has_if_not_exists = original->if_not_exists;

    /// Nothing to normalize: avoid the clone.
    if (!has_uuid && !has_inner_uuid && !has_if_not_exists)
        return original;

    auto normalized = typeid_cast<std::shared_ptr<ASTCreateQuery>>(original->clone());

    /// Remove UUIDs.
    normalized->uuid = UUIDHelpers::Nil;
    normalized->to_inner_uuid = UUIDHelpers::Nil;

    /// Clear IF NOT EXISTS flag.
    normalized->if_not_exists = false;

    return normalized;
}
}
/// Checks that two table definitions are actually the same.
/// Returns false if either argument is not a CREATE query with a table name.
/// UUIDs and the IF NOT EXISTS flag are not considered a part of the definition: if any of them differ,
/// both queries are normalized with prepareDDLToCompare() before their serialized forms are compared.
bool areTableDefinitionsSame(const IAST & table1, const IAST & table2)
{
    auto ast1 = typeid_cast<std::shared_ptr<const ASTCreateQuery>>(table1.shared_from_this());
    if (!ast1 || !ast1->table)
        return false;

    auto ast2 = typeid_cast<std::shared_ptr<const ASTCreateQuery>>(table2.shared_from_this());
    if (!ast2 || !ast2->table)
        return false;

    if ((ast1->uuid != ast2->uuid) || (ast1->to_inner_uuid != ast2->to_inner_uuid) ||
        (ast1->if_not_exists != ast2->if_not_exists))
    {
        ast1 = prepareDDLToCompare(*ast1);
        ast2 = prepareDDLToCompare(*ast2);
    }

    /// The first definition must be compared with the second one (not with itself).
    return serializeAST(*ast1) == serializeAST(*ast2);
}
/// Checks that two database definitions are actually the same.
/// Returns false if either argument is not a CREATE query which has a database name and no table name.
/// The UUID and the IF NOT EXISTS flag are not considered a part of the definition: if any of them differ,
/// both queries are normalized with prepareDDLToCompare() before their serialized forms are compared.
bool areDatabaseDefinitionsSame(const IAST & database1, const IAST & database2)
{
    auto ast1 = typeid_cast<std::shared_ptr<const ASTCreateQuery>>(database1.shared_from_this());
    if (!ast1 || ast1->table || !ast1->database)
        return false;

    auto ast2 = typeid_cast<std::shared_ptr<const ASTCreateQuery>>(database2.shared_from_this());
    if (!ast2 || ast2->table || !ast2->database)
        return false;

    if ((ast1->uuid != ast2->uuid) || (ast1->if_not_exists != ast2->if_not_exists))
    {
        ast1 = prepareDDLToCompare(*ast1);
        ast2 = prepareDDLToCompare(*ast2);
    }

    /// The first definition must be compared with the second one (not with itself).
    return serializeAST(*ast1) == serializeAST(*ast2);
}
/// Whether the data from the first table can be attached to the second table.
/// The answer is exactly the result of areTableDefinitionsSame() for the two tables.
bool areTableDataCompatible(const IAST & src_table, const IAST & dest_table)
{
    const bool same_definitions = areTableDefinitionsSame(src_table, dest_table);
    return same_definitions;
}
}

View File

@ -1,17 +0,0 @@
#pragma once
namespace DB
{
/// Forward declaration: the functions below only take IAST by reference.
class IAST;
/// Checks that two table definitions are actually the same.
/// Returns false if either argument is not a CREATE query with a table name.
bool areTableDefinitionsSame(const IAST & table1, const IAST & table2);
/// Checks that two database definitions are actually the same.
/// Returns false if either argument is not a CREATE query which has a database name and no table name.
bool areDatabaseDefinitionsSame(const IAST & database1, const IAST & database2);
/// Whether the data from the first table can be attached to the second table.
/// The implementation delegates to areTableDefinitionsSame().
bool areTableDataCompatible(const IAST & src_table, const IAST & dest_table);
}

View File

@ -1,387 +0,0 @@
#include <Backups/DDLRenamingVisitor.h>
#include <Interpreters/DatabaseCatalog.h>
#include <Interpreters/InDepthNodeVisitor.h>
#include <Interpreters/evaluateConstantExpression.h>
#include <Parsers/ASTBackupQuery.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <TableFunctions/TableFunctionFactory.h>
namespace DB
{
/// Error codes thrown from this file. They are only declared here (`extern`); the definitions live elsewhere.
namespace ErrorCodes
{
extern const int WRONG_DDL_RENAMING_SETTINGS;
extern const int LOGICAL_ERROR;
}
namespace
{
/// Applies the renaming settings to every database/table name mentioned in a CREATE query, which can be
/// CREATE TABLE, CREATE DICTIONARY, CREATE VIEW, CREATE TEMPORARY TABLE or CREATE DATABASE.
/// Besides the created object itself, the `AS db.table` source and the `TO db.table` target are renamed too
/// (only when both their database and table names are set).
void visitCreateQuery(ASTCreateQuery & create, const DDLRenamingVisitor::Data & data)
{
    const auto & renaming = data.renaming_settings;

    if (create.table)
    {
        /// Temporary tables are addressed through DatabaseCatalog::TEMPORARY_DATABASE.
        DatabaseAndTableName old_name;
        old_name.second = create.getTable();
        if (create.temporary)
            old_name.first = DatabaseCatalog::TEMPORARY_DATABASE;
        else if (create.database)
            old_name.first = create.getDatabase();
        else
            throw Exception(ErrorCodes::LOGICAL_ERROR, "Database name specified in the CREATE TABLE query must not be empty");

        const DatabaseAndTableName new_name = renaming.getNewTableName(old_name);

        /// The TEMPORARY flag follows the new database name.
        const bool becomes_temporary = (new_name.first == DatabaseCatalog::TEMPORARY_DATABASE);
        create.temporary = becomes_temporary;
        if (becomes_temporary)
            create.setDatabase("");
        else
            create.setDatabase(new_name.first);

        create.setTable(new_name.second);
    }
    else if (create.database)
    {
        const String old_database_name = create.getDatabase();
        const String new_database_name = renaming.getNewDatabaseName(old_database_name);
        create.setDatabase(new_database_name);
    }
    else
    {
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Database name specified in the CREATE DATABASE query must not be empty");
    }

    const bool has_as_table = !create.as_table.empty() && !create.as_database.empty();
    if (has_as_table)
    {
        const DatabaseAndTableName new_as = renaming.getNewTableName({create.as_database, create.as_table});
        create.as_database = new_as.first;
        create.as_table = new_as.second;
    }

    const bool has_to_table = !create.to_table_id.table_name.empty() && !create.to_table_id.database_name.empty();
    if (has_to_table)
    {
        const DatabaseAndTableName new_to = renaming.getNewTableName({create.to_table_id.database_name, create.to_table_id.table_name});
        create.to_table_id = StorageID{new_to.first, new_to.second};
    }
}
/// Replaces names of a database and a table in an expression like `db`.`table`.
/// Expressions without an identifier, or with an empty database or table name, are left as they are.
void visitTableExpression(ASTTableExpression & expr, const DDLRenamingVisitor::Data & data)
{
    if (!expr.database_and_table_name)
        return;

    ASTIdentifier * identifier = expr.database_and_table_name->as<ASTIdentifier>();
    if (!identifier)
        return;

    auto table_identifier = identifier->createTable();
    if (!table_identifier)
        return;

    const String & old_database = table_identifier->getDatabaseName();
    const String & old_table = table_identifier->shortName();
    if (old_database.empty() || old_table.empty())
        return;

    const DatabaseAndTableName renamed = data.renaming_settings.getNewTableName({old_database, old_table});
    const String & new_database = renamed.first;
    const String & new_table = renamed.second;

    const bool is_renamed = (new_database != old_database) || (new_table != old_table);
    if (!is_renamed)
        return;

    /// NOTE(review): the new identifier is appended to `children` while the previous one is not removed here;
    /// confirm that this is intended.
    expr.database_and_table_name = std::make_shared<ASTIdentifier>(Strings{new_database, new_table});
    expr.children.push_back(expr.database_and_table_name);
}
/// Replaces a database's name passed via an argument of the function merge() or the table engine Merge.
/// Only the first argument (the database's name) is handled; the second one is a regular expression
/// and nothing can be done about it.
void visitFunctionMerge(ASTFunction & function, const DDLRenamingVisitor::Data & data)
{
    if (!function.arguments)
        return;

    auto & arguments = function.arguments->as<ASTExpressionList &>().children;
    if (arguments.empty())
        return;

    auto & database_argument = arguments.front();

    const String old_database
        = evaluateConstantExpressionForDatabaseName(database_argument, data.context)->as<ASTLiteral &>().value.safeGet<String>();
    if (old_database.empty())
        return;

    String new_database = data.renaming_settings.getNewDatabaseName(old_database);
    if (new_database != old_database)
        database_argument = std::make_shared<ASTLiteral>(new_database);
}
/// Replaces names of a table and a database passed via arguments of the function remote() or cluster() or the table engine Distributed.
///
/// The first argument is an address or a cluster's name, so it is skipped.
/// The second argument can be either 'db.name' or just 'db' followed by the third argument 'table'
/// (for the engine Distributed the second argument is always treated as a database's name).
/// Nothing is changed if there are too few arguments, if the second argument is a table function,
/// if any of the names is empty, or if the renaming settings keep both names as they are.
void visitFunctionRemote(ASTFunction & function, const DDLRenamingVisitor::Data & data)
{
    if (!function.arguments)
        return;

    auto & args = function.arguments->as<ASTExpressionList &>().children;

    /// The second argument must exist before it can be inspected.
    size_t db_name_index = 1;
    if (args.size() <= db_name_index)
        return;

    const auto * second_arg_as_function = args[db_name_index]->as<ASTFunction>();
    if (second_arg_as_function && TableFunctionFactory::instance().isTableFunctionName(second_arg_as_function->name))
        return;

    String name = evaluateConstantExpressionForDatabaseName(args[db_name_index], data.context)->as<ASTLiteral &>().value.safeGet<String>();

    /// static_cast<size_t>(-1) means "the table's name is not passed as a separate argument".
    size_t table_name_index = static_cast<size_t>(-1);

    QualifiedTableName qualified_name;
    if (function.name == "Distributed")
        qualified_name.table = name;
    else
        qualified_name = QualifiedTableName::parseFromString(name);

    if (qualified_name.database.empty())
    {
        /// The second argument was only a database's name, so the table's name must be the third argument.
        std::swap(qualified_name.database, qualified_name.table);
        table_name_index = 2;
        if (args.size() <= table_name_index)
            return;
        qualified_name.table = evaluateConstantExpressionForDatabaseName(args[table_name_index], data.context)->as<ASTLiteral &>().value.safeGet<String>();
    }

    const String & db_name = qualified_name.database;
    const String & table_name = qualified_name.table;

    if (db_name.empty() || table_name.empty())
        return;

    String new_db_name, new_table_name;
    std::tie(new_db_name, new_table_name) = data.renaming_settings.getNewTableName({db_name, table_name});
    if ((new_db_name == db_name) && (new_table_name == table_name))
        return;

    if (table_name_index != static_cast<size_t>(-1))
    {
        /// 'db', 'table' were separate arguments: replace only what has changed.
        if (new_db_name != db_name)
            args[db_name_index] = std::make_shared<ASTLiteral>(new_db_name);
        if (new_table_name != table_name)
            args[table_name_index] = std::make_shared<ASTLiteral>(new_table_name);
    }
    else
    {
        /// 'db.table' was a single argument: it is replaced with two arguments 'db', 'table'.
        args[db_name_index] = std::make_shared<ASTLiteral>(new_db_name);
        args.insert(args.begin() + db_name_index + 1, std::make_shared<ASTLiteral>(new_table_name));
    }
}
/// Replaces names of tables and databases used in arguments of a table function or a table engine.
/// Dispatches by the function's name; functions with other names are not touched.
void visitFunction(ASTFunction & function, const DDLRenamingVisitor::Data & data)
{
    const auto & name = function.name;

    if ((name == "merge") || (name == "Merge"))
    {
        visitFunctionMerge(function, data);
        return;
    }

    const bool takes_remote_like_arguments = (name == "remote") || (name == "remoteSecure") || (name == "cluster")
        || (name == "clusterAllReplicas") || (name == "Distributed");

    if (takes_remote_like_arguments)
        visitFunctionRemote(function, data);
}
/// Replaces names of a table and a database used in source parameters of a dictionary.
/// Only the source named "clickhouse" is handled, and only if its parameters contain exactly one non-empty `db`
/// and exactly one non-empty `table`; otherwise the dictionary is left as it is.
void visitDictionary(ASTDictionary & dictionary, const DDLRenamingVisitor::Data & data)
{
    const bool has_clickhouse_source = dictionary.source && (dictionary.source->name == "clickhouse") && dictionary.source->elements;
    if (!has_clickhouse_source)
        return;

    auto & elements = dictionary.source->elements->as<ASTExpressionList &>().children;

    constexpr size_t not_found = static_cast<size_t>(-1);
    size_t db_position = not_found;
    size_t table_position = not_found;
    String old_database;
    String old_table;

    /// Find where the parameters `db` and `table` are; a repeated parameter means we give up.
    size_t position = 0;
    for (auto & element : elements)
    {
        auto & pair = element->as<ASTPair &>();
        if (pair.first == "db")
        {
            if (db_position != not_found)
                return;
            old_database = pair.second->as<ASTLiteral &>().value.safeGet<String>();
            db_position = position;
        }
        else if (pair.first == "table")
        {
            if (table_position != not_found)
                return;
            old_table = pair.second->as<ASTLiteral &>().value.safeGet<String>();
            table_position = position;
        }
        ++position;
    }

    if (old_database.empty() || old_table.empty())
        return;

    const DatabaseAndTableName renamed = data.renaming_settings.getNewTableName({old_database, old_table});
    String new_database = renamed.first;
    String new_table = renamed.second;

    const bool database_changed = (new_database != old_database);
    const bool table_changed = (new_table != old_table);
    if (!database_changed && !table_changed)
        return;

    if (database_changed)
    {
        auto & pair = elements[db_position]->as<ASTPair &>();
        pair.replace(pair.second, std::make_shared<ASTLiteral>(new_database));
    }

    if (table_changed)
    {
        auto & pair = elements[table_position]->as<ASTPair &>();
        pair.replace(pair.second, std::make_shared<ASTLiteral>(new_table));
    }
}
}
/// Registers that the table `old_table_name` must be renamed to `new_table_name`.
/// Registering the same renaming twice is allowed; registering two different new names for the same table
/// throws WRONG_DDL_RENAMING_SETTINGS. Empty names throw LOGICAL_ERROR.
void DDLRenamingSettings::setNewTableName(const DatabaseAndTableName & old_table_name, const DatabaseAndTableName & new_table_name)
{
    const bool has_empty_name = old_table_name.first.empty() || old_table_name.second.empty()
        || new_table_name.first.empty() || new_table_name.second.empty();
    if (has_empty_name)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Empty names are not allowed for DDLRenamingSettings::setNewTableName");

    auto existing = old_to_new_table_names.find(old_table_name);
    if (existing == old_to_new_table_names.end())
    {
        old_to_new_table_names[old_table_name] = new_table_name;
        return;
    }

    /// The same renaming was already registered.
    if (existing->second == new_table_name)
        return;

    throw Exception(ErrorCodes::WRONG_DDL_RENAMING_SETTINGS, "Wrong renaming: it's specified that table {}.{} should be renamed to {}.{} and to {}.{} at the same time",
        backQuoteIfNeed(old_table_name.first), backQuoteIfNeed(old_table_name.second),
        backQuoteIfNeed(existing->second.first), backQuoteIfNeed(existing->second.second),
        backQuoteIfNeed(new_table_name.first), backQuoteIfNeed(new_table_name.second));
}
/// Registers that the database `old_database_name` must be renamed to `new_database_name`.
/// Registering the same renaming twice is allowed; registering two different new names for the same database
/// throws WRONG_DDL_RENAMING_SETTINGS. Empty names throw LOGICAL_ERROR.
void DDLRenamingSettings::setNewDatabaseName(const String & old_database_name, const String & new_database_name)
{
    const bool has_empty_name = old_database_name.empty() || new_database_name.empty();
    if (has_empty_name)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Empty names are not allowed for DDLRenamingSettings::setNewDatabaseName");

    auto existing = old_to_new_database_names.find(old_database_name);
    if (existing == old_to_new_database_names.end())
    {
        old_to_new_database_names[old_database_name] = new_database_name;
        return;
    }

    /// The same renaming was already registered.
    if (existing->second == new_database_name)
        return;

    throw Exception(ErrorCodes::WRONG_DDL_RENAMING_SETTINGS, "Wrong renaming: it's specified that database {} should be renamed to {} and to {} at the same time",
        backQuoteIfNeed(old_database_name), backQuoteIfNeed(existing->second), backQuoteIfNeed(new_database_name));
}
/// Convenience overload: rebuilds the renaming maps from the `elements` of an ASTBackupQuery.
void DDLRenamingSettings::setFromBackupQuery(const ASTBackupQuery & backup_query)
{
/// All the work is done by the overload taking the list of elements.
setFromBackupQuery(backup_query.elements);
}
/// Rebuilds both renaming maps from the elements of a backup query.
/// Any previously registered renamings are discarded first.
/// For TABLE and DATABASE elements marked `is_temp_db` both the old and the new database name
/// are replaced with DatabaseCatalog::TEMPORARY_DATABASE. ALL_DATABASES elements add nothing.
/// Conflicting elements make setNewTableName() / setNewDatabaseName() throw.
void DDLRenamingSettings::setFromBackupQuery(const ASTBackupQuery::Elements & backup_query_elements)
{
    old_to_new_table_names.clear();
    old_to_new_database_names.clear();

    using ElementType = ASTBackupQuery::ElementType;

    for (const auto & element : backup_query_elements)
    {
        switch (element.type)
        {
            case ElementType::TABLE:
            {
                const String & table_name = element.name.second;
                const String & new_table_name = element.new_name.second;
                String database_name = element.name.first;
                String new_database_name = element.new_name.first;

                /// Temporary tables live in one fixed database on both sides of the renaming.
                if (element.is_temp_db)
                {
                    database_name = DatabaseCatalog::TEMPORARY_DATABASE;
                    new_database_name = DatabaseCatalog::TEMPORARY_DATABASE;
                }

                assert(!table_name.empty());
                assert(!database_name.empty());
                assert(!new_table_name.empty());
                assert(!new_database_name.empty());

                setNewTableName({database_name, table_name}, {new_database_name, new_table_name});
                break;
            }

            case ElementType::DATABASE:
            {
                String database_name = element.name.first;
                String new_database_name = element.new_name.first;

                if (element.is_temp_db)
                {
                    database_name = DatabaseCatalog::TEMPORARY_DATABASE;
                    new_database_name = DatabaseCatalog::TEMPORARY_DATABASE;
                }

                assert(!database_name.empty());
                assert(!new_database_name.empty());

                setNewDatabaseName(database_name, new_database_name);
                break;
            }

            case ElementType::ALL_DATABASES:
                break;
        }
    }
}
/// Returns the name a table should have after renaming.
/// An explicit table renaming wins; otherwise only the database part is translated
/// (via getNewDatabaseName()) and the table part is kept as is.
DatabaseAndTableName DDLRenamingSettings::getNewTableName(const DatabaseAndTableName & old_table_name) const
{
    const auto explicit_rule = old_to_new_table_names.find(old_table_name);
    if (explicit_rule == old_to_new_table_names.end())
    {
        /// No rule for this exact table: fall back to the database-level renaming.
        return {getNewDatabaseName(old_table_name.first), old_table_name.second};
    }
    return explicit_rule->second;
}
/// Returns the name a database should have after renaming.
/// If no renaming is registered the result is a reference to `old_database_name` itself,
/// so the argument must stay alive for as long as the returned reference is used.
const String & DDLRenamingSettings::getNewDatabaseName(const String & old_database_name) const
{
    const auto rule = old_to_new_database_names.find(old_database_name);
    if (rule == old_to_new_database_names.end())
        return old_database_name;
    return rule->second;
}
/// Answers true for every (parent, child) pair, i.e. this visitor never asks to skip a child.
bool DDLRenamingVisitor::needChildVisit(ASTPtr &, const ASTPtr &) { return true; }
/// Dispatches a single AST node to the matching renaming routine.
/// Handled node kinds, checked in this order: ASTCreateQuery, ASTTableExpression, ASTFunction, ASTDictionary.
/// Nodes of any other kind are left untouched.
void DDLRenamingVisitor::visit(ASTPtr & ast, const Data & data)
{
    if (auto * create = ast->as<ASTCreateQuery>())
    {
        visitCreateQuery(*create, data);
        return;
    }

    if (auto * expr = ast->as<ASTTableExpression>())
    {
        visitTableExpression(*expr, data);
        return;
    }

    if (auto * function = ast->as<ASTFunction>())
    {
        visitFunction(*function, data);
        return;
    }

    if (auto * dictionary = ast->as<ASTDictionary>())
        visitDictionary(*dictionary, data);
}
/// Applies `renaming_settings` to `ast` in place by running DDLRenamingVisitor over it.
/// This is best-effort: any exception raised while visiting is logged under "Backup" and swallowed,
/// so the caller is never interrupted (the AST may then be only partially renamed).
void renameInCreateQuery(ASTPtr & ast, const ContextPtr & global_context, const DDLRenamingSettings & renaming_settings)
{
    try
    {
        DDLRenamingVisitor::Data visitor_data{renaming_settings, global_context};
        DDLRenamingVisitor::Visitor visitor{visitor_data};
        visitor.visit(ast);
    }
    catch (...)
    {
        tryLogCurrentException("Backup", "Error while renaming in AST");
    }
}
}

View File

@ -1,61 +0,0 @@
#pragma once
#include <Core/Types.h>
#include <Interpreters/InDepthNodeVisitor.h>
#include <Parsers/ASTBackupQuery.h>
#include <map>
#include <memory>
#include <unordered_map>
namespace DB
{
using DatabaseAndTableName = std::pair<String, String>;
class IAST;
using ASTPtr = std::shared_ptr<IAST>;
class Context;
using ContextPtr = std::shared_ptr<const Context>;
/// Keeps information about renamings of databases or tables being processed
/// while we're making a backup or while we're restoring from a backup.
/// Two independent maps are kept: explicit table renamings and database renamings.
class DDLRenamingSettings
{
public:
DDLRenamingSettings() = default;
/// Registers a table renaming. Empty name parts cause LOGICAL_ERROR;
/// a second, different target for the same source causes WRONG_DDL_RENAMING_SETTINGS.
void setNewTableName(const DatabaseAndTableName & old_table_name, const DatabaseAndTableName & new_table_name);
/// Registers a database renaming with the same error rules as setNewTableName().
void setNewDatabaseName(const String & old_database_name, const String & new_database_name);
/// Replaces all registered renamings with the ones described by the elements of a backup query.
void setFromBackupQuery(const ASTBackupQuery & backup_query);
void setFromBackupQuery(const ASTBackupQuery::Elements & backup_query_elements);
/// Changes names according to the renaming.
/// A table without an explicit renaming only gets its database part translated.
DatabaseAndTableName getNewTableName(const DatabaseAndTableName & old_table_name) const;
/// When no renaming is registered the returned reference refers to the argument itself,
/// so the argument must outlive the use of the result.
const String & getNewDatabaseName(const String & old_database_name) const;
private:
/// Explicit per-table renamings: (database, table) -> (database, table).
std::map<DatabaseAndTableName, DatabaseAndTableName> old_to_new_table_names;
/// Per-database renamings, used as a fallback by getNewTableName().
std::unordered_map<String, String> old_to_new_database_names;
};
/// Changes names in AST according to the renaming settings.
void renameInCreateQuery(ASTPtr & ast, const ContextPtr & global_context, const DDLRenamingSettings & renaming_settings);
/// Visits an AST and changes names of tables and databases according to the passed DDLRenamingSettings.
/// The matcher itself is stateless: everything it needs is carried in `Data`.
class DDLRenamingVisitor
{
public:
struct Data
{
/// Renaming rules to apply; held by reference, so they must outlive the visitor run.
const DDLRenamingSettings & renaming_settings;
ContextPtr context;
};
using Visitor = InDepthNodeVisitor<DDLRenamingVisitor, false>;
/// Tells the visitor whether to descend from a node into the given child.
static bool needChildVisit(ASTPtr &, const ASTPtr &);
/// Processes one AST node.
static void visit(ASTPtr & ast, const Data & data);
};
}

View File

@ -36,18 +36,19 @@ public:
/// Returns UUID of the backup.
virtual UUID getUUID() const = 0;
/// Returns names of entries stored in the backup.
/// If `prefix` isn't empty the function will return only the names starting with
/// the prefix (but without the prefix itself).
/// If the `terminator` isn't empty the function will return only parts of the names
/// before the terminator. For example, list("", "") returns names of all the entries
/// in the backup; and list("data/", "/") return kind of a list of folders and
/// files stored in the "data/" directory inside the backup.
virtual Strings listFiles(const String & prefix = "", const String & terminator = "/") const = 0; /// NOLINT
/// Returns names of entries stored in a specified directory in the backup.
/// If `directory` is empty or '/' the function returns entries in the backup's root.
virtual Strings listFiles(const String & directory, bool recursive = false) const = 0;
/// Checks if a specified directory contains any files.
/// The function returns the same as `!listFiles(directory).empty()`.
virtual bool hasFiles(const String & directory) const = 0;
using SizeAndChecksum = std::pair<UInt64, UInt128>;
/// Checks if an entry with a specified name exists.
virtual bool fileExists(const String & file_name) const = 0;
virtual bool fileExists(const std::pair<UInt64, UInt128> & size_and_checksum) const = 0;
virtual bool fileExists(const SizeAndChecksum & size_and_checksum) const = 0;
/// Returns the size of the entry's data.
/// This function does the same as `read(file_name)->getSize()` but faster.
@ -57,8 +58,6 @@ public:
/// This function does the same as `read(file_name)->getChecksum()` but faster.
virtual UInt128 getFileChecksum(const String & file_name) const = 0;
using SizeAndChecksum = std::pair<UInt64, UInt128>;
/// Returns both the size and checksum in one call.
virtual SizeAndChecksum getFileSizeAndChecksum(const String & file_name) const = 0;

View File

@ -6,7 +6,6 @@
namespace DB
{
using DatabaseAndTableName = std::pair<String, String>;
/// Keeps information about files contained in a backup.
class IBackupCoordination
@ -14,10 +13,11 @@ class IBackupCoordination
public:
virtual ~IBackupCoordination() = default;
/// Adds a data path in backup for a replicated table.
/// Multiple replicas of the replicated table call this function and then all the added paths can be returned by call of the function
/// getReplicatedTableDataPaths().
virtual void addReplicatedTableDataPath(const String & table_zk_path, const String & table_data_path) = 0;
/// Sets the current stage and waits for other hosts to come to this stage too.
virtual void syncStage(const String & current_host, int stage, const Strings & wait_hosts, std::chrono::seconds timeout) = 0;
/// Sets that the current host encountered an error, so other hosts should know that and stop waiting in syncStage().
virtual void syncStageError(const String & current_host, const String & error_message) = 0;
struct PartNameAndChecksum
{
@ -27,30 +27,23 @@ public:
/// Adds part names which a specified replica of a replicated table is going to put to the backup.
/// Multiple replicas of the replicated table call this function and then the added part names can be returned by call of the function
/// getReplicatedTablePartNames().
/// getReplicatedPartNames().
/// Checksums are used only to control that parts under the same names on different replicas are the same.
virtual void addReplicatedTablePartNames(
const String & host_id,
const DatabaseAndTableName & table_name,
const String & table_zk_path,
const std::vector<PartNameAndChecksum> & part_names_and_checksums)
= 0;
/// Sets that a specified host finished preparations for copying the backup's files, successfully or not.
/// `error_message` should be non-empty if it was not successful.
virtual void finishPreparing(const String & host_id, const String & error_message = {}) = 0;
/// Waits for a specified time for specified hosts to finish preparation for copying the backup's files.
virtual void
waitForAllHostsPrepared(const Strings & host_ids, std::chrono::seconds timeout = std::chrono::seconds(-1) /* no timeout */) const = 0;
/// Returns all the data paths in backup added for a replicated table (see also addReplicatedTableDataPath()).
virtual Strings getReplicatedTableDataPaths(const String & table_zk_path) const = 0;
virtual void addReplicatedPartNames(const String & table_zk_path, const String & table_name_for_logs, const String & replica_name,
const std::vector<PartNameAndChecksum> & part_names_and_checksums) = 0;
/// Returns the names of the parts which a specified replica of a replicated table should put to the backup.
/// This is the same list as it was added by call of the function addReplicatedTablePartNames() but without duplications and without
/// This is the same list as it was added by call of the function addReplicatedPartNames() but without duplications and without
/// parts covered by other parts.
virtual Strings getReplicatedTablePartNames(const String & host_id, const DatabaseAndTableName & table_name, const String & table_zk_path) const = 0;
virtual Strings getReplicatedPartNames(const String & table_zk_path, const String & replica_name) const = 0;
/// Adds a data path in backup for a replicated table.
/// Multiple replicas of the replicated table call this function and then all the added paths can be returned by call of the function
/// getReplicatedDataPaths().
virtual void addReplicatedDataPath(const String & table_zk_path, const String & data_path) = 0;
/// Returns all the data paths in backup added for a replicated table (see also addReplicatedDataPath()).
virtual Strings getReplicatedDataPaths(const String & table_zk_path) const = 0;
struct FileInfo
{
@ -87,7 +80,8 @@ public:
virtual void updateFileInfo(const FileInfo & file_info) = 0;
virtual std::vector<FileInfo> getAllFileInfos() const = 0;
virtual Strings listFiles(const String & prefix, const String & terminator) const = 0;
virtual Strings listFiles(const String & directory, bool recursive) const = 0;
virtual bool hasFiles(const String & directory) const = 0;
using SizeAndChecksum = std::pair<UInt64, UInt128>;

View File

@ -13,53 +13,22 @@ class IRestoreCoordination
public:
virtual ~IRestoreCoordination() = default;
/// Sets the current stage and waits for other hosts to come to this stage too.
virtual void syncStage(const String & current_host, int stage, const Strings & wait_hosts, std::chrono::seconds timeout) = 0;
/// Sets that the current host encountered an error, so other hosts should know that and stop waiting in syncStage().
virtual void syncStageError(const String & current_host, const String & error_message) = 0;
/// Starts creating a table in a replicated database. Returns false if there is another host which is already creating this table.
virtual bool startCreatingTableInReplicatedDB(
const String & host_id, const String & database_name, const String & database_zk_path, const String & table_name)
= 0;
/// Sets that we have either created a table in a replicated database or failed to do so.
/// In the latter case `error_message` should be set.
/// Calling this function unblocks other hosts waiting for this table to be created (see waitForTableCreatedInReplicatedDB()).
virtual void finishCreatingTableInReplicatedDB(
const String & host_id,
const String & database_name,
const String & database_zk_path,
const String & table_name,
const String & error_message = {})
= 0;
/// Wait for another host to create a table in a replicated database.
virtual void waitForTableCreatedInReplicatedDB(
const String & database_name,
const String & database_zk_path,
const String & table_name,
std::chrono::seconds timeout = std::chrono::seconds(-1) /* no timeout */)
= 0;
/// Adds a path in backup used by a replicated table.
/// This function can be called multiple times for the same table with different `host_id`, and in that case
/// getReplicatedTableDataPath() will choose `data_path_in_backup` with the lexicographically first `host_id`.
virtual void addReplicatedTableDataPath(
const String & host_id, const DatabaseAndTableName & table_name, const String & table_zk_path, const String & data_path_in_backup)
= 0;
/// Sets that a specified host has finished restoring metadata, successfully or with an error.
/// In the latter case `error_message` should be set.
virtual void finishRestoringMetadata(const String & host_id, const String & error_message = {}) = 0;
/// Waits for a specified list of hosts to finish restoring their metadata.
virtual void waitForAllHostsRestoredMetadata(
const Strings & host_ids, std::chrono::seconds timeout = std::chrono::seconds(-1) /* no timeout */) const = 0;
/// Gets path in backup used by a replicated table.
virtual String getReplicatedTableDataPath(const String & table_zk_path) const = 0;
virtual bool acquireCreatingTableInReplicatedDatabase(const String & database_zk_path, const String & table_name) = 0;
/// Sets that this replica is going to restore a partition in a replicated table.
/// The function returns false if this partition is being already restored by another replica.
virtual bool startInsertingDataToPartitionInReplicatedTable(
const String & host_id, const DatabaseAndTableName & table_name, const String & table_zk_path, const String & partition_name)
= 0;
virtual bool acquireInsertingDataIntoReplicatedTable(const String & table_zk_path) = 0;
/// Sets that this replica is going to restore a ReplicatedAccessStorage.
/// The function returns false if this access storage is being already restored by another replica.
virtual bool acquireReplicatedAccessStorage(const String & access_storage_zk_path) = 0;
/// Removes remotely stored information.
virtual void drop() {}

View File

@ -1,36 +0,0 @@
#pragma once
#include <memory>
#include <vector>
namespace DB
{
/// A unit of restore work: restoring a database, a table or a part of a table from a backup.
class IRestoreTask
{
public:
    /// Phase a task belongs to.
    enum class RestoreKind
    {
        /// The task restores metadata (definitions of databases and tables).
        /// Metadata tasks are executed first and strictly in one thread.
        METADATA,

        /// The task restores tables' data. Such tasks can be executed in parallel.
        DATA,
    };

    IRestoreTask() = default;
    virtual ~IRestoreTask() = default;

    /// Tasks restore data unless a subclass says otherwise.
    virtual RestoreKind getRestoreKind() const { return RestoreKind::DATA; }

    /// Performs the restoring. The returned list contains nested tasks that should be run later
    /// (it may be empty).
    virtual std::vector<std::unique_ptr<IRestoreTask>> run() = 0;
};
using RestoreTaskPtr = std::unique_ptr<IRestoreTask>;
using RestoreTasks = std::vector<RestoreTaskPtr>;
}

View File

@ -1,248 +1,15 @@
#include <Backups/RestoreCoordinationDistributed.h>
#include <Backups/formatTableNameOrTemporaryTableName.h>
#include <Common/ZooKeeper/KeeperException.h>
#include <Common/escapeForFileName.h>
#include <Common/logger_useful.h>
#include <IO/WriteHelpers.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteBufferFromString.h>
#include <IO/ReadBufferFromString.h>
#include <base/chrono_io.h>
namespace DB
{
namespace ErrorCodes
{
extern const int FAILED_TO_SYNC_BACKUP_OR_RESTORE;
}
namespace
{
struct ReplicatedTableDataPath
{
String host_id;
DatabaseAndTableName table_name;
String data_path_in_backup;
String serialize() const
{
WriteBufferFromOwnString out;
writeBinary(host_id, out);
writeBinary(table_name.first, out);
writeBinary(table_name.second, out);
writeBinary(data_path_in_backup, out);
return out.str();
}
static ReplicatedTableDataPath deserialize(const String & str)
{
ReadBufferFromString in{str};
ReplicatedTableDataPath res;
readBinary(res.host_id, in);
readBinary(res.table_name.first, in);
readBinary(res.table_name.second, in);
readBinary(res.data_path_in_backup, in);
return res;
}
};
}
/// Synchronizes creation of tables in replicated databases between hosts.
/// Each table gets a node `<zookeeper_path>/<escaped database_zk_path>/<escaped table_name>`
/// whose value is a serialized TableStatus.
class RestoreCoordinationDistributed::ReplicatedDatabasesMetadataSync
{
public:
    ReplicatedDatabasesMetadataSync(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_)
        : zookeeper_path(zookeeper_path_), get_zookeeper(get_zookeeper_), log(&Poco::Logger::get("RestoreCoordination"))
    {
        createRootNodes();
    }

    /// Starts creating a table in a replicated database. Returns false if there is another host which is already creating this table.
    bool startCreatingTable(
        const String & host_id_, const String & database_name_, const String & database_zk_path_, const String & table_name_)
    {
        auto zookeeper = get_zookeeper();

        String path = zookeeper_path + "/" + escapeForFileName(database_zk_path_);
        zookeeper->createIfNotExists(path, "");

        TableStatus status;
        status.host_id = host_id_;
        status.table_name = DatabaseAndTableName{database_name_, table_name_};

        /// Whoever manages to create the table's node is the host which creates the table.
        path += "/" + escapeForFileName(table_name_);
        auto code = zookeeper->tryCreate(path, status.serialize(), zkutil::CreateMode::Persistent);
        if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNODEEXISTS))
            throw zkutil::KeeperException(code, path);

        return (code == Coordination::Error::ZOK);
    }

    /// Sets that we have either created a table in a replicated database or failed to do so.
    /// In the latter case `error_message` should be set.
    /// Calling this function unblocks other hosts waiting for this table to be created (see waitForTableCreated()).
    void finishCreatingTable(
        const String & /* host_id_ */,
        const String & database_name_,
        const String & database_zk_path_,
        const String & table_name_,
        const String & error_message_)
    {
        if (error_message_.empty())
            LOG_TRACE(log, "Created table {}.{}", database_name_, table_name_);
        else
            LOG_TRACE(log, "Failed to create table {}.{}: {}", database_name_, table_name_, error_message_);

        auto zookeeper = get_zookeeper();
        String path = zookeeper_path + "/" + escapeForFileName(database_zk_path_) + "/" + escapeForFileName(table_name_);

        /// Read-modify-write of the status; writing the node triggers the watches of waiting hosts.
        auto status = TableStatus::deserialize(zookeeper->get(path));
        status.error_message = error_message_;
        status.ready = error_message_.empty();
        zookeeper->set(path, status.serialize());
    }

    /// Wait for another host to create a table in a replicated database.
    /// A negative `timeout_` means "no timeout".
    /// Throws FAILED_TO_SYNC_BACKUP_OR_RESTORE if the other host reported an error or the time is out.
    void waitForTableCreated(
        const String & /* database_name_ */, const String & database_zk_path_, const String & table_name_, std::chrono::seconds timeout_)
    {
        auto zookeeper = get_zookeeper();
        String path = zookeeper_path + "/" + escapeForFileName(database_zk_path_) + "/" + escapeForFileName(table_name_);

        TableStatus status;

        /// The mutex is declared before the callback because the callback has to lock it.
        std::mutex watch_mutex;
        std::condition_variable watch_triggered_event;
        std::atomic<bool> watch_set = false;

        auto watch_callback = [&](const Coordination::WatchResponse &)
        {
            /// The flag must be changed under the same mutex the waiter uses, otherwise the notification can slip
            /// in between the waiter's predicate check and its blocking, and the wakeup would be lost.
            /// Holding the lock during notify_all() also guarantees the waiter cannot return (and destroy
            /// the condition variable) before we're done with it.
            std::unique_lock lock{watch_mutex};
            watch_set = false; /// After it's triggered it's not set until we call getWatch() again.
            watch_triggered_event.notify_all();
        };

        auto watch_triggered = [&] { return !watch_set; };

        bool use_timeout = (timeout_.count() >= 0);
        std::chrono::steady_clock::duration time_left = timeout_;

        while (true)
        {
            if (use_timeout && (time_left.count() <= 0))
            {
                /// Time is out: read the status one last time without setting a watch.
                status = TableStatus::deserialize(zookeeper->get(path));
                break;
            }

            watch_set = true;
            status = TableStatus::deserialize(zookeeper->getWatch(path, nullptr, watch_callback));

            if (!status.error_message.empty() || status.ready)
                break;

            LOG_TRACE(log, "Waiting for host {} to create table {}.{}", status.host_id, status.table_name.first, status.table_name.second);

            {
                std::unique_lock lock{watch_mutex};
                if (use_timeout)
                {
                    std::chrono::steady_clock::time_point start_time = std::chrono::steady_clock::now();
                    if (!watch_triggered_event.wait_for(lock, time_left, watch_triggered))
                        break;
                    time_left -= (std::chrono::steady_clock::now() - start_time);
                }
                else
                    watch_triggered_event.wait(lock, watch_triggered);
            }
        }

        if (watch_set)
        {
            /// Remove watch by triggering it.
            ++status.increment;
            zookeeper->set(path, status.serialize());

            /// The callback references our local variables, so wait until it has run.
            /// A negative `timeout_` means "no timeout": wait_for() would return immediately in that case,
            /// so an unbounded wait is used instead.
            std::unique_lock lock{watch_mutex};
            if (use_timeout)
                watch_triggered_event.wait_for(lock, timeout_, watch_triggered);
            else
                watch_triggered_event.wait(lock, watch_triggered);
        }

        if (!status.error_message.empty())
            throw Exception(
                ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE,
                "Host {} failed to create table {}.{}: {}", status.host_id, status.table_name.first, status.table_name.second, status.error_message);

        if (status.ready)
        {
            LOG_TRACE(log, "Host {} created table {}.{}", status.host_id, status.table_name.first, status.table_name.second);
            return;
        }

        throw Exception(
            ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE,
            "Host {} was unable to create table {}.{} in {}",
            status.host_id,
            status.table_name.first,
            table_name_,
            to_string(timeout_));
    }

private:
    /// Makes sure `zookeeper_path` and all its ancestors exist.
    void createRootNodes()
    {
        auto zookeeper = get_zookeeper();
        zookeeper->createAncestors(zookeeper_path);
        zookeeper->createIfNotExists(zookeeper_path, "");
    }

    /// Value of a table's node. The field order in serialize() and deserialize() must match.
    struct TableStatus
    {
        String host_id;
        DatabaseAndTableName table_name;
        bool ready = false;
        String error_message;
        /// Changed only to force a write which triggers (and thereby removes) a pending watch.
        size_t increment = 0;

        String serialize() const
        {
            WriteBufferFromOwnString out;
            writeBinary(host_id, out);
            writeBinary(table_name.first, out);
            writeBinary(table_name.second, out);
            writeBinary(ready, out);
            writeBinary(error_message, out);
            writeBinary(increment, out);
            return out.str();
        }

        static TableStatus deserialize(const String & str)
        {
            ReadBufferFromString in{str};
            TableStatus res;
            readBinary(res.host_id, in);
            readBinary(res.table_name.first, in);
            readBinary(res.table_name.second, in);
            readBinary(res.ready, in);
            readBinary(res.error_message, in);
            readBinary(res.increment, in);
            return res;
        }
    };

    const String zookeeper_path;
    const zkutil::GetZooKeeper get_zookeeper;
    const Poco::Logger * log;
};
/// Remembers the root path and the ZooKeeper getter, constructs the helper objects on sub-paths of `zookeeper_path_`
/// ("/repl_databases_metadata", "/all_metadata", "/stage") and finally creates the root nodes.
RestoreCoordinationDistributed::RestoreCoordinationDistributed(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_)
: zookeeper_path(zookeeper_path_)
, get_zookeeper(get_zookeeper_)
, replicated_databases_metadata_sync(
std::make_unique<ReplicatedDatabasesMetadataSync>(zookeeper_path_ + "/repl_databases_metadata", get_zookeeper_))
, all_metadata_barrier(zookeeper_path_ + "/all_metadata", get_zookeeper_, "RestoreCoordination", "restoring metadata")
, stage_sync(zookeeper_path_ + "/stage", get_zookeeper_, &Poco::Logger::get("RestoreCoordination"))
{
createRootNodes();
}
@ -254,8 +21,58 @@ void RestoreCoordinationDistributed::createRootNodes()
auto zookeeper = get_zookeeper();
zookeeper->createAncestors(zookeeper_path);
zookeeper->createIfNotExists(zookeeper_path, "");
zookeeper->createIfNotExists(zookeeper_path + "/repl_tables_paths", "");
zookeeper->createIfNotExists(zookeeper_path + "/repl_tables_partitions", "");
zookeeper->createIfNotExists(zookeeper_path + "/repl_databases_tables_acquired", "");
zookeeper->createIfNotExists(zookeeper_path + "/repl_tables_data_acquired", "");
zookeeper->createIfNotExists(zookeeper_path + "/repl_access_storages_acquired", "");
}
/// Sets the current stage and waits for other hosts to come to this stage too.
/// The whole job is delegated to `stage_sync`.
void RestoreCoordinationDistributed::syncStage(const String & current_host, int new_stage, const Strings & wait_hosts, std::chrono::seconds timeout)
{
stage_sync.syncStage(current_host, new_stage, wait_hosts, timeout);
}
/// Sets that the current host encountered an error, so other hosts should know that and stop waiting in syncStage().
/// The whole job is delegated to `stage_sync`.
void RestoreCoordinationDistributed::syncStageError(const String & current_host, const String & error_message)
{
stage_sync.syncStageError(current_host, error_message);
}
bool RestoreCoordinationDistributed::acquireCreatingTableInReplicatedDatabase(const String & database_zk_path, const String & table_name)
{
auto zookeeper = get_zookeeper();
String path = zookeeper_path + "/repl_databases_tables_acquired/" + escapeForFileName(database_zk_path);
zookeeper->createIfNotExists(path, "");
path += "/" + escapeForFileName(table_name);
auto code = zookeeper->tryCreate(path, "", zkutil::CreateMode::Persistent);
if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNODEEXISTS))
throw zkutil::KeeperException(code, path);
return (code == Coordination::Error::ZOK);
}
bool RestoreCoordinationDistributed::acquireInsertingDataIntoReplicatedTable(const String & table_zk_path)
{
auto zookeeper = get_zookeeper();
String path = zookeeper_path + "/repl_tables_data_acquired/" + escapeForFileName(table_zk_path);
auto code = zookeeper->tryCreate(path, "", zkutil::CreateMode::Persistent);
if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNODEEXISTS))
throw zkutil::KeeperException(code, path);
return (code == Coordination::Error::ZOK);
}
bool RestoreCoordinationDistributed::acquireReplicatedAccessStorage(const String & access_storage_zk_path)
{
auto zookeeper = get_zookeeper();
String path = zookeeper_path + "/repl_access_storages_acquired/" + escapeForFileName(access_storage_zk_path);
auto code = zookeeper->tryCreate(path, "", zkutil::CreateMode::Persistent);
if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNODEEXISTS))
throw zkutil::KeeperException(code, path);
return (code == Coordination::Error::ZOK);
}
void RestoreCoordinationDistributed::removeAllNodes()
@ -264,104 +81,6 @@ void RestoreCoordinationDistributed::removeAllNodes()
zookeeper->removeRecursive(zookeeper_path);
}
/// Starts creating a table in a replicated database. Returns false if there is another host which is already creating this table.
/// Forwards to ReplicatedDatabasesMetadataSync::startCreatingTable().
bool RestoreCoordinationDistributed::startCreatingTableInReplicatedDB(
const String & host_id, const String & database_name, const String & database_zk_path, const String & table_name)
{
return replicated_databases_metadata_sync->startCreatingTable(host_id, database_name, database_zk_path, table_name);
}
/// Ends creating a table in a replicated database, successfully or with an error.
/// In the latter case `error_message` should be set.
/// Forwards to ReplicatedDatabasesMetadataSync::finishCreatingTable().
void RestoreCoordinationDistributed::finishCreatingTableInReplicatedDB(
const String & host_id,
const String & database_name,
const String & database_zk_path,
const String & table_name,
const String & error_message)
{
return replicated_databases_metadata_sync->finishCreatingTable(host_id, database_name, database_zk_path, table_name, error_message);
}
/// Waits for another host to create a table in a replicated database.
/// Forwards to ReplicatedDatabasesMetadataSync::waitForTableCreated().
void RestoreCoordinationDistributed::waitForTableCreatedInReplicatedDB(
const String & database_name, const String & database_zk_path, const String & table_name, std::chrono::seconds timeout)
{
return replicated_databases_metadata_sync->waitForTableCreated(database_name, database_zk_path, table_name, timeout);
}
/// Reports to `all_metadata_barrier` that `host_id` has finished restoring metadata;
/// `error_message` is passed through unchanged.
void RestoreCoordinationDistributed::finishRestoringMetadata(const String & host_id, const String & error_message)
{
all_metadata_barrier.finish(host_id, error_message);
}
/// Waits on `all_metadata_barrier` until all of `host_ids` have finished restoring metadata or `timeout` expires.
void RestoreCoordinationDistributed::waitForAllHostsRestoredMetadata(const Strings & host_ids, std::chrono::seconds timeout) const
{
all_metadata_barrier.waitForAllHostsToFinish(host_ids, timeout);
}
void RestoreCoordinationDistributed::addReplicatedTableDataPath(
const String & host_id,
const DatabaseAndTableName & table_name,
const String & table_zk_path,
const String & data_path_in_backup)
{
auto zookeeper = get_zookeeper();
String path = zookeeper_path + "/repl_tables_paths/" + escapeForFileName(table_zk_path);
ReplicatedTableDataPath new_info;
new_info.host_id = host_id;
new_info.table_name = table_name;
new_info.data_path_in_backup = data_path_in_backup;
String new_info_str = new_info.serialize();
auto code = zookeeper->tryCreate(path, new_info_str, zkutil::CreateMode::Persistent);
if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNODEEXISTS))
throw zkutil::KeeperException(code, path);
while (code != Coordination::Error::ZOK)
{
Coordination::Stat stat;
ReplicatedTableDataPath cur_info = ReplicatedTableDataPath::deserialize(zookeeper->get(path, &stat));
if ((cur_info.host_id < host_id) || ((cur_info.host_id == host_id) && (cur_info.table_name <= table_name)))
break;
code = zookeeper->trySet(path, new_info_str, stat.version);
if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZBADVERSION))
throw zkutil::KeeperException(code, path);
}
}
/// Returns the path in backup stored in the node `repl_tables_paths/<escaped table path>`
/// (see addReplicatedTableDataPath()).
String RestoreCoordinationDistributed::getReplicatedTableDataPath(const String & table_zk_path_) const
{
    auto zookeeper = get_zookeeper();
    const String node = zookeeper_path + "/repl_tables_paths/" + escapeForFileName(table_zk_path_);
    return ReplicatedTableDataPath::deserialize(zookeeper->get(node)).data_path_in_backup;
}
bool RestoreCoordinationDistributed::startInsertingDataToPartitionInReplicatedTable(
const String & host_id_,
const DatabaseAndTableName & table_name_,
const String & table_zk_path_,
const String & partition_name_)
{
auto zookeeper = get_zookeeper();
String path = zookeeper_path + "/repl_tables_partitions/" + escapeForFileName(table_zk_path_);
zookeeper->createIfNotExists(path, "");
path += "/" + escapeForFileName(partition_name_);
String new_info = host_id_ + "|" + table_name_.first + "|" + table_name_.second;
auto code = zookeeper->tryCreate(path, new_info, zkutil::CreateMode::Persistent);
if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNODEEXISTS))
throw zkutil::KeeperException(code, path);
if (code == Coordination::Error::ZOK)
return true;
return zookeeper->get(path) == new_info;
}
void RestoreCoordinationDistributed::drop()
{
removeAllNodes();

View File

@ -2,7 +2,6 @@
#include <Backups/IRestoreCoordination.h>
#include <Backups/BackupCoordinationHelpers.h>
#include <Common/ZooKeeper/Common.h>
namespace DB
@ -15,50 +14,22 @@ public:
RestoreCoordinationDistributed(const String & zookeeper_path, zkutil::GetZooKeeper get_zookeeper);
~RestoreCoordinationDistributed() override;
/// Sets the current stage and waits for other hosts to come to this stage too.
void syncStage(const String & current_host, int new_stage, const Strings & wait_hosts, std::chrono::seconds timeout) override;
/// Sets that the current host encountered an error, so other hosts should know that and stop waiting in syncStage().
void syncStageError(const String & current_host, const String & error_message) override;
/// Starts creating a table in a replicated database. Returns false if there is another host which is already creating this table.
bool startCreatingTableInReplicatedDB(
const String & host_id, const String & database_name, const String & database_zk_path, const String & table_name) override;
/// Sets that we have either created a table in a replicated database or failed to do so.
/// In the latter case `error_message` should be set.
/// Calling this function unblocks other hosts waiting for this table to be created (see waitForTableCreatedInReplicatedDB()).
void finishCreatingTableInReplicatedDB(
const String & host_id,
const String & database_name,
const String & database_zk_path,
const String & table_name,
const String & error_message) override;
/// Wait for another host to create a table in a replicated database.
void waitForTableCreatedInReplicatedDB(
const String & database_name, const String & database_zk_path, const String & table_name, std::chrono::seconds timeout) override;
/// Sets path in backup used by a replicated table.
/// This function can be called multiple times for the same table with different `host_id`, and in that case
/// getReplicatedTableDataPath() will choose `data_path_in_backup` with the lexicographically first `host_id`.
void addReplicatedTableDataPath(
const String & host_id,
const DatabaseAndTableName & table_name,
const String & table_zk_path,
const String & data_path_in_backup) override;
/// Sets that a specified host has finished restoring metadata, successfully or with an error.
/// In the latter case `error_message` should be set.
void finishRestoringMetadata(const String & host_id, const String & error_message) override;
/// Waits for all hosts to finish restoring their metadata (i.e. to finish creating databases and tables). Returns false if time is out.
void waitForAllHostsRestoredMetadata(const Strings & host_ids, std::chrono::seconds timeout) const override;
/// Gets path in backup used by a replicated table.
String getReplicatedTableDataPath(const String & table_zk_path) const override;
bool acquireCreatingTableInReplicatedDatabase(const String & database_zk_path, const String & table_name) override;
/// Sets that this replica is going to restore a partition in a replicated table.
/// The function returns false if this partition is being already restored by another replica.
bool startInsertingDataToPartitionInReplicatedTable(
const String & host_id,
const DatabaseAndTableName & table_name,
const String & table_zk_path,
const String & partition_name) override;
bool acquireInsertingDataIntoReplicatedTable(const String & table_zk_path) override;
/// Sets that this replica is going to restore a ReplicatedAccessStorage.
/// The function returns false if this access storage is being already restored by another replica.
bool acquireReplicatedAccessStorage(const String & access_storage_zk_path) override;
/// Removes remotely stored information.
void drop() override;
@ -71,8 +42,7 @@ private:
const String zookeeper_path;
const zkutil::GetZooKeeper get_zookeeper;
std::unique_ptr<ReplicatedDatabasesMetadataSync> replicated_databases_metadata_sync;
BackupCoordinationDistributedBarrier all_metadata_barrier;
BackupCoordinationStageSync stage_sync;
};
}

View File

@ -1,107 +1,35 @@
#include <Backups/RestoreCoordinationLocal.h>
#include <Backups/formatTableNameOrTemporaryTableName.h>
#include <Common/Exception.h>
#include <Common/logger_useful.h>
#include <base/chrono_io.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
RestoreCoordinationLocal::RestoreCoordinationLocal()
: log(&Poco::Logger::get("RestoreCoordination"))
{}
RestoreCoordinationLocal::RestoreCoordinationLocal() = default;
RestoreCoordinationLocal::~RestoreCoordinationLocal() = default;
bool RestoreCoordinationLocal::startCreatingTableInReplicatedDB(
const String & /* host_id */,
const String & /* database_name */,
const String & /* database_zk_path */,
const String & /* table_name */)
/// The body is empty: this implementation neither records the stage nor waits for anything,
/// and all arguments are ignored.
void RestoreCoordinationLocal::syncStage(const String &, int, const Strings &, std::chrono::seconds)
{
}
/// The body is empty: the reported error is not stored or forwarded anywhere by this implementation,
/// and both arguments are ignored.
void RestoreCoordinationLocal::syncStageError(const String &, const String &)
{
}
/// Remembers the pair (`database_zk_path`, `table_name`) as acquired.
/// Returns true if the pair has been recorded by this call, false if it was already recorded before.
/// The set of acquired pairs is accessed under `mutex`.
bool RestoreCoordinationLocal::acquireCreatingTableInReplicatedDatabase(const String & database_zk_path, const String & table_name)
{
    std::lock_guard lock{mutex};
    const bool newly_acquired = acquired_tables_in_replicated_databases.emplace(database_zk_path, table_name).second;
    return newly_acquired;
}
/// Remembers `table_zk_path` as acquired for inserting data.
/// Returns true if the path has been recorded by this call, false if it was already recorded before.
/// The set of acquired paths is accessed under `mutex`.
bool RestoreCoordinationLocal::acquireInsertingDataIntoReplicatedTable(const String & table_zk_path)
{
    std::lock_guard lock{mutex};
    const bool newly_acquired = acquired_data_in_replicated_tables.insert(table_zk_path).second;
    return newly_acquired;
}
/// Always returns true; the access storage path argument is ignored and nothing is recorded.
bool RestoreCoordinationLocal::acquireReplicatedAccessStorage(const String &)
{
return true;
}
/// Logs the outcome of creating a table in a replicated database at trace level.
/// An empty `error_message` is treated as success; otherwise the message is appended to the log line.
/// `host_id` and `database_zk_path` are not used, and no state is changed.
void RestoreCoordinationLocal::finishCreatingTableInReplicatedDB(
    const String & /* host_id */,
    const String & database_name,
    const String & /* database_zk_path */,
    const String & table_name,
    const String & error_message)
{
    if (!error_message.empty())
    {
        /// The message used to read "Failed to created table", which was ungrammatical.
        LOG_TRACE(log, "Failed to create table {}.{}: {}", database_name, table_name, error_message);
        return;
    }

    LOG_TRACE(log, "Created table {}.{}", database_name, table_name);
}
/// Wait for another host to create a table in a replicated database.
/// In this implementation the body is empty, so the call returns immediately; all parameters are unused.
void RestoreCoordinationLocal::waitForTableCreatedInReplicatedDB(
const String & /* database_name */,
const String & /* database_zk_path */,
const String & /* table_name */,
std::chrono::seconds /* timeout */)
{
}
void RestoreCoordinationLocal::finishRestoringMetadata(const String & /* host_id */, const String & error_message)
{
LOG_TRACE(log, "Finished restoring metadata{}", (error_message.empty() ? "" : (" with error " + error_message)));
}
/// The body is empty, so the call returns immediately; `host_ids` and `timeout` are unused.
void RestoreCoordinationLocal::waitForAllHostsRestoredMetadata(const Strings & /* host_ids */, std::chrono::seconds /* timeout */) const
{
}
/// Records `data_path_in_backup` for the replicated table identified by `table_zk_path`.
/// If an entry for this path already exists, it is overwritten only when `table_name` compares
/// less than the stored table name, so the entry with the smallest table name is kept.
/// `host_id` is not used by this implementation. The map is accessed under `mutex`.
void RestoreCoordinationLocal::addReplicatedTableDataPath(const String & /* host_id */,
    const DatabaseAndTableName & table_name,
    const String & table_zk_path,
    const String & data_path_in_backup)
{
    std::lock_guard lock{mutex};

    const auto found = replicated_tables_data_paths.find(table_zk_path);
    if (found != replicated_tables_data_paths.end())
    {
        /// Already known path: replace the stored info only if the new table name sorts first.
        auto & existing = found->second;
        if (table_name < existing.table_name)
        {
            existing.table_name = table_name;
            existing.data_path_in_backup = data_path_in_backup;
        }
        return;
    }

    /// First time this path is seen: store the info as is.
    replicated_tables_data_paths.emplace(table_zk_path, ReplicatedTableDataPath{table_name, data_path_in_backup});
}
/// Returns the data path in backup recorded for `table_zk_path`.
/// Throws an Exception with code LOGICAL_ERROR if nothing has been recorded for this path.
/// The map is accessed under `mutex`.
String RestoreCoordinationLocal::getReplicatedTableDataPath(const String & table_zk_path) const
{
    std::lock_guard lock{mutex};

    const auto found = replicated_tables_data_paths.find(table_zk_path);
    if (found != replicated_tables_data_paths.end())
        return found->second.data_path_in_backup;

    throw Exception(ErrorCodes::LOGICAL_ERROR, "Replicated data path is not set for zk_path={}", table_zk_path);
}
/// Registers `table_name` for the pair (`table_zk_path`, `partition_name`) unless some table name
/// is already registered for that pair, then returns whether the registered name equals `table_name`.
/// Consequently the result is true both for the first registration and for a repeated call with the
/// same `table_name`. `host_id` is not used. The map is accessed under `mutex`.
bool RestoreCoordinationLocal::startInsertingDataToPartitionInReplicatedTable(
    const String & /* host_id */, const DatabaseAndTableName & table_name, const String & table_zk_path, const String & partition_name)
{
    std::lock_guard lock{mutex};
    const auto & registered_table
        = replicated_tables_partitions.try_emplace(std::pair{table_zk_path, partition_name}, table_name).first->second;
    return registered_table == table_name;
}
}

View File

@ -1,10 +1,9 @@
#pragma once
#include <Backups/IRestoreCoordination.h>
#include <condition_variable>
#include <map>
#include <mutex>
#include <unordered_map>
#include <set>
#include <unordered_set>
namespace Poco { class Logger; }
@ -18,64 +17,27 @@ public:
RestoreCoordinationLocal();
~RestoreCoordinationLocal() override;
/// Sets the current stage and waits for other hosts to come to this stage too.
void syncStage(const String & current_host, int stage, const Strings & wait_hosts, std::chrono::seconds timeout) override;
/// Sets that the current host encountered an error, so other hosts should know that and stop waiting in syncStage().
void syncStageError(const String & current_host, const String & error_message) override;
/// Starts creating a table in a replicated database. Returns false if there is another host which is already creating this table.
bool startCreatingTableInReplicatedDB(
const String & host_id, const String & database_name, const String & database_zk_path, const String & table_name) override;
/// Sets that we have either created a table in a replicated database or failed to do so.
/// In the latter case `error_message` should be set.
/// Calling this function unblocks other hosts waiting for this table to be created (see waitForTableCreatedInReplicatedDB()).
void finishCreatingTableInReplicatedDB(
const String & host_id,
const String & database_name,
const String & database_zk_path,
const String & table_name,
const String & error_message) override;
/// Wait for another host to create a table in a replicated database.
void waitForTableCreatedInReplicatedDB(
const String & database_name, const String & database_zk_path, const String & table_name, std::chrono::seconds timeout) override;
/// Sets path in backup used by a replicated table.
/// This function can be called multiple times for the same table with different `host_id`, and in that case
/// getReplicatedTableDataPath() will choose `data_path_in_backup` with the lexicographically first `host_id`.
void addReplicatedTableDataPath(
const String & host_id,
const DatabaseAndTableName & table_name,
const String & table_zk_path,
const String & data_path_in_backup) override;
/// Sets that a specified host has finished restoring metadata, successfully or with an error.
/// In the latter case `error_message` should be set.
void finishRestoringMetadata(const String & host_id, const String & error_message) override;
/// Waits for all hosts to finish restoring their metadata (i.e. to finish creating databases and tables). Returns false if time is out.
void waitForAllHostsRestoredMetadata(const Strings & host_ids, std::chrono::seconds timeout) const override;
/// Gets path in backup used by a replicated table.
String getReplicatedTableDataPath(const String & table_zk_path) const override;
bool acquireCreatingTableInReplicatedDatabase(const String & database_zk_path, const String & table_name) override;
/// Sets that this replica is going to restore a partition in a replicated table.
/// The function returns false if this partition is being already restored by another replica.
bool startInsertingDataToPartitionInReplicatedTable(
const String & host_id,
const DatabaseAndTableName & table_name,
const String & table_zk_path,
const String & partition_name) override;
bool acquireInsertingDataIntoReplicatedTable(const String & table_zk_path) override;
/// Sets that this replica is going to restore a ReplicatedAccessStorage.
/// The function returns false if this access storage is being already restored by another replica.
bool acquireReplicatedAccessStorage(const String & access_storage_zk_path) override;
private:
/// Value stored per `table_zk_path` in `replicated_tables_data_paths`.
struct ReplicatedTableDataPath
{
/// Name of the table this data path was recorded for.
DatabaseAndTableName table_name;
/// Path in the backup recorded for the table.
String data_path_in_backup;
};
std::unordered_map<String /* table_zk_path */, ReplicatedTableDataPath> replicated_tables_data_paths;
std::map<std::pair<String /* table_zk_path */, String /* partition_name */>, DatabaseAndTableName> replicated_tables_partitions;
std::set<std::pair<String /* database_zk_path */, String /* table_name */>> acquired_tables_in_replicated_databases;
std::unordered_set<String /* table_zk_path */> acquired_data_in_replicated_tables;
mutable std::mutex mutex;
const Poco::Logger * log;
};
}

View File

@ -29,22 +29,43 @@ namespace
if (field.getType() == Field::Types::String)
{
const String & str = field.get<const String &>();
if (str == "1" || boost::iequals(str, "true"))
if (str == "1" || boost::iequals(str, "true") || boost::iequals(str, "create"))
{
value = RestoreTableCreationMode::kCreate;
else if (str == "0" || boost::iequals(str, "false"))
return;
}
if (str == "0" || boost::iequals(str, "false") || boost::iequals(str, "must exist") || boost::iequals(str, "must-exist"))
{
value = RestoreTableCreationMode::kMustExist;
else if (boost::iequals(str, "if not exists"))
return;
}
if (boost::iequals(str, "if not exists") || boost::iequals(str, "if-not-exists")
|| boost::iequals(str, "create if not exists") || boost::iequals(str, "create-if-not-exists"))
{
value = RestoreTableCreationMode::kCreateIfNotExists;
else throw Exception("Cannot parse creation mode from string '" + str + "'",
ErrorCodes::CANNOT_PARSE_BACKUP_SETTINGS);
return;
}
}
else
if (field.getType() == Field::Types::UInt64)
{
if (applyVisitor(FieldVisitorConvertToNumber<bool>(), field))
UInt64 number = field.get<UInt64>();
if (number == 1)
{
value = RestoreTableCreationMode::kCreate;
else
return;
}
if (number == 0)
{
value = RestoreTableCreationMode::kMustExist;
return;
}
}
throw Exception(ErrorCodes::CANNOT_PARSE_BACKUP_SETTINGS, "Cannot parse creation mode from {}", field);
}
explicit operator Field() const
@ -60,6 +81,62 @@ namespace
};
using SettingFieldRestoreDatabaseCreationMode = SettingFieldRestoreTableCreationMode;
struct SettingFieldRestoreAccessCreationMode
{
RestoreAccessCreationMode value;
explicit SettingFieldRestoreAccessCreationMode(RestoreAccessCreationMode value_) : value(value_) {}
explicit SettingFieldRestoreAccessCreationMode(const Field & field)
{
if (field.getType() == Field::Types::String)
{
const String & str = field.get<const String &>();
if (str == "1" || boost::iequals(str, "true") || boost::iequals(str, "create"))
{
value = RestoreAccessCreationMode::kCreate;
return;
}
if (boost::iequals(str, "if not exists") || boost::iequals(str, "if-not-exists")
|| boost::iequals(str, "create if not exists") || boost::iequals(str, "create-if-not-exists"))
{
value = RestoreAccessCreationMode::kCreateIfNotExists;
return;
}
if (boost::iequals(str, "replace") || boost::iequals(str, "create or replace") || boost::iequals(str, "create-or-replace"))
{
value = RestoreAccessCreationMode::kReplace;
return;
}
}
if (field.getType() == Field::Types::UInt64)
{
UInt64 number = field.get<UInt64>();
if (number == 1)
{
value = RestoreAccessCreationMode::kCreate;
return;
}
}
throw Exception(ErrorCodes::CANNOT_PARSE_BACKUP_SETTINGS, "Cannot parse creation mode from {}", field);
}
explicit operator Field() const
{
switch (value)
{
case RestoreAccessCreationMode::kCreate: return Field{true};
case RestoreAccessCreationMode::kCreateIfNotExists: return Field{"if not exists"};
case RestoreAccessCreationMode::kReplace: return Field{"replace"};
}
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected value of enum RestoreAccessCreationMode: {}", static_cast<int>(value));
}
};
}
/// List of restore settings except base_backup_name and cluster_host_ids.
@ -76,6 +153,8 @@ namespace
M(UInt64, shard_num_in_backup) \
M(UInt64, replica_num_in_backup) \
M(Bool, allow_non_empty_tables) \
M(RestoreAccessCreationMode, create_access) \
M(Bool, allow_unresolved_access_dependencies) \
M(Bool, internal) \
M(String, host_id) \
M(String, coordination_zk_path)

Some files were not shown because too many files have changed in this diff Show More