Merge branch 'master' into dictinct_in_order_optimization

commit f45b4f56d8
Author: mergify[bot], 2022-06-21 21:25:37 +00:00; committed by GitHub
GPG Key ID: 4AEE18F83AFDEB23 (no known key found for this signature in database)
341 changed files with 10117 additions and 5658 deletions


@ -7,6 +7,7 @@
#include <replxx.hxx>
#include <base/types.h>
#include <base/defines.h>
class LineReader
{
@ -20,8 +21,8 @@ public:
void addWords(Words && new_words);
private:
Words words;
Words words_no_case;
Words words TSA_GUARDED_BY(mutex);
Words words_no_case TSA_GUARDED_BY(mutex);
std::mutex mutex;
};
@ -29,7 +30,7 @@ public:
using Patterns = std::vector<const char *>;
LineReader(const String & history_file_path, bool multiline, Patterns extenders, Patterns delimiters);
virtual ~LineReader() {}
virtual ~LineReader() = default;
/// Reads the whole line until delimiter (in multiline mode) or until the last line without extender.
/// If resulting line is empty, it means the user interrupted the input.


@ -124,6 +124,23 @@
#endif
#endif
// Macros for Clang Thread Safety Analysis (TSA). They can be safely ignored by other compilers.
// Feel free to extend, but please stay close to https://clang.llvm.org/docs/ThreadSafetyAnalysis.html#mutexheader
#if defined(__clang__)
# define TSA_GUARDED_BY(...) __attribute__((guarded_by(__VA_ARGS__))) // data is protected by given capability
# define TSA_PT_GUARDED_BY(...) __attribute__((pt_guarded_by(__VA_ARGS__))) // pointed-to data is protected by the given capability
# define TSA_REQUIRES(...) __attribute__((requires_capability(__VA_ARGS__))) // thread needs exclusive possession of given capability
# define TSA_REQUIRES_SHARED(...) __attribute__((requires_shared_capability(__VA_ARGS__))) // thread needs shared possession of given capability
# define TSA_ACQUIRED_AFTER(...) __attribute__((acquired_after(__VA_ARGS__))) // annotated lock must be locked after given lock
# define TSA_NO_THREAD_SAFETY_ANALYSIS __attribute__((no_thread_safety_analysis)) // disable TSA for a function
#else
# define TSA_GUARDED_BY(...)
# define TSA_PT_GUARDED_BY(...)
# define TSA_REQUIRES(...)
# define TSA_REQUIRES_SHARED(...)
# define TSA_NO_THREAD_SAFETY_ANALYSIS
#endif
/// A template function for suppressing warnings about unused variables or function results.
template <typename... Args>
constexpr void UNUSED(Args &&... args [[maybe_unused]])
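For readers who have not used Clang's Thread Safety Analysis before, the following is a minimal, hypothetical sketch (not part of this commit) of how the TSA_GUARDED_BY and TSA_REQUIRES macros defined above are typically applied, in the same spirit as the annotations added to LineReader earlier in this diff. It assumes base/defines.h is included and that libc++'s own annotations are enabled via -D_LIBCPP_ENABLE_THREAD_SAFETY_ANNOTATIONS (see the libcxx CMake change below), so that std::lock_guard is recognized as acquiring the mutex:

#include <mutex>
#include <string>
#include <vector>
#include <base/defines.h>  // brings in the TSA_* macros shown above

// Hypothetical class, for illustration only.
class WordList
{
public:
    void add(std::string word)
    {
        std::lock_guard lock(mutex);   // clang now treats the capability 'mutex' as held
        addUnlocked(std::move(word));  // OK: the precondition of addUnlocked() is satisfied
    }

private:
    /// Callers must already hold 'mutex'; -Wthread-safety reports any call site that does not.
    void addUnlocked(std::string word) TSA_REQUIRES(mutex)
    {
        words.push_back(std::move(word));  // touching 'words' without the lock would also warn
    }

    std::vector<std::string> words TSA_GUARDED_BY(mutex);
    std::mutex mutex;
};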


@ -19,7 +19,6 @@ if (COMPILER_CLANG)
# Add some warnings that are not available even with -Wall -Wextra -Wpedantic.
# We want to get everything out of the compiler for code quality.
add_warning(everything)
add_warning(pedantic)
no_warning(vla-extension)
no_warning(zero-length-array)
@ -51,6 +50,7 @@ if (COMPILER_CLANG)
no_warning(vla)
no_warning(weak-template-vtables)
no_warning(weak-vtables)
no_warning(thread-safety-negative) # experimental flag, too many false positives
# TODO Enable conversion, sign-conversion, double-promotion warnings.
elseif (COMPILER_GCC)
# Add compiler options only to c++ compiler


@ -78,6 +78,9 @@ target_compile_options(cxx PUBLIC $<$<COMPILE_LANGUAGE:CXX>:-nostdinc++>)
# Third party library may have substandard code.
target_compile_options(cxx PRIVATE -w)
# Enable support for Clang-Thread-Safety-Analysis in libcxx
target_compile_definitions(cxx PUBLIC -D_LIBCPP_ENABLE_THREAD_SAFETY_ANNOTATIONS)
target_link_libraries(cxx PUBLIC cxxabi)
# For __udivmodti4, __divmodti4.

contrib/librdkafka (vendored submodule, 2 changed lines)

@ -1 +1 @@
Subproject commit 81b413cc1c2a33ad4e96df856b89184efbd6221c
Subproject commit 6062e711a919fb3b669b243b7dceabd045d0e4a2


@ -10,7 +10,7 @@ set -x
#
# But under thread fuzzer, TSan build is too slow and this produces some flaky
# tests, so for now, as a temporary solution it had been disabled.
if ! test -f package_folder/*tsan*.deb; then
if ! test -f package_folder/clickhouse-server*tsan*.deb; then
export THREAD_FUZZER_CPU_TIME_PERIOD_US=1000
export THREAD_FUZZER_SLEEP_PROBABILITY=0.1
export THREAD_FUZZER_SLEEP_TIME_US=100000


@ -18,8 +18,10 @@ def get_options(i, backward_compatibility_check):
options.append("--db-engine=Ordinary")
if i % 3 == 2 and not backward_compatibility_check:
options.append('''--db-engine="Replicated('/test/db/test_{}', 's1', 'r1')"'''.format(i))
client_options.append('allow_experimental_database_replicated=1')
options.append(
'''--db-engine="Replicated('/test/db/test_{}', 's1', 'r1')"'''.format(i)
)
client_options.append("allow_experimental_database_replicated=1")
# If database name is not specified, new database is created for each functional test.
# Run some threads with one database for all tests.
@ -37,38 +39,58 @@ def get_options(i, backward_compatibility_check):
if i % 15 == 11:
client_options.append("join_algorithm='auto'")
client_options.append('max_rows_in_join=1000')
client_options.append("max_rows_in_join=1000")
if i == 13:
client_options.append('memory_tracker_fault_probability=0.001')
client_options.append("memory_tracker_fault_probability=0.001")
if client_options:
options.append(" --client-option " + ' '.join(client_options))
options.append(" --client-option " + " ".join(client_options))
return ' '.join(options)
return " ".join(options)
def run_func_test(cmd, output_prefix, num_processes, skip_tests_option, global_time_limit, backward_compatibility_check):
backward_compatibility_check_option = '--backward-compatibility-check' if backward_compatibility_check else ''
global_time_limit_option = ''
def run_func_test(
cmd,
output_prefix,
num_processes,
skip_tests_option,
global_time_limit,
backward_compatibility_check,
):
backward_compatibility_check_option = (
"--backward-compatibility-check" if backward_compatibility_check else ""
)
global_time_limit_option = ""
if global_time_limit:
global_time_limit_option = "--global_time_limit={}".format(global_time_limit)
output_paths = [os.path.join(output_prefix, "stress_test_run_{}.txt".format(i)) for i in range(num_processes)]
output_paths = [
os.path.join(output_prefix, "stress_test_run_{}.txt".format(i))
for i in range(num_processes)
]
pipes = []
for i in range(0, len(output_paths)):
f = open(output_paths[i], 'w')
full_command = "{} {} {} {} {}".format(cmd, get_options(i, backward_compatibility_check), global_time_limit_option, skip_tests_option, backward_compatibility_check_option)
f = open(output_paths[i], "w")
full_command = "{} {} {} {} {}".format(
cmd,
get_options(i, backward_compatibility_check),
global_time_limit_option,
skip_tests_option,
backward_compatibility_check_option,
)
logging.info("Run func tests '%s'", full_command)
p = Popen(full_command, shell=True, stdout=f, stderr=f)
pipes.append(p)
time.sleep(0.5)
return pipes
def compress_stress_logs(output_path, files_prefix):
cmd = f"cd {output_path} && tar -zcf stress_run_logs.tar.gz {files_prefix}* && rm {files_prefix}*"
check_output(cmd, shell=True)
def call_with_retry(query, timeout=30, retry_count=5):
for i in range(retry_count):
code = call(query, shell=True, stderr=STDOUT, timeout=timeout)
@ -77,6 +99,7 @@ def call_with_retry(query, timeout=30, retry_count=5):
else:
break
def make_query_command(query):
return f"""clickhouse client -q "{query}" --max_untracked_memory=1Gi --memory_profiler_step=1Gi --max_memory_usage_for_user=0"""
@ -93,28 +116,34 @@ def prepare_for_hung_check(drop_databases):
# ThreadFuzzer significantly slows down server and causes false-positive hung check failures
call_with_retry("clickhouse client -q 'SYSTEM STOP THREAD FUZZER'")
call_with_retry(make_query_command('SELECT 1 FORMAT Null'))
call_with_retry(make_query_command("SELECT 1 FORMAT Null"))
# Some tests execute SYSTEM STOP MERGES or similar queries.
# It may cause some ALTERs to hang.
# Possibly we should fix tests and forbid to use such queries without specifying table.
call_with_retry(make_query_command('SYSTEM START MERGES'))
call_with_retry(make_query_command('SYSTEM START DISTRIBUTED SENDS'))
call_with_retry(make_query_command('SYSTEM START TTL MERGES'))
call_with_retry(make_query_command('SYSTEM START MOVES'))
call_with_retry(make_query_command('SYSTEM START FETCHES'))
call_with_retry(make_query_command('SYSTEM START REPLICATED SENDS'))
call_with_retry(make_query_command('SYSTEM START REPLICATION QUEUES'))
call_with_retry(make_query_command('SYSTEM DROP MARK CACHE'))
call_with_retry(make_query_command("SYSTEM START MERGES"))
call_with_retry(make_query_command("SYSTEM START DISTRIBUTED SENDS"))
call_with_retry(make_query_command("SYSTEM START TTL MERGES"))
call_with_retry(make_query_command("SYSTEM START MOVES"))
call_with_retry(make_query_command("SYSTEM START FETCHES"))
call_with_retry(make_query_command("SYSTEM START REPLICATED SENDS"))
call_with_retry(make_query_command("SYSTEM START REPLICATION QUEUES"))
call_with_retry(make_query_command("SYSTEM DROP MARK CACHE"))
# Issue #21004, live views are experimental, so let's just suppress it
call_with_retry(make_query_command("KILL QUERY WHERE upper(query) LIKE 'WATCH %'"))
# Kill other queries which are known to be slow
# It's query from 01232_preparing_sets_race_condition_long, it may take up to 1000 seconds in slow builds
call_with_retry(make_query_command("KILL QUERY WHERE query LIKE 'insert into tableB select %'"))
call_with_retry(
make_query_command("KILL QUERY WHERE query LIKE 'insert into tableB select %'")
)
# Long query from 00084_external_agregation
call_with_retry(make_query_command("KILL QUERY WHERE query LIKE 'SELECT URL, uniq(SearchPhrase) AS u FROM test.hits GROUP BY URL ORDER BY u %'"))
call_with_retry(
make_query_command(
"KILL QUERY WHERE query LIKE 'SELECT URL, uniq(SearchPhrase) AS u FROM test.hits GROUP BY URL ORDER BY u %'"
)
)
if drop_databases:
for i in range(5):
@ -123,23 +152,35 @@ def prepare_for_hung_check(drop_databases):
# Otherwise we will get rid of queries which wait for background pool. It can take a long time on slow builds (more than 900 seconds).
#
# Also specify max_untracked_memory to allow 1GiB of memory to overcommit.
databases = check_output(make_query_command('SHOW DATABASES'), shell=True, timeout=30).decode('utf-8').strip().split()
databases = (
check_output(
make_query_command("SHOW DATABASES"), shell=True, timeout=30
)
.decode("utf-8")
.strip()
.split()
)
for db in databases:
if db == "system":
continue
command = make_query_command(f'DROP DATABASE {db}')
command = make_query_command(f"DROP DATABASE {db}")
# we don't wait for drop
Popen(command, shell=True)
break
except Exception as ex:
logging.error("Failed to SHOW or DROP databasese, will retry %s", str(ex))
logging.error(
"Failed to SHOW or DROP databasese, will retry %s", str(ex)
)
time.sleep(i)
else:
raise Exception("Cannot drop databases after stress tests. Probably server consumed too much memory and cannot execute simple queries")
raise Exception(
"Cannot drop databases after stress tests. Probably server consumed too much memory and cannot execute simple queries"
)
# Wait for last queries to finish if any, not longer than 300 seconds
call(make_query_command("""
call(
make_query_command(
"""
select sleepEachRow((
select maxOrDefault(300 - elapsed) + 1
from system.processes
@ -147,39 +188,58 @@ def prepare_for_hung_check(drop_databases):
) / 300)
from numbers(300)
format Null
"""), shell=True, stderr=STDOUT, timeout=330)
"""
),
shell=True,
stderr=STDOUT,
timeout=330,
)
# Even if all clickhouse-test processes are finished, there are probably some sh scripts,
# which still run some new queries. Let's ignore them.
try:
query = """clickhouse client -q "SELECT count() FROM system.processes where where elapsed > 300" """
output = check_output(query, shell=True, stderr=STDOUT, timeout=30).decode('utf-8').strip()
output = (
check_output(query, shell=True, stderr=STDOUT, timeout=30)
.decode("utf-8")
.strip()
)
if int(output) == 0:
return False
except:
pass
return True
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
parser = argparse.ArgumentParser(description="ClickHouse script for running stresstest")
parser.add_argument("--test-cmd", default='/usr/bin/clickhouse-test')
parser.add_argument("--skip-func-tests", default='')
parser.add_argument("--client-cmd", default='clickhouse-client')
parser.add_argument("--server-log-folder", default='/var/log/clickhouse-server')
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
parser = argparse.ArgumentParser(
description="ClickHouse script for running stresstest"
)
parser.add_argument("--test-cmd", default="/usr/bin/clickhouse-test")
parser.add_argument("--skip-func-tests", default="")
parser.add_argument("--client-cmd", default="clickhouse-client")
parser.add_argument("--server-log-folder", default="/var/log/clickhouse-server")
parser.add_argument("--output-folder")
parser.add_argument("--global-time-limit", type=int, default=1800)
parser.add_argument("--num-parallel", type=int, default=cpu_count())
parser.add_argument('--backward-compatibility-check', action='store_true')
parser.add_argument('--hung-check', action='store_true', default=False)
parser.add_argument("--backward-compatibility-check", action="store_true")
parser.add_argument("--hung-check", action="store_true", default=False)
# make sense only for hung check
parser.add_argument('--drop-databases', action='store_true', default=False)
parser.add_argument("--drop-databases", action="store_true", default=False)
args = parser.parse_args()
if args.drop_databases and not args.hung_check:
raise Exception("--drop-databases only used in hung check (--hung-check)")
func_pipes = []
func_pipes = run_func_test(args.test_cmd, args.output_folder, args.num_parallel, args.skip_func_tests, args.global_time_limit, args.backward_compatibility_check)
func_pipes = run_func_test(
args.test_cmd,
args.output_folder,
args.num_parallel,
args.skip_func_tests,
args.global_time_limit,
args.backward_compatibility_check,
)
logging.info("Will wait functests to finish")
while True:
@ -205,32 +265,41 @@ if __name__ == "__main__":
have_long_running_queries = True
logging.error("Failed to prepare for hung check %s", str(ex))
logging.info("Checking if some queries hung")
cmd = ' '.join([args.test_cmd,
# Do not track memory allocations up to 1Gi,
# this will allow to ignore server memory limit (max_server_memory_usage) for this query.
#
# NOTE: memory_profiler_step should be also adjusted, because:
#
# untracked_memory_limit = min(settings.max_untracked_memory, settings.memory_profiler_step)
#
# NOTE: that if there will be queries with GROUP BY, this trick
# will not work due to CurrentMemoryTracker::check() from
# Aggregator code.
# But right now it should work, since neither hung check, nor 00001_select_1 has GROUP BY.
"--client-option", "max_untracked_memory=1Gi",
"--client-option", "max_memory_usage_for_user=0",
"--client-option", "memory_profiler_step=1Gi",
# Use system database to avoid CREATE/DROP DATABASE queries
"--database=system",
"--hung-check",
"00001_select_1"
])
cmd = " ".join(
[
args.test_cmd,
# Do not track memory allocations up to 1Gi,
# this will allow to ignore server memory limit (max_server_memory_usage) for this query.
#
# NOTE: memory_profiler_step should be also adjusted, because:
#
# untracked_memory_limit = min(settings.max_untracked_memory, settings.memory_profiler_step)
#
# NOTE: that if there will be queries with GROUP BY, this trick
# will not work due to CurrentMemoryTracker::check() from
# Aggregator code.
# But right now it should work, since neither hung check, nor 00001_select_1 has GROUP BY.
"--client-option",
"max_untracked_memory=1Gi",
"--client-option",
"max_memory_usage_for_user=0",
"--client-option",
"memory_profiler_step=1Gi",
# Use system database to avoid CREATE/DROP DATABASE queries
"--database=system",
"--hung-check",
"--stress",
"00001_select_1",
]
)
res = call(cmd, shell=True, stderr=STDOUT)
hung_check_status = "No queries hung\tOK\n"
if res != 0 and have_long_running_queries:
logging.info("Hung check failed with exit code {}".format(res))
hung_check_status = "Hung check failed\tFAIL\n"
with open(os.path.join(args.output_folder, "test_results.tsv"), 'w+') as results:
with open(
os.path.join(args.output_folder, "test_results.tsv"), "w+"
) as results:
results.write(hung_check_status)
logging.info("Stress test finished")


@ -66,7 +66,7 @@ For a description of parameters, see the [CREATE query description](../../../sql
A tuple of column names or arbitrary expressions. Example: `ORDER BY (CounterID, EventDate)`.
ClickHouse uses the sorting key as a primary key if the primary key is not defined obviously by the `PRIMARY KEY` clause.
ClickHouse uses the sorting key as a primary key if the primary key is not defined explicitly by the `PRIMARY KEY` clause.
Use the `ORDER BY tuple()` syntax, if you do not need sorting. See [Selecting the Primary Key](#selecting-the-primary-key).


@ -5,6 +5,8 @@ sidebar_label: Command-Line Client
# Command-line Client
## clickhouse-client
ClickHouse provides a native command-line client: `clickhouse-client`. The client supports command-line options and configuration files. For more information, see [Configuring](#interfaces_cli_configuration).
[Install](../getting-started/install.md) it from the `clickhouse-client` package and run it with the command `clickhouse-client`.
@ -115,7 +117,7 @@ You can pass parameters to `clickhouse-client` (all parameters have a default va
- `--user, -u` The username. Default value: default.
- `--password` The password. Default value: empty string.
- `--query, -q` The query to process when using non-interactive mode. You must specify either `query` or `queries-file` option.
- `--queries-file, -qf` file path with queries to execute. You must specify either `query` or `queries-file` option.
- `--queries-file` file path with queries to execute. You must specify either `query` or `queries-file` option.
- `--database, -d` Select the current default database. Default value: the current database from the server settings (default by default).
- `--multiline, -m` If specified, allow multiline queries (do not send the query on Enter).
- `--multiquery, -n` If specified, allow processing multiple queries separated by semicolons.
@ -183,4 +185,3 @@ If the configuration above is applied, the ID of a query is shown in the followi
``` text
speedscope:http://speedscope-host/#profileURL=qp%3Fid%3Dc8ecc783-e753-4b38-97f1-42cddfb98b7d
```


@ -31,12 +31,12 @@ $ clickhouse-local --structure "table_structure" --input-format "format_of_incom
Arguments:
- `-S`, `--structure` — table structure for input data.
- `-if`, `--input-format` — input format, `TSV` by default.
- `--input-format` — input format, `TSV` by default.
- `-f`, `--file` — path to data, `stdin` by default.
- `-q`, `--query` — queries to execute with `;` as delimiter. You must specify either `query` or `queries-file` option.
- `-qf`, `--queries-file` - file path with queries to execute. You must specify either `query` or `queries-file` option.
- `--queries-file` - file path with queries to execute. You must specify either `query` or `queries-file` option.
- `-N`, `--table` — table name where to put output data, `table` by default.
- `-of`, `--format`, `--output-format` — output format, `TSV` by default.
- `--format`, `--output-format` — output format, `TSV` by default.
- `-d`, `--database` — default database, `_local` by default.
- `--stacktrace` — whether to dump debug output in case of exception.
- `--echo` — print query before execution.


@ -10,7 +10,7 @@ ClickHouse supports the standard grammar for defining windows and window functio
| Feature | Support or workaround |
| --------| ----------|
| ad hoc window specification (`count(*) over (partition by id order by time desc)`) | supported |
| expressions involving window functions, e.g. `(count(*) over ()) / 2)` | not supported, wrap in a subquery ([feature request](https://github.com/ClickHouse/ClickHouse/issues/19857)) |
| expressions involving window functions, e.g. `(count(*) over ()) / 2)` | supported |
| `WINDOW` clause (`select ... from table window w as (partition by id)`) | supported |
| `ROWS` frame | supported |
| `RANGE` frame | supported, the default |


@ -121,7 +121,7 @@ $ clickhouse-client --param_tbl="numbers" --param_db="system" --param_col="numbe
- `--user, -u` — имя пользователя, по умолчанию — default.
- `--password` — пароль, по умолчанию — пустая строка.
- `--query, -q` — запрос для выполнения, при использовании в неинтерактивном режиме.
- `--queries-file, -qf` - путь к файлу с запросами для выполнения. Необходимо указать только одну из опций: `query` или `queries-file`.
- `--queries-file` - путь к файлу с запросами для выполнения. Необходимо указать только одну из опций: `query` или `queries-file`.
- `--database, -d` — выбрать текущую БД. Без указания значение берется из настроек сервера (по умолчанию — БД default).
- `--multiline, -m` — если указано — разрешить многострочные запросы, не отправлять запрос по нажатию Enter.
- `--multiquery, -n` — если указано — разрешить выполнять несколько запросов, разделённых точкой с запятой.


@ -28,12 +28,12 @@ $ clickhouse-local --structure "table_structure" --input-format "format_of_incom
Ключи команды:
- `-S`, `--structure` — структура таблицы, в которую будут помещены входящие данные.
- `-if`, `--input-format` — формат входящих данных. По умолчанию — `TSV`.
- `--input-format` — формат входящих данных. По умолчанию — `TSV`.
- `-f`, `--file` — путь к файлу с данными. По умолчанию — `stdin`.
- `-q`, `--query` — запросы на выполнение. Разделитель запросов — `;`.
- `-qf`, `--queries-file` - путь к файлу с запросами для выполнения. Необходимо задать либо параметр `query`, либо `queries-file`.
- `--queries-file` - путь к файлу с запросами для выполнения. Необходимо задать либо параметр `query`, либо `queries-file`.
- `-N`, `--table` — имя таблицы, в которую будут помещены входящие данные. По умолчанию - `table`.
- `-of`, `--format`, `--output-format` — формат выходных данных. По умолчанию — `TSV`.
- `--format`, `--output-format` — формат выходных данных. По умолчанию — `TSV`.
- `-d`, `--database` — база данных по умолчанию. Если не указано, используется значение `_local`.
- `--stacktrace` — вывод отладочной информации при исключениях.
- `--echo` — перед выполнением запрос выводится в консоль.
@ -109,4 +109,3 @@ Read 186 rows, 4.15 KiB in 0.035 sec., 5302 rows/sec., 118.34 KiB/sec.
├──────────┼──────────┤
...
```


@ -29,12 +29,12 @@ clickhouse-local --structure "table_structure" --input-format "format_of_incomin
参数:
- `-S`, `--structure` — 输入数据的表结构。
- `-if`, `--input-format` — 输入格式化类型, 默认是`TSV`。
- `--input-format` — 输入格式化类型, 默认是`TSV`。
- `-f`, `--file` — 数据路径, 默认是`stdin`。
- `-q`, `--query` — 要查询的SQL语句使用`;`做分隔符。您必须指定`query`或`queries-file`选项。
- `-qf`, `--queries-file` - 包含执行查询的文件路径。您必须指定`query`或`queries-file`选项。
- `--queries-file` - 包含执行查询的文件路径。您必须指定`query`或`queries-file`选项。
- `-N`, `--table` — 数据输出的表名,默认是`table`。
- `-of`, `--format`, `--output-format` — 输出格式化类型, 默认是`TSV`。
- `--format`, `--output-format` — 输出格式化类型, 默认是`TSV`。
- `-d`, `--database` — 默认数据库名,默认是`_local`。
- `--stacktrace` — 是否在出现异常时输出栈信息。
- `--echo` — 执行前打印查询。
@ -53,7 +53,7 @@ clickhouse-local --structure "table_structure" --input-format "format_of_incomin
## 示例 {#examples}
``` bash
echo -e "1,2\n3,4" | clickhouse-local -S "a Int64, b Int64" -if "CSV" -q "SELECT * FROM table"
echo -e "1,2\n3,4" | clickhouse-local -S "a Int64, b Int64" --input-format "CSV" -q "SELECT * FROM table"
Read 2 rows, 32.00 B in 0.000 sec., 5182 rows/sec., 80.97 KiB/sec.
1 2
3 4


@ -994,7 +994,7 @@ void Client::processConfig()
/// The value of the option is used as the text of query (or of multiple queries).
/// If stdin is not a terminal, INSERT data for the first query is read from it.
/// - stdin is not a terminal. In this case queries are read from it.
/// - -qf (--queries-file) command line option is present.
/// - --queries-file command line option is present.
/// The value of the option is used as file with query (or of multiple queries) to execute.
delayed_interactive = config().has("interactive") && (config().has("query") || config().has("queries-file"));


@ -3,6 +3,7 @@
#include <mutex>
#include <Poco/Util/Application.h>
#include <base/defines.h>
namespace DB
{
@ -24,9 +25,9 @@ public:
private:
mutable std::mutex keeper_dispatcher_mutex;
mutable std::shared_ptr<KeeperDispatcher> keeper_dispatcher;
mutable std::shared_ptr<KeeperDispatcher> keeper_dispatcher TSA_GUARDED_BY(keeper_dispatcher_mutex);
ConfigurationPtr config;
ConfigurationPtr config TSA_GUARDED_BY(keeper_dispatcher_mutex);
};
}


@ -1,6 +1,8 @@
#pragma once
#include "SharedLibraryHandler.h"
#include <base/defines.h>
#include <unordered_map>
#include <mutex>
@ -30,7 +32,7 @@ public:
private:
/// map: dict_id -> sharedLibraryHandler
std::unordered_map<std::string, SharedLibraryHandlerPtr> library_handlers;
std::unordered_map<std::string, SharedLibraryHandlerPtr> library_handlers TSA_GUARDED_BY(mutex);
std::mutex mutex;
};


@ -3,7 +3,6 @@
#include <Client/ClientBase.h>
#include <Client/LocalConnection.h>
#include <Common/ProgressIndication.h>
#include <Common/StatusFile.h>
#include <Common/InterruptListener.h>
#include <Loggers/Loggers.h>


@ -4,6 +4,7 @@
#include <nanodbc/nanodbc.h>
#include <mutex>
#include <base/BorrowedObjectPool.h>
#include <base/defines.h>
#include <unordered_map>
@ -165,7 +166,7 @@ public:
private:
/// [connection_settings_string] -> [connection_pool]
using PoolFactory = std::unordered_map<std::string, nanodbc::PoolPtr>;
PoolFactory factory;
PoolFactory factory TSA_GUARDED_BY(mutex);
std::mutex mutex;
};

src/Access/AccessBackup.cpp (new file, 358 lines)

@ -0,0 +1,358 @@
#include <Access/AccessBackup.h>
#include <Access/AccessControl.h>
#include <Access/AccessEntityIO.h>
#include <Access/Common/AccessRightsElement.h>
#include <Access/User.h>
#include <Access/Role.h>
#include <Access/SettingsProfile.h>
#include <Access/RowPolicy.h>
#include <Access/Quota.h>
#include <Backups/BackupEntriesCollector.h>
#include <Backups/BackupEntryFromMemory.h>
#include <Backups/IBackup.h>
#include <IO/WriteHelpers.h>
#include <IO/ReadHelpers.h>
#include <IO/ReadBufferFromString.h>
#include <Poco/UUIDGenerator.h>
#include <base/insertAtEnd.h>
#include <boost/range/algorithm/copy.hpp>
namespace fs = std::filesystem;
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
namespace
{
/// Represents a list of access entities as they're stored in a backup.
struct AccessEntitiesInBackup
{
std::unordered_map<UUID, AccessEntityPtr> entities;
std::unordered_map<UUID, std::pair<String, AccessEntityType>> dependencies;
BackupEntryPtr toBackupEntry() const
{
WriteBufferFromOwnString buf;
for (const auto & [id, entity] : entities)
{
writeText(id, buf);
writeChar('\t', buf);
writeText(entity->getTypeInfo().name, buf);
writeChar('\t', buf);
writeText(entity->getName(), buf);
writeChar('\n', buf);
writeText(serializeAccessEntity(*entity), buf);
writeChar('\n', buf);
}
if (!dependencies.empty())
{
writeText("DEPENDENCIES\n", buf);
for (const auto & [id, name_and_type] : dependencies)
{
writeText(id, buf);
writeChar('\t', buf);
writeText(AccessEntityTypeInfo::get(name_and_type.second).name, buf);
writeChar('\t', buf);
writeText(name_and_type.first, buf);
writeChar('\n', buf);
}
}
return std::make_shared<BackupEntryFromMemory>(buf.str());
}
static AccessEntitiesInBackup fromBackupEntry(const IBackupEntry & backup_entry, const String & file_path)
{
try
{
AccessEntitiesInBackup res;
std::unique_ptr<ReadBuffer> buf = backup_entry.getReadBuffer();
bool dependencies_found = false;
while (!buf->eof())
{
String line;
readStringUntilNewlineInto(line, *buf);
buf->ignore();
if (line == "DEPENDENCIES")
{
dependencies_found = true;
break;
}
size_t id_endpos = line.find('\t');
String id_as_string = line.substr(0, id_endpos);
UUID id = parse<UUID>(line);
line.clear();
String queries;
while (!buf->eof())
{
String query;
readStringUntilNewlineInto(query, *buf);
buf->ignore();
if (query.empty())
break;
if (!queries.empty())
queries.append("\n");
queries.append(query);
}
AccessEntityPtr entity = deserializeAccessEntity(queries);
res.entities.emplace(id, entity);
}
if (dependencies_found)
{
while (!buf->eof())
{
String id_as_string;
readStringInto(id_as_string, *buf);
buf->ignore();
UUID id = parse<UUID>(id_as_string);
String type_as_string;
readStringInto(type_as_string, *buf);
buf->ignore();
AccessEntityType type = AccessEntityTypeInfo::parseType(type_as_string);
String name;
readStringInto(name, *buf);
buf->ignore();
if (!res.entities.contains(id))
res.dependencies.emplace(id, std::pair{name, type});
}
}
return res;
}
catch (Exception & e)
{
e.addMessage("While parsing " + file_path);
throw;
}
}
};
std::vector<UUID> findDependencies(const std::vector<std::pair<UUID, AccessEntityPtr>> & entities)
{
std::vector<UUID> res;
for (const auto & entity : entities | boost::adaptors::map_values)
insertAtEnd(res, entity->findDependencies());
/// Remove duplicates in the list of dependencies (some entities can refer to other entities).
::sort(res.begin(), res.end());
res.erase(std::unique(res.begin(), res.end()), res.end());
for (const auto & id : entities | boost::adaptors::map_keys)
{
auto it = std::lower_bound(res.begin(), res.end(), id);
if ((it != res.end()) && (*it == id))
res.erase(it);
}
return res;
}
std::unordered_map<UUID, std::pair<String, AccessEntityType>> readDependenciesNamesAndTypes(const std::vector<UUID> & dependencies, const AccessControl & access_control)
{
std::unordered_map<UUID, std::pair<String, AccessEntityType>> res;
for (const auto & id : dependencies)
{
if (auto name_and_type = access_control.tryReadNameWithType(id))
res.emplace(id, name_and_type.value());
}
return res;
}
std::unordered_map<UUID, UUID> resolveDependencies(const std::unordered_map<UUID, std::pair<String, AccessEntityType>> & dependencies, const AccessControl & access_control, bool allow_unresolved_dependencies)
{
std::unordered_map<UUID, UUID> old_to_new_ids;
for (const auto & [id, name_and_type] : dependencies)
{
std::optional<UUID> new_id;
if (allow_unresolved_dependencies)
new_id = access_control.find(name_and_type.second, name_and_type.first);
else
new_id = access_control.getID(name_and_type.second, name_and_type.first);
if (new_id)
old_to_new_ids.emplace(id, *new_id);
}
return old_to_new_ids;
}
void generateRandomIDs(std::vector<std::pair<UUID, AccessEntityPtr>> & entities, std::unordered_map<UUID, UUID> & old_to_new_ids)
{
Poco::UUIDGenerator generator;
for (auto & [id, entity] : entities)
{
UUID new_id;
generator.createRandom().copyTo(reinterpret_cast<char *>(&new_id));
old_to_new_ids.emplace(id, new_id);
id = new_id;
}
}
void replaceDependencies(std::vector<std::pair<UUID, AccessEntityPtr>> & entities, const std::unordered_map<UUID, UUID> & old_to_new_ids)
{
for (auto & entity : entities | boost::adaptors::map_values)
{
bool need_replace = false;
for (const auto & dependency : entity->findDependencies())
{
if (old_to_new_ids.contains(dependency))
{
need_replace = true;
break;
}
}
if (!need_replace)
continue;
auto new_entity = entity->clone();
new_entity->replaceDependencies(old_to_new_ids);
entity = new_entity;
}
}
AccessRightsElements getRequiredAccessToRestore(const std::unordered_map<UUID, AccessEntityPtr> & entities)
{
AccessRightsElements res;
for (const auto & entity : entities | boost::adaptors::map_values)
{
auto entity_type = entity->getType();
switch (entity_type)
{
case User::TYPE:
{
const auto & user = typeid_cast<const User &>(*entity);
res.emplace_back(AccessType::CREATE_USER);
auto elements = user.access.getElements();
for (auto & element : elements)
{
if (element.is_partial_revoke)
continue;
element.grant_option = true;
res.emplace_back(element);
}
if (!user.granted_roles.isEmpty())
res.emplace_back(AccessType::ROLE_ADMIN);
break;
}
case Role::TYPE:
{
const auto & role = typeid_cast<const Role &>(*entity);
res.emplace_back(AccessType::CREATE_ROLE);
auto elements = role.access.getElements();
for (auto & element : elements)
{
if (element.is_partial_revoke)
continue;
element.grant_option = true;
res.emplace_back(element);
}
if (!role.granted_roles.isEmpty())
res.emplace_back(AccessType::ROLE_ADMIN);
break;
}
case SettingsProfile::TYPE:
{
res.emplace_back(AccessType::CREATE_SETTINGS_PROFILE);
break;
}
case RowPolicy::TYPE:
{
const auto & policy = typeid_cast<const RowPolicy &>(*entity);
res.emplace_back(AccessType::CREATE_ROW_POLICY, policy.getDatabase(), policy.getTableName());
break;
}
case Quota::TYPE:
{
res.emplace_back(AccessType::CREATE_QUOTA);
break;
}
default:
throw Exception("Unknown type: " + toString(entity_type), ErrorCodes::LOGICAL_ERROR);
}
}
return res;
}
}
void backupAccessEntities(
BackupEntriesCollector & backup_entries_collector,
const String & data_path_in_backup,
const AccessControl & access_control,
AccessEntityType type)
{
auto entities = access_control.readAllForBackup(type, backup_entries_collector.getBackupSettings());
auto dependencies = readDependenciesNamesAndTypes(findDependencies(entities), access_control);
AccessEntitiesInBackup ab;
boost::range::copy(entities, std::inserter(ab.entities, ab.entities.end()));
ab.dependencies = std::move(dependencies);
backup_entries_collector.addBackupEntry(fs::path{data_path_in_backup} / "access.txt", ab.toBackupEntry());
}
AccessRestoreTask::AccessRestoreTask(
const BackupPtr & backup_, const RestoreSettings & restore_settings_, std::shared_ptr<IRestoreCoordination> restore_coordination_)
: backup(backup_), restore_settings(restore_settings_), restore_coordination(restore_coordination_)
{
}
AccessRestoreTask::~AccessRestoreTask() = default;
void AccessRestoreTask::addDataPath(const String & data_path)
{
if (!data_paths.emplace(data_path).second)
return;
String file_path = fs::path{data_path} / "access.txt";
auto backup_entry = backup->readFile(file_path);
auto ab = AccessEntitiesInBackup::fromBackupEntry(*backup_entry, file_path);
boost::range::copy(ab.entities, std::inserter(entities, entities.end()));
boost::range::copy(ab.dependencies, std::inserter(dependencies, dependencies.end()));
for (const auto & id : entities | boost::adaptors::map_keys)
dependencies.erase(id);
}
bool AccessRestoreTask::hasDataPath(const String & data_path) const
{
return data_paths.contains(data_path);
}
AccessRightsElements AccessRestoreTask::getRequiredAccess() const
{
return getRequiredAccessToRestore(entities);
}
void AccessRestoreTask::restore(AccessControl & access_control) const
{
auto old_to_new_ids = resolveDependencies(dependencies, access_control, restore_settings.allow_unresolved_access_dependencies);
std::vector<std::pair<UUID, AccessEntityPtr>> new_entities;
boost::range::copy(entities, std::back_inserter(new_entities));
generateRandomIDs(new_entities, old_to_new_ids);
replaceDependencies(new_entities, old_to_new_ids);
access_control.insertFromBackup(new_entities, restore_settings, restore_coordination);
}
}
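To make the layout produced by AccessEntitiesInBackup::toBackupEntry() easier to picture, here is a hypothetical fragment of an access.txt entry, written as a C++ string constant purely for illustration; the UUIDs and names are invented, and the placeholder line stands in for whatever serializeAccessEntity() actually emits:

// Hypothetical illustration of the format read back by fromBackupEntry() above:
// one "id \t type \t name" header line per entity, followed by its serialized
// queries, a blank line, and an optional DEPENDENCIES section at the end.
static const char * example_access_txt =
    "e2d3a7dc-5f3a-4c2b-9a1e-0123456789ab\tUSER\talice\n"
    "<one or more query lines produced by serializeAccessEntity(alice)>\n"
    "\n"
    "DEPENDENCIES\n"
    "7c9d2b10-4b3c-4d4e-8f90-ba9876543210\tROLE\treaders\n";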

src/Access/AccessBackup.h (new file, 56 lines)

@ -0,0 +1,56 @@
#pragma once
#include <Backups/RestoreSettings.h>
#include <unordered_map>
#include <unordered_set>
namespace DB
{
class AccessControl;
enum class AccessEntityType;
class BackupEntriesCollector;
class RestorerFromBackup;
class IBackup;
using BackupPtr = std::shared_ptr<const IBackup>;
class IRestoreCoordination;
struct IAccessEntity;
using AccessEntityPtr = std::shared_ptr<const IAccessEntity>;
class AccessRightsElements;
/// Makes a backup of access entities of a specified type.
void backupAccessEntities(
BackupEntriesCollector & backup_entries_collector,
const String & data_path_in_backup,
const AccessControl & access_control,
AccessEntityType type);
/// Restores access entities from a backup.
class AccessRestoreTask
{
public:
AccessRestoreTask(
const BackupPtr & backup_, const RestoreSettings & restore_settings_, std::shared_ptr<IRestoreCoordination> restore_coordination_);
~AccessRestoreTask();
/// Adds a data path to load access entities from.
void addDataPath(const String & data_path);
bool hasDataPath(const String & data_path) const;
/// Checks that the current user can do restoring.
AccessRightsElements getRequiredAccess() const;
/// Inserts all access entities loaded from all the paths added by addDataPath().
void restore(AccessControl & access_control) const;
private:
BackupPtr backup;
RestoreSettings restore_settings;
std::shared_ptr<IRestoreCoordination> restore_coordination;
std::unordered_map<UUID, AccessEntityPtr> entities;
std::unordered_map<UUID, std::pair<String, AccessEntityType>> dependencies;
std::unordered_set<String> data_paths;
};
}
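A rough, hypothetical sketch of how this task is intended to be driven; the real call sites live in the restore machinery (e.g. RestorerFromBackup), and the function below and its name are invented for illustration:

#include <Access/AccessBackup.h>
#include <Access/AccessControl.h>
#include <Access/Common/AccessRightsElement.h>
#include <memory>
#include <vector>

using namespace DB;

// Hypothetical driver showing the intended call order only.
void restoreAccessFromBackup(
    AccessControl & access_control,
    const BackupPtr & backup,
    const RestoreSettings & restore_settings,
    std::shared_ptr<IRestoreCoordination> restore_coordination,
    const std::vector<String> & data_paths_in_backup)
{
    AccessRestoreTask task(backup, restore_settings, restore_coordination);

    for (const auto & data_path : data_paths_in_backup)
        task.addDataPath(data_path);        // reads <data_path>/access.txt from the backup

    AccessRightsElements required = task.getRequiredAccess();
    // ... the caller is expected to check 'required' against the current user's grants ...

    task.restore(access_control);           // resolves dependencies, generates new UUIDs, inserts entities
}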


@ -15,7 +15,11 @@
#include <Access/User.h>
#include <Access/ExternalAuthenticators.h>
#include <Access/AccessChangesNotifier.h>
#include <Access/AccessBackup.h>
#include <Backups/BackupEntriesCollector.h>
#include <Backups/RestorerFromBackup.h>
#include <Core/Settings.h>
#include <base/defines.h>
#include <base/find_symbols.h>
#include <Poco/AccessExpireCache.h>
#include <boost/algorithm/string/join.hpp>
@ -130,7 +134,7 @@ public:
}
private:
Strings registered_prefixes;
Strings registered_prefixes TSA_GUARDED_BY(mutex);
mutable std::mutex mutex;
};
@ -184,39 +188,25 @@ void AccessControl::setUsersConfig(const Poco::Util::AbstractConfiguration & use
return;
}
}
addUsersConfigStorage(users_config_);
addUsersConfigStorage(UsersConfigAccessStorage::STORAGE_TYPE, users_config_, false);
}
void AccessControl::addUsersConfigStorage(const Poco::Util::AbstractConfiguration & users_config_)
void AccessControl::addUsersConfigStorage(const String & storage_name_, const Poco::Util::AbstractConfiguration & users_config_, bool allow_backup_)
{
addUsersConfigStorage(UsersConfigAccessStorage::STORAGE_TYPE, users_config_);
}
void AccessControl::addUsersConfigStorage(const String & storage_name_, const Poco::Util::AbstractConfiguration & users_config_)
{
auto new_storage = std::make_shared<UsersConfigAccessStorage>(storage_name_, *this);
auto new_storage = std::make_shared<UsersConfigAccessStorage>(storage_name_, *this, allow_backup_);
new_storage->setConfig(users_config_);
addStorage(new_storage);
LOG_DEBUG(getLogger(), "Added {} access storage '{}', path: {}",
String(new_storage->getStorageType()), new_storage->getStorageName(), new_storage->getPath());
}
void AccessControl::addUsersConfigStorage(
const String & users_config_path_,
const String & include_from_path_,
const String & preprocessed_dir_,
const zkutil::GetZooKeeper & get_zookeeper_function_)
{
addUsersConfigStorage(
UsersConfigAccessStorage::STORAGE_TYPE, users_config_path_, include_from_path_, preprocessed_dir_, get_zookeeper_function_);
}
void AccessControl::addUsersConfigStorage(
const String & storage_name_,
const String & users_config_path_,
const String & include_from_path_,
const String & preprocessed_dir_,
const zkutil::GetZooKeeper & get_zookeeper_function_)
const zkutil::GetZooKeeper & get_zookeeper_function_,
bool allow_backup_)
{
auto storages = getStoragesPtr();
for (const auto & storage : *storages)
@ -227,7 +217,7 @@ void AccessControl::addUsersConfigStorage(
return;
}
}
auto new_storage = std::make_shared<UsersConfigAccessStorage>(storage_name_, *this);
auto new_storage = std::make_shared<UsersConfigAccessStorage>(storage_name_, *this, allow_backup_);
new_storage->load(users_config_path_, include_from_path_, preprocessed_dir_, get_zookeeper_function_);
addStorage(new_storage);
LOG_DEBUG(getLogger(), "Added {} access storage '{}', path: {}", String(new_storage->getStorageType()), new_storage->getStorageName(), new_storage->getPath());
@ -237,7 +227,8 @@ void AccessControl::addUsersConfigStorage(
void AccessControl::addReplicatedStorage(
const String & storage_name_,
const String & zookeeper_path_,
const zkutil::GetZooKeeper & get_zookeeper_function_)
const zkutil::GetZooKeeper & get_zookeeper_function_,
bool allow_backup_)
{
auto storages = getStoragesPtr();
for (const auto & storage : *storages)
@ -245,17 +236,12 @@ void AccessControl::addReplicatedStorage(
if (auto replicated_storage = typeid_cast<std::shared_ptr<ReplicatedAccessStorage>>(storage))
return;
}
auto new_storage = std::make_shared<ReplicatedAccessStorage>(storage_name_, zookeeper_path_, get_zookeeper_function_, *changes_notifier);
auto new_storage = std::make_shared<ReplicatedAccessStorage>(storage_name_, zookeeper_path_, get_zookeeper_function_, *changes_notifier, allow_backup_);
addStorage(new_storage);
LOG_DEBUG(getLogger(), "Added {} access storage '{}'", String(new_storage->getStorageType()), new_storage->getStorageName());
}
void AccessControl::addDiskStorage(const String & directory_, bool readonly_)
{
addDiskStorage(DiskAccessStorage::STORAGE_TYPE, directory_, readonly_);
}
void AccessControl::addDiskStorage(const String & storage_name_, const String & directory_, bool readonly_)
void AccessControl::addDiskStorage(const String & storage_name_, const String & directory_, bool readonly_, bool allow_backup_)
{
auto storages = getStoragesPtr();
for (const auto & storage : *storages)
@ -270,13 +256,13 @@ void AccessControl::addDiskStorage(const String & storage_name_, const String &
}
}
}
auto new_storage = std::make_shared<DiskAccessStorage>(storage_name_, directory_, readonly_, *changes_notifier);
auto new_storage = std::make_shared<DiskAccessStorage>(storage_name_, directory_, *changes_notifier, readonly_, allow_backup_);
addStorage(new_storage);
LOG_DEBUG(getLogger(), "Added {} access storage '{}', path: {}", String(new_storage->getStorageType()), new_storage->getStorageName(), new_storage->getPath());
}
void AccessControl::addMemoryStorage(const String & storage_name_)
void AccessControl::addMemoryStorage(const String & storage_name_, bool allow_backup_)
{
auto storages = getStoragesPtr();
for (const auto & storage : *storages)
@ -284,7 +270,7 @@ void AccessControl::addMemoryStorage(const String & storage_name_)
if (auto memory_storage = typeid_cast<std::shared_ptr<MemoryAccessStorage>>(storage))
return;
}
auto new_storage = std::make_shared<MemoryAccessStorage>(storage_name_, *changes_notifier);
auto new_storage = std::make_shared<MemoryAccessStorage>(storage_name_, *changes_notifier, allow_backup_);
addStorage(new_storage);
LOG_DEBUG(getLogger(), "Added {} access storage '{}'", String(new_storage->getStorageType()), new_storage->getStorageName());
}
@ -327,20 +313,23 @@ void AccessControl::addStoragesFromUserDirectoriesConfig(
if (type == MemoryAccessStorage::STORAGE_TYPE)
{
addMemoryStorage(name);
bool allow_backup = config.getBool(prefix + ".allow_backup", true);
addMemoryStorage(name, allow_backup);
}
else if (type == UsersConfigAccessStorage::STORAGE_TYPE)
{
String path = config.getString(prefix + ".path");
if (std::filesystem::path{path}.is_relative() && std::filesystem::exists(config_dir + path))
path = config_dir + path;
addUsersConfigStorage(name, path, include_from_path, dbms_dir, get_zookeeper_function);
bool allow_backup = config.getBool(prefix + ".allow_backup", false); /// We don't backup users.xml by default.
addUsersConfigStorage(name, path, include_from_path, dbms_dir, get_zookeeper_function, allow_backup);
}
else if (type == DiskAccessStorage::STORAGE_TYPE)
{
String path = config.getString(prefix + ".path");
bool readonly = config.getBool(prefix + ".readonly", false);
addDiskStorage(name, path, readonly);
bool allow_backup = config.getBool(prefix + ".allow_backup", true);
addDiskStorage(name, path, readonly, allow_backup);
}
else if (type == LDAPAccessStorage::STORAGE_TYPE)
{
@ -349,7 +338,8 @@ void AccessControl::addStoragesFromUserDirectoriesConfig(
else if (type == ReplicatedAccessStorage::STORAGE_TYPE)
{
String zookeeper_path = config.getString(prefix + ".zookeeper_path");
addReplicatedStorage(name, zookeeper_path, get_zookeeper_function);
bool allow_backup = config.getBool(prefix + ".allow_backup", true);
addReplicatedStorage(name, zookeeper_path, get_zookeeper_function, allow_backup);
}
else
throw Exception("Unknown storage type '" + type + "' at " + prefix + " in config", ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG);
@ -383,12 +373,18 @@ void AccessControl::addStoragesFromMainConfig(
if (users_config_path != config_path)
checkForUsersNotInMainConfig(config, config_path, users_config_path, getLogger());
addUsersConfigStorage(users_config_path, include_from_path, dbms_dir, get_zookeeper_function);
addUsersConfigStorage(
UsersConfigAccessStorage::STORAGE_TYPE,
users_config_path,
include_from_path,
dbms_dir,
get_zookeeper_function,
/* allow_backup= */ false);
}
String disk_storage_dir = config.getString("access_control_path", "");
if (!disk_storage_dir.empty())
addDiskStorage(disk_storage_dir);
addDiskStorage(DiskAccessStorage::STORAGE_TYPE, disk_storage_dir, /* readonly= */ false, /* allow_backup= */ true);
if (has_user_directories)
addStoragesFromUserDirectoriesConfig(config, "user_directories", config_dir, dbms_dir, include_from_path, get_zookeeper_function);
@ -463,6 +459,23 @@ UUID AccessControl::authenticate(const Credentials & credentials, const Poco::Ne
}
}
void AccessControl::backup(BackupEntriesCollector & backup_entries_collector, AccessEntityType type, const String & data_path_in_backup) const
{
backupAccessEntities(backup_entries_collector, data_path_in_backup, *this, type);
}
void AccessControl::restore(RestorerFromBackup & restorer, const String & data_path_in_backup)
{
/// The restorer must already know about `data_path_in_backup`, but let's check.
restorer.checkPathInBackupToRestoreAccess(data_path_in_backup);
}
void AccessControl::insertFromBackup(const std::vector<std::pair<UUID, AccessEntityPtr>> & entities_from_backup, const RestoreSettings & restore_settings, std::shared_ptr<IRestoreCoordination> restore_coordination)
{
MultipleAccessStorage::insertFromBackup(entities_from_backup, restore_settings, restore_coordination);
changes_notifier->sendNotifications();
}
void AccessControl::setExternalAuthenticatorsConfig(const Poco::Util::AbstractConfiguration & config)
{
external_authenticators->setConfiguration(config, getLogger());


@ -42,6 +42,8 @@ class ClientInfo;
class ExternalAuthenticators;
class AccessChangesNotifier;
struct Settings;
class BackupEntriesCollector;
class RestorerFromBackup;
/// Manages access control entities.
@ -60,37 +62,31 @@ public:
void setUsersConfig(const Poco::Util::AbstractConfiguration & users_config_);
/// Adds UsersConfigAccessStorage.
void addUsersConfigStorage(const Poco::Util::AbstractConfiguration & users_config_);
void addUsersConfigStorage(const String & storage_name_,
const Poco::Util::AbstractConfiguration & users_config_);
void addUsersConfigStorage(const String & users_config_path_,
const String & include_from_path_,
const String & preprocessed_dir_,
const zkutil::GetZooKeeper & get_zookeeper_function_ = {});
const Poco::Util::AbstractConfiguration & users_config_,
bool allow_backup_);
void addUsersConfigStorage(const String & storage_name_,
const String & users_config_path_,
const String & include_from_path_,
const String & preprocessed_dir_,
const zkutil::GetZooKeeper & get_zookeeper_function_ = {});
const zkutil::GetZooKeeper & get_zookeeper_function_,
bool allow_backup_);
/// Loads access entities from the directory on the local disk.
/// Use that directory to keep created users/roles/etc.
void addDiskStorage(const String & directory_, bool readonly_ = false);
void addDiskStorage(const String & storage_name_, const String & directory_, bool readonly_ = false);
void addDiskStorage(const String & storage_name_, const String & directory_, bool readonly_, bool allow_backup_);
/// Adds MemoryAccessStorage which keeps access entities in memory.
void addMemoryStorage();
void addMemoryStorage(const String & storage_name_);
void addMemoryStorage(const String & storage_name_, bool allow_backup_);
/// Adds LDAPAccessStorage which allows querying remote LDAP server for user info.
void addLDAPStorage(const String & storage_name_, const Poco::Util::AbstractConfiguration & config_, const String & prefix_);
void addReplicatedStorage(const String & storage_name,
const String & zookeeper_path,
const zkutil::GetZooKeeper & get_zookeeper_function);
const zkutil::GetZooKeeper & get_zookeeper_function,
bool allow_backup);
/// Adds storages from <users_directories> config.
void addStoragesFromUserDirectoriesConfig(const Poco::Util::AbstractConfiguration & config,
@ -123,6 +119,11 @@ public:
scope_guard subscribeForChanges(const std::vector<UUID> & ids, const OnChangedHandler & handler) const;
UUID authenticate(const Credentials & credentials, const Poco::Net::IPAddress & address) const;
/// Makes a backup of access entities.
void backup(BackupEntriesCollector & backup_entries_collector, AccessEntityType type, const String & data_path_in_backup) const;
static void restore(RestorerFromBackup & restorer, const String & data_path_in_backup);
void setExternalAuthenticatorsConfig(const Poco::Util::AbstractConfiguration & config);
/// Sets the default profile's name.
@ -197,6 +198,8 @@ public:
/// Gets manager of notifications.
AccessChangesNotifier & getChangesNotifier();
void insertFromBackup(const std::vector<std::pair<UUID, AccessEntityPtr>> & entities_from_backup, const RestoreSettings & restore_settings, std::shared_ptr<IRestoreCoordination> restore_coordination) override;
private:
class ContextAccessCache;
class CustomSettingsPrefixes;


@ -736,6 +736,18 @@ AccessRights::AccessRights(const AccessFlags & access)
}
AccessRights::AccessRights(const AccessRightsElement & element)
{
grant(element);
}
AccessRights::AccessRights(const AccessRightsElements & elements)
{
grant(elements);
}
bool AccessRights::isEmpty() const
{
return !root && !root_with_grant_option;


@ -16,6 +16,9 @@ class AccessRights
public:
AccessRights();
explicit AccessRights(const AccessFlags & access);
explicit AccessRights(const AccessRightsElement & element);
explicit AccessRights(const AccessRightsElements & elements);
~AccessRights();
AccessRights(const AccessRights & src);
AccessRights & operator =(const AccessRights & src);


@ -1,7 +1,9 @@
#include <Access/Common/AccessEntityType.h>
#include <Common/Exception.h>
#include <Common/quoteString.h>
#include <base/range.h>
#include <boost/algorithm/string/case_conv.hpp>
#include <boost/algorithm/string/predicate.hpp>
#include <boost/algorithm/string/replace.hpp>
@ -15,6 +17,7 @@ namespace ErrorCodes
extern const int UNKNOWN_QUOTA;
extern const int THERE_IS_NO_PROFILE;
extern const int LOGICAL_ERROR;
extern const int BAD_ARGUMENTS;
}
@ -83,4 +86,15 @@ const AccessEntityTypeInfo & AccessEntityTypeInfo::get(AccessEntityType type_)
throw Exception("Unknown type: " + std::to_string(static_cast<size_t>(type_)), ErrorCodes::LOGICAL_ERROR);
}
AccessEntityType AccessEntityTypeInfo::parseType(const String & name_)
{
for (auto type : collections::range(AccessEntityType::MAX))
{
const auto & info = get(type);
if (boost::iequals(info.name, name_))
return type;
}
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown type: {}", name_);
}
}


@ -35,6 +35,7 @@ struct AccessEntityTypeInfo
String formatEntityNameWithType(const String & entity_name) const;
static const AccessEntityTypeInfo & get(AccessEntityType type_);
static AccessEntityType parseType(const String & name_);
};
}


@ -99,6 +99,7 @@ enum class AccessType
\
M(TRUNCATE, "TRUNCATE TABLE", TABLE, ALL) \
M(OPTIMIZE, "OPTIMIZE TABLE", TABLE, ALL) \
M(BACKUP, "", TABLE, ALL) /* allows to backup tables */\
\
M(KILL_QUERY, "", GLOBAL, ALL) /* allows to kill a query started by another user
(anyone can kill his own queries) */\


@ -1,6 +1,7 @@
#include <Access/DiskAccessStorage.h>
#include <Access/AccessEntityIO.h>
#include <Access/AccessChangesNotifier.h>
#include <Backups/RestoreSettings.h>
#include <IO/WriteHelpers.h>
#include <IO/ReadHelpers.h>
#include <IO/ReadBufferFromFile.h>
@ -165,11 +166,12 @@ namespace
}
DiskAccessStorage::DiskAccessStorage(const String & storage_name_, const String & directory_path_, bool readonly_, AccessChangesNotifier & changes_notifier_)
DiskAccessStorage::DiskAccessStorage(const String & storage_name_, const String & directory_path_, AccessChangesNotifier & changes_notifier_, bool readonly_, bool allow_backup_)
: IAccessStorage(storage_name_), changes_notifier(changes_notifier_)
{
directory_path = makeDirectoryPathCanonical(directory_path_);
readonly = readonly_;
backup_allowed = allow_backup_;
std::error_code create_dir_error_code;
std::filesystem::create_directories(directory_path, create_dir_error_code);
@ -457,7 +459,7 @@ AccessEntityPtr DiskAccessStorage::readImpl(const UUID & id, bool throw_if_not_e
}
std::optional<String> DiskAccessStorage::readNameImpl(const UUID & id, bool throw_if_not_exists) const
std::optional<std::pair<String, AccessEntityType>> DiskAccessStorage::readNameWithTypeImpl(const UUID & id, bool throw_if_not_exists) const
{
std::lock_guard lock{mutex};
auto it = entries_by_id.find(id);
@ -468,21 +470,27 @@ std::optional<String> DiskAccessStorage::readNameImpl(const UUID & id, bool thro
else
return std::nullopt;
}
return it->second.name;
return std::make_pair(it->second.name, it->second.type);
}
std::optional<UUID> DiskAccessStorage::insertImpl(const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists)
{
UUID id = generateRandomID();
std::lock_guard lock{mutex};
if (insertNoLock(id, new_entity, replace_if_exists, throw_if_exists))
if (insertWithID(id, new_entity, replace_if_exists, throw_if_exists))
return id;
return std::nullopt;
}
bool DiskAccessStorage::insertWithID(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists)
{
std::lock_guard lock{mutex};
return insertNoLock(id, new_entity, replace_if_exists, throw_if_exists);
}
bool DiskAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists)
{
const String & name = new_entity->getName();
@ -649,4 +657,20 @@ void DiskAccessStorage::deleteAccessEntityOnDisk(const UUID & id) const
throw Exception("Couldn't delete " + file_path, ErrorCodes::FILE_DOESNT_EXIST);
}
void DiskAccessStorage::insertFromBackup(
const std::vector<std::pair<UUID, AccessEntityPtr>> & entities_from_backup,
const RestoreSettings & restore_settings,
std::shared_ptr<IRestoreCoordination>)
{
if (!isRestoreAllowed())
throwRestoreNotAllowed();
bool replace_if_exists = (restore_settings.create_access == RestoreAccessCreationMode::kReplace);
bool throw_if_exists = (restore_settings.create_access == RestoreAccessCreationMode::kCreate);
for (const auto & [id, entity] : entities_from_backup)
insertWithID(id, entity, replace_if_exists, throw_if_exists);
}
}


@ -15,7 +15,7 @@ class DiskAccessStorage : public IAccessStorage
public:
static constexpr char STORAGE_TYPE[] = "local directory";
DiskAccessStorage(const String & storage_name_, const String & directory_path_, bool readonly_, AccessChangesNotifier & changes_notifier_);
DiskAccessStorage(const String & storage_name_, const String & directory_path_, AccessChangesNotifier & changes_notifier_, bool readonly_, bool allow_backup_);
~DiskAccessStorage() override;
const char * getStorageType() const override { return STORAGE_TYPE; }
@ -29,11 +29,14 @@ public:
bool exists(const UUID & id) const override;
bool isBackupAllowed() const override { return backup_allowed; }
void insertFromBackup(const std::vector<std::pair<UUID, AccessEntityPtr>> & entities_from_backup, const RestoreSettings & restore_settings, std::shared_ptr<IRestoreCoordination> restore_coordination) override;
private:
std::optional<UUID> findImpl(AccessEntityType type, const String & name) const override;
std::vector<UUID> findAllImpl(AccessEntityType type) const override;
AccessEntityPtr readImpl(const UUID & id, bool throw_if_not_exists) const override;
std::optional<String> readNameImpl(const UUID & id, bool throw_if_not_exists) const override;
std::optional<std::pair<String, AccessEntityType>> readNameWithTypeImpl(const UUID & id, bool throw_if_not_exists) const override;
std::optional<UUID> insertImpl(const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists) override;
bool removeImpl(const UUID & id, bool throw_if_not_exists) override;
bool updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists) override;
@ -47,6 +50,7 @@ private:
void listsWritingThreadFunc();
void stopListsWritingThread();
bool insertWithID(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists);
bool insertNoLock(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists);
bool removeNoLock(const UUID & id, bool throw_if_not_exists);
bool updateNoLock(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists);
@ -65,7 +69,6 @@ private:
};
String directory_path;
std::atomic<bool> readonly;
std::unordered_map<UUID, Entry> entries_by_id;
std::unordered_map<std::string_view, Entry *> entries_by_name_and_type[static_cast<size_t>(AccessEntityType::MAX)];
boost::container::flat_set<AccessEntityType> types_of_lists_to_write;
@ -74,6 +77,8 @@ private:
std::condition_variable lists_writing_thread_should_exit; /// Signals `lists_writing_thread` to exit.
bool lists_writing_thread_is_waiting = false;
AccessChangesNotifier & changes_notifier;
std::atomic<bool> readonly;
std::atomic<bool> backup_allowed;
mutable std::mutex mutex;
};
}

View File

@ -1,6 +1,7 @@
#pragma once
#include <Access/SettingsProfileElement.h>
#include <base/defines.h>
#include <Core/UUID.h>
#include <boost/container/flat_set.hpp>
#include <mutex>
@ -42,7 +43,7 @@ private:
void setInfo(const std::shared_ptr<const SettingsProfilesInfo> & info_);
const Params params;
std::shared_ptr<const SettingsProfilesInfo> info;
std::shared_ptr<const SettingsProfilesInfo> info TSA_GUARDED_BY(mutex);
mutable std::mutex mutex;
};
}

View File

@ -231,18 +231,23 @@ void parseLDAPRoleSearchParams(LDAPClient::RoleSearchParams & params, const Poco
params.prefix = config.getString(prefix + ".prefix");
}
void ExternalAuthenticators::reset()
void ExternalAuthenticators::resetImpl()
{
std::scoped_lock lock(mutex);
ldap_client_params_blueprint.clear();
ldap_caches.clear();
kerberos_params.reset();
}
void ExternalAuthenticators::reset()
{
std::scoped_lock lock(mutex);
resetImpl();
}
void ExternalAuthenticators::setConfiguration(const Poco::Util::AbstractConfiguration & config, Poco::Logger * log)
{
std::scoped_lock lock(mutex);
reset();
resetImpl();
Poco::Util::AbstractConfiguration::Keys all_keys;
config.keys("", all_keys);

View File

@ -3,6 +3,7 @@
#include <Access/LDAPClient.h>
#include <Access/Credentials.h>
#include <Access/GSSAcceptor.h>
#include <base/defines.h>
#include <base/types.h>
#include <chrono>
@ -22,7 +23,6 @@ namespace Poco
}
}
namespace DB
{
@ -51,10 +51,12 @@ private:
using LDAPCaches = std::map<String, LDAPCache>; // server name -> cache
using LDAPParams = std::map<String, LDAPClient::Params>; // server name -> params
mutable std::recursive_mutex mutex;
LDAPParams ldap_client_params_blueprint;
mutable LDAPCaches ldap_caches;
std::optional<GSSAcceptorContext::Params> kerberos_params;
mutable std::mutex mutex;
LDAPParams ldap_client_params_blueprint TSA_GUARDED_BY(mutex);
mutable LDAPCaches ldap_caches TSA_GUARDED_BY(mutex);
std::optional<GSSAcceptorContext::Params> kerberos_params TSA_GUARDED_BY(mutex);
void resetImpl() TSA_REQUIRES(mutex);
};
void parseLDAPRoleSearchParams(LDAPClient::RoleSearchParams & params, const Poco::Util::AbstractConfiguration & config, const String & prefix);
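A self-contained sketch of the reset()/resetImpl() split annotated above, assuming nothing beyond standard headers; TSA_GUARDED_BY/TSA_REQUIRES are redefined locally so the example stands alone, and Clang checks the locking discipline when compiled with -Wthread-safety (full checking of std::scoped_lock additionally needs an annotated standard library such as libc++):
#include <mutex>
#include <string>
#include <vector>
#if defined(__clang__)
#    define TSA_GUARDED_BY(...) __attribute__((guarded_by(__VA_ARGS__)))
#    define TSA_REQUIRES(...) __attribute__((requires_capability(__VA_ARGS__)))
#else
#    define TSA_GUARDED_BY(...)
#    define TSA_REQUIRES(...)
#endif
class Authenticators
{
public:
    void reset()
    {
        std::scoped_lock lock(mutex);
        resetImpl();                       // fine: the mutex is held here
    }
    void setConfiguration(std::vector<std::string> servers_)
    {
        std::scoped_lock lock(mutex);
        resetImpl();                       // reuse the helper instead of calling reset() and deadlocking
        servers = std::move(servers_);
    }
private:
    void resetImpl() TSA_REQUIRES(mutex)   // callers must already own `mutex`
    {
        servers.clear();
    }
    std::mutex mutex;
    std::vector<std::string> servers TSA_GUARDED_BY(mutex);
};
int main()
{
    Authenticators auth;
    auth.setConfiguration({"ldap_server_1", "ldap_server_2"});
    auth.reset();
}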

View File

@ -2,6 +2,8 @@
#include <Access/RolesOrUsersSet.h>
#include <boost/range/algorithm/set_algorithm.hpp>
#include <boost/range/algorithm_ext/erase.hpp>
#include <boost/range/algorithm/copy.hpp>
namespace DB
{
@ -166,4 +168,57 @@ void GrantedRoles::makeIntersection(const GrantedRoles & other)
return other.roles_with_admin_option.find(id) == other.roles_with_admin_option.end();
});
}
std::vector<UUID> GrantedRoles::findDependencies() const
{
std::vector<UUID> res;
boost::range::copy(roles, std::back_inserter(res));
return res;
}
void GrantedRoles::replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids)
{
std::vector<UUID> new_ids;
for (auto it = roles.begin(); it != roles.end();)
{
auto id = *it;
auto it_new_id = old_to_new_ids.find(id);
if (it_new_id != old_to_new_ids.end())
{
auto new_id = it_new_id->second;
new_ids.push_back(new_id);
it = roles.erase(it);
}
else
{
++it;
}
}
if (!new_ids.empty())
{
boost::range::copy(new_ids, std::inserter(roles, roles.end()));
new_ids.clear();
for (auto it = roles_with_admin_option.begin(); it != roles_with_admin_option.end();)
{
auto id = *it;
auto it_new_id = old_to_new_ids.find(id);
if (it_new_id != old_to_new_ids.end())
{
auto new_id = it_new_id->second;
new_ids.push_back(new_id);
it = roles_with_admin_option.erase(it);
}
else
{
++it;
}
}
boost::range::copy(new_ids, std::inserter(roles_with_admin_option, roles_with_admin_option.end()));
}
}
}
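The same erase-and-reinsert idiom as above in a self-contained form, using std::set in place of boost::container::flat_set; keys of an ordered set cannot be rewritten in place, so remapped ids are collected, erased, and inserted back afterwards:
#include <cassert>
#include <set>
#include <unordered_map>
#include <vector>
void replaceIds(std::set<int> & ids, const std::unordered_map<int, int> & old_to_new)
{
    std::vector<int> new_ids;
    for (auto it = ids.begin(); it != ids.end();)
    {
        auto found = old_to_new.find(*it);
        if (found != old_to_new.end())
        {
            new_ids.push_back(found->second);  // remember the replacement
            it = ids.erase(it);                // drop the old key
        }
        else
            ++it;
    }
    ids.insert(new_ids.begin(), new_ids.end());
}
int main()
{
    std::set<int> roles{1, 2, 3};
    replaceIds(roles, {{2, 20}, {3, 30}});
    assert((roles == std::set<int>{1, 20, 30}));
}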

View File

@ -3,6 +3,7 @@
#include <Core/UUID.h>
#include <boost/container/flat_set.hpp>
#include <vector>
#include <unordered_map>
namespace DB
@ -24,6 +25,8 @@ public:
void revokeAdminOption(const UUID & role_);
void revokeAdminOption(const std::vector<UUID> & roles_);
bool isEmpty() const { return roles.empty(); }
bool isGranted(const UUID & role_) const;
bool isGrantedWithAdminOption(const UUID & role_) const;
@ -54,6 +57,9 @@ public:
friend bool operator ==(const GrantedRoles & left, const GrantedRoles & right) { return (left.roles == right.roles) && (left.roles_with_admin_option == right.roles_with_admin_option); }
friend bool operator !=(const GrantedRoles & left, const GrantedRoles & right) { return !(left == right); }
std::vector<UUID> findDependencies() const;
void replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids);
private:
boost::container::flat_set<UUID> roles;
boost::container::flat_set<UUID> roles_with_admin_option;

View File

@ -4,6 +4,7 @@
#include <Common/typeid_cast.h>
#include <base/types.h>
#include <memory>
#include <unordered_map>
namespace DB
@ -45,6 +46,15 @@ struct IAccessEntity
bool operator()(const std::shared_ptr<const IAccessEntity> & lhs, const std::shared_ptr<const IAccessEntity> & rhs) const { return operator()(*lhs, *rhs); }
};
/// Finds all dependencies.
virtual std::vector<UUID> findDependencies() const { return {}; }
/// Replaces dependencies according to a specified map.
virtual void replaceDependencies(const std::unordered_map<UUID, UUID> & /* old_to_new_ids */) {}
/// Whether this access entity should be written to a backup.
virtual bool isBackupAllowed() const { return false; }
protected:
String name;

View File

@ -10,6 +10,7 @@
#include <base/FnTraits.h>
#include <boost/algorithm/string/join.hpp>
#include <boost/algorithm/string/replace.hpp>
#include <boost/range/algorithm_ext/erase.hpp>
namespace DB
@ -19,6 +20,7 @@ namespace ErrorCodes
extern const int ACCESS_ENTITY_ALREADY_EXISTS;
extern const int ACCESS_ENTITY_NOT_FOUND;
extern const int ACCESS_STORAGE_READONLY;
extern const int ACCESS_STORAGE_DOESNT_ALLOW_BACKUP;
extern const int WRONG_PASSWORD;
extern const int IP_ADDRESS_NOT_ALLOWED;
extern const int LOGICAL_ERROR;
@ -83,13 +85,15 @@ std::vector<UUID> IAccessStorage::getIDs(AccessEntityType type, const Strings &
String IAccessStorage::readName(const UUID & id) const
{
return *readNameImpl(id, /* throw_if_not_exists = */ true);
return readNameWithType(id).first;
}
std::optional<String> IAccessStorage::readName(const UUID & id, bool throw_if_not_exists) const
{
return readNameImpl(id, throw_if_not_exists);
if (auto name_and_type = readNameWithType(id, throw_if_not_exists))
return name_and_type->first;
return std::nullopt;
}
@ -99,7 +103,7 @@ Strings IAccessStorage::readNames(const std::vector<UUID> & ids, bool throw_if_n
res.reserve(ids.size());
for (const auto & id : ids)
{
if (auto name = readNameImpl(id, throw_if_not_exists))
if (auto name = readName(id, throw_if_not_exists))
res.emplace_back(std::move(name).value());
}
return res;
@ -118,14 +122,42 @@ Strings IAccessStorage::tryReadNames(const std::vector<UUID> & ids) const
}
std::optional<String> IAccessStorage::readNameImpl(const UUID & id, bool throw_if_not_exists) const
std::pair<String, AccessEntityType> IAccessStorage::readNameWithType(const UUID & id) const
{
return *readNameWithTypeImpl(id, /* throw_if_not_exists = */ true);
}
std::optional<std::pair<String, AccessEntityType>> IAccessStorage::readNameWithType(const UUID & id, bool throw_if_not_exists) const
{
return readNameWithTypeImpl(id, throw_if_not_exists);
}
std::optional<std::pair<String, AccessEntityType>> IAccessStorage::tryReadNameWithType(const UUID & id) const
{
return readNameWithTypeImpl(id, /* throw_if_not_exists = */ false);
}
std::optional<std::pair<String, AccessEntityType>> IAccessStorage::readNameWithTypeImpl(const UUID & id, bool throw_if_not_exists) const
{
if (auto entity = read(id, throw_if_not_exists))
return entity->getName();
return std::make_pair(entity->getName(), entity->getType());
return std::nullopt;
}
std::vector<std::pair<UUID, AccessEntityPtr>> IAccessStorage::readAllWithIDs(AccessEntityType type) const
{
std::vector<std::pair<UUID, AccessEntityPtr>> entities;
for (const auto & id : findAll(type))
{
if (auto entity = tryRead(id))
entities.emplace_back(id, entity);
}
return entities;
}
UUID IAccessStorage::insert(const AccessEntityPtr & entity)
{
return *insert(entity, /* replace_if_exists = */ false, /* throw_if_exists = */ true);
@ -488,6 +520,29 @@ bool IAccessStorage::isAddressAllowed(const User & user, const Poco::Net::IPAddr
}
bool IAccessStorage::isRestoreAllowed() const
{
return isBackupAllowed() && !isReadOnly();
}
std::vector<std::pair<UUID, AccessEntityPtr>> IAccessStorage::readAllForBackup(AccessEntityType type, const BackupSettings &) const
{
if (!isBackupAllowed())
throwBackupNotAllowed();
auto res = readAllWithIDs(type);
boost::range::remove_erase_if(res, [](const std::pair<UUID, AccessEntityPtr> & x) { return !x.second->isBackupAllowed(); });
return res;
}
void IAccessStorage::insertFromBackup(const std::vector<std::pair<UUID, AccessEntityPtr>> &, const RestoreSettings &, std::shared_ptr<IRestoreCoordination>)
{
if (!isRestoreAllowed())
throwRestoreNotAllowed();
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "insertFromBackup() is not implemented in {}", getStorageType());
}
UUID IAccessStorage::generateRandomID()
{
static Poco::UUIDGenerator generator;
@ -577,6 +632,7 @@ void IAccessStorage::throwReadonlyCannotRemove(AccessEntityType type, const Stri
ErrorCodes::ACCESS_STORAGE_READONLY);
}
void IAccessStorage::throwAddressNotAllowed(const Poco::Net::IPAddress & address)
{
throw Exception("Connections from " + address.toString() + " are not allowed", ErrorCodes::IP_ADDRESS_NOT_ALLOWED);
@ -589,9 +645,20 @@ void IAccessStorage::throwAuthenticationTypeNotAllowed(AuthenticationType auth_t
"Authentication type {} is not allowed, check the setting allow_{} in the server configuration",
toString(auth_type), AuthenticationTypeInfo::get(auth_type).name);
}
void IAccessStorage::throwInvalidCredentials()
{
throw Exception("Invalid credentials", ErrorCodes::WRONG_PASSWORD);
}
void IAccessStorage::throwBackupNotAllowed() const
{
throw Exception(ErrorCodes::ACCESS_STORAGE_DOESNT_ALLOW_BACKUP, "Backup of access entities is not allowed in {}", getStorageName());
}
void IAccessStorage::throwRestoreNotAllowed() const
{
throw Exception(ErrorCodes::ACCESS_STORAGE_DOESNT_ALLOW_BACKUP, "Restore of access entities is not allowed in {}", getStorageName());
}
}
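A short sketch of the filtering step inside readAllForBackup() above, using std::erase_if (C++20) where the original relies on boost::range::remove_erase_if; the entity type and names here are illustrative only:
#include <cstdio>
#include <memory>
#include <utility>
#include <vector>
struct Entity
{
    const char * name;
    bool backup_allowed;
    bool isBackupAllowed() const { return backup_allowed; }
};
int main()
{
    using EntityPtr = std::shared_ptr<const Entity>;
    std::vector<std::pair<int, EntityPtr>> all = {
        {1, std::make_shared<Entity>(Entity{"quota_q1", true})},
        {2, std::make_shared<Entity>(Entity{"ldap_user", false})},   // e.g. an entity that opts out of backups
    };
    // Keep only entities that agree to be written into a backup.
    std::erase_if(all, [](const auto & x) { return !x.second->isBackupAllowed(); });
    for (const auto & [id, entity] : all)
        std::printf("%d %s\n", id, entity->name);   // prints "1 quota_q1"
}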

View File

@ -18,6 +18,9 @@ struct User;
class Credentials;
class ExternalAuthenticators;
enum class AuthenticationType;
struct BackupSettings;
struct RestoreSettings;
class IRestoreCoordination;
/// Contains entities, i.e. instances of classes derived from IAccessEntity.
/// The implementations of this class MUST be thread-safe.
@ -101,6 +104,16 @@ public:
std::optional<String> tryReadName(const UUID & id) const;
Strings tryReadNames(const std::vector<UUID> & ids) const;
std::pair<String, AccessEntityType> readNameWithType(const UUID & id) const;
std::optional<std::pair<String, AccessEntityType>> readNameWithType(const UUID & id, bool throw_if_not_exists) const;
std::optional<std::pair<String, AccessEntityType>> tryReadNameWithType(const UUID & id) const;
/// Reads all entities and returns them with their IDs.
template <typename EntityClassT>
std::vector<std::pair<UUID, std::shared_ptr<const EntityClassT>>> readAllWithIDs() const;
std::vector<std::pair<UUID, AccessEntityPtr>> readAllWithIDs(AccessEntityType type) const;
/// Inserts an entity to the storage. Returns ID of a new entry in the storage.
/// Throws an exception if the specified name already exists.
UUID insert(const AccessEntityPtr & entity);
@ -143,11 +156,19 @@ public:
UUID authenticate(const Credentials & credentials, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators, bool allow_no_password, bool allow_plaintext_password) const;
std::optional<UUID> authenticate(const Credentials & credentials, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators, bool throw_if_user_not_exists, bool allow_no_password, bool allow_plaintext_password) const;
/// Returns true if this storage can be stored to or restored from a backup.
virtual bool isBackupAllowed() const { return false; }
virtual bool isRestoreAllowed() const;
/// Makes a backup of this access storage.
virtual std::vector<std::pair<UUID, AccessEntityPtr>> readAllForBackup(AccessEntityType type, const BackupSettings & backup_settings) const;
virtual void insertFromBackup(const std::vector<std::pair<UUID, AccessEntityPtr>> & entities_from_backup, const RestoreSettings & restore_settings, std::shared_ptr<IRestoreCoordination> restore_coordination);
protected:
virtual std::optional<UUID> findImpl(AccessEntityType type, const String & name) const = 0;
virtual std::vector<UUID> findAllImpl(AccessEntityType type) const = 0;
virtual AccessEntityPtr readImpl(const UUID & id, bool throw_if_not_exists) const = 0;
virtual std::optional<String> readNameImpl(const UUID & id, bool throw_if_not_exists) const;
virtual std::optional<std::pair<String, AccessEntityType>> readNameWithTypeImpl(const UUID & id, bool throw_if_not_exists) const;
virtual std::optional<UUID> insertImpl(const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists);
virtual bool removeImpl(const UUID & id, bool throw_if_not_exists);
virtual bool updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists);
@ -170,6 +191,8 @@ protected:
[[noreturn]] static void throwAddressNotAllowed(const Poco::Net::IPAddress & address);
[[noreturn]] static void throwInvalidCredentials();
[[noreturn]] static void throwAuthenticationTypeNotAllowed(AuthenticationType auth_type);
[[noreturn]] void throwBackupNotAllowed() const;
[[noreturn]] void throwRestoreNotAllowed() const;
private:
const String storage_name;
@ -218,4 +241,17 @@ std::shared_ptr<const EntityClassT> IAccessStorage::tryRead(const String & name)
{
return read<EntityClassT>(name, false);
}
template <typename EntityClassT>
std::vector<std::pair<UUID, std::shared_ptr<const EntityClassT>>> IAccessStorage::readAllWithIDs() const
{
std::vector<std::pair<UUID, std::shared_ptr<const EntityClassT>>> entities;
for (const auto & id : findAll<EntityClassT>())
{
if (auto entity = tryRead<EntityClassT>(id))
entities.emplace_back(id, entity);
}
return entities;
}
}

View File

@ -28,7 +28,7 @@ namespace ErrorCodes
LDAPAccessStorage::LDAPAccessStorage(const String & storage_name_, AccessControl & access_control_, const Poco::Util::AbstractConfiguration & config, const String & prefix)
: IAccessStorage(storage_name_), access_control(access_control_), memory_storage(storage_name_, access_control.getChangesNotifier())
: IAccessStorage(storage_name_), access_control(access_control_), memory_storage(storage_name_, access_control.getChangesNotifier(), false)
{
setConfiguration(config, prefix);
}
@ -36,6 +36,7 @@ LDAPAccessStorage::LDAPAccessStorage(const String & storage_name_, AccessControl
String LDAPAccessStorage::getLDAPServerName() const
{
std::scoped_lock lock(mutex);
return ldap_server_name;
}
@ -442,10 +443,10 @@ AccessEntityPtr LDAPAccessStorage::readImpl(const UUID & id, bool throw_if_not_e
}
std::optional<String> LDAPAccessStorage::readNameImpl(const UUID & id, bool throw_if_not_exists) const
std::optional<std::pair<String, AccessEntityType>> LDAPAccessStorage::readNameWithTypeImpl(const UUID & id, bool throw_if_not_exists) const
{
std::scoped_lock lock(mutex);
return memory_storage.readName(id, throw_if_not_exists);
return memory_storage.readNameWithType(id, throw_if_not_exists);
}
@ -504,4 +505,5 @@ std::optional<UUID> LDAPAccessStorage::authenticateImpl(
return id;
}
}

View File

@ -47,7 +47,7 @@ private: // IAccessStorage implementations.
virtual std::optional<UUID> findImpl(AccessEntityType type, const String & name) const override;
virtual std::vector<UUID> findAllImpl(AccessEntityType type) const override;
virtual AccessEntityPtr readImpl(const UUID & id, bool throw_if_not_exists) const override;
virtual std::optional<String> readNameImpl(const UUID & id, bool throw_if_not_exists) const override;
virtual std::optional<std::pair<String, AccessEntityType>> readNameWithTypeImpl(const UUID & id, bool throw_if_not_exists) const override;
virtual std::optional<UUID> authenticateImpl(const Credentials & credentials, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators, bool throw_if_user_not_exists, bool allow_no_password, bool allow_plaintext_password) const override;
void setConfiguration(const Poco::Util::AbstractConfiguration & config, const String & prefix);

View File

@ -1,5 +1,6 @@
#include <Access/MemoryAccessStorage.h>
#include <Access/AccessChangesNotifier.h>
#include <Backups/RestoreSettings.h>
#include <base/scope_guard.h>
#include <boost/container/flat_set.hpp>
#include <boost/range/adaptor/map.hpp>
@ -8,8 +9,8 @@
namespace DB
{
MemoryAccessStorage::MemoryAccessStorage(const String & storage_name_, AccessChangesNotifier & changes_notifier_)
: IAccessStorage(storage_name_), changes_notifier(changes_notifier_)
MemoryAccessStorage::MemoryAccessStorage(const String & storage_name_, AccessChangesNotifier & changes_notifier_, bool allow_backup_)
: IAccessStorage(storage_name_), changes_notifier(changes_notifier_), backup_allowed(allow_backup_)
{
}
@ -65,14 +66,20 @@ AccessEntityPtr MemoryAccessStorage::readImpl(const UUID & id, bool throw_if_not
std::optional<UUID> MemoryAccessStorage::insertImpl(const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists)
{
UUID id = generateRandomID();
std::lock_guard lock{mutex};
if (insertNoLock(id, new_entity, replace_if_exists, throw_if_exists))
if (insertWithID(id, new_entity, replace_if_exists, throw_if_exists))
return id;
return std::nullopt;
}
bool MemoryAccessStorage::insertWithID(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists)
{
std::lock_guard lock{mutex};
return insertNoLock(id, new_entity, replace_if_exists, throw_if_exists);
}
bool MemoryAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists)
{
const String & name = new_entity->getName();
@ -264,4 +271,20 @@ void MemoryAccessStorage::setAll(const std::vector<std::pair<UUID, AccessEntityP
}
}
void MemoryAccessStorage::insertFromBackup(
const std::vector<std::pair<UUID, AccessEntityPtr>> & entities_from_backup,
const RestoreSettings & restore_settings,
std::shared_ptr<IRestoreCoordination>)
{
if (!isRestoreAllowed())
throwRestoreNotAllowed();
bool replace_if_exists = (restore_settings.create_access == RestoreAccessCreationMode::kReplace);
bool throw_if_exists = (restore_settings.create_access == RestoreAccessCreationMode::kCreate);
for (const auto & [id, entity] : entities_from_backup)
insertWithID(id, entity, replace_if_exists, throw_if_exists);
}
}

View File

@ -1,6 +1,7 @@
#pragma once
#include <Access/IAccessStorage.h>
#include <base/defines.h>
#include <list>
#include <memory>
#include <mutex>
@ -17,7 +18,7 @@ class MemoryAccessStorage : public IAccessStorage
public:
static constexpr char STORAGE_TYPE[] = "memory";
explicit MemoryAccessStorage(const String & storage_name_, AccessChangesNotifier & changes_notifier_);
explicit MemoryAccessStorage(const String & storage_name_, AccessChangesNotifier & changes_notifier_, bool allow_backup_);
const char * getStorageType() const override { return STORAGE_TYPE; }
@ -27,6 +28,9 @@ public:
bool exists(const UUID & id) const override;
bool isBackupAllowed() const override { return backup_allowed; }
void insertFromBackup(const std::vector<std::pair<UUID, AccessEntityPtr>> & entities_from_backup, const RestoreSettings & restore_settings, std::shared_ptr<IRestoreCoordination> restore_coordination) override;
private:
std::optional<UUID> findImpl(AccessEntityType type, const String & name) const override;
std::vector<UUID> findAllImpl(AccessEntityType type) const override;
@ -35,9 +39,10 @@ private:
bool removeImpl(const UUID & id, bool throw_if_not_exists) override;
bool updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists) override;
bool insertNoLock(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists);
bool removeNoLock(const UUID & id, bool throw_if_not_exists);
bool updateNoLock(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists);
bool insertWithID(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists);
bool insertNoLock(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists) TSA_REQUIRES(mutex);
bool removeNoLock(const UUID & id, bool throw_if_not_exists) TSA_REQUIRES(mutex);
bool updateNoLock(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists) TSA_REQUIRES(mutex);
struct Entry
{
@ -46,8 +51,9 @@ private:
};
mutable std::mutex mutex;
std::unordered_map<UUID, Entry> entries_by_id; /// We want to search entries both by ID and by the pair of name and type.
std::unordered_map<String, Entry *> entries_by_name_and_type[static_cast<size_t>(AccessEntityType::MAX)];
std::unordered_map<UUID, Entry> entries_by_id TSA_GUARDED_BY(mutex); /// We want to search entries both by ID and by the pair of name and type.
std::unordered_map<String, Entry *> entries_by_name_and_type[static_cast<size_t>(AccessEntityType::MAX)] TSA_GUARDED_BY(mutex);
AccessChangesNotifier & changes_notifier;
bool backup_allowed = false;
};
}

View File

@ -3,6 +3,7 @@
#include <Common/Exception.h>
#include <Common/quoteString.h>
#include <base/range.h>
#include <base/insertAtEnd.h>
#include <boost/range/adaptor/map.hpp>
#include <boost/range/adaptor/reversed.hpp>
#include <boost/range/algorithm/copy.hpp>
@ -42,14 +43,14 @@ MultipleAccessStorage::~MultipleAccessStorage()
void MultipleAccessStorage::setStorages(const std::vector<StoragePtr> & storages)
{
std::unique_lock lock{mutex};
std::lock_guard lock{mutex};
nested_storages = std::make_shared<const Storages>(storages);
ids_cache.reset();
}
void MultipleAccessStorage::addStorage(const StoragePtr & new_storage)
{
std::unique_lock lock{mutex};
std::lock_guard lock{mutex};
if (boost::range::find(*nested_storages, new_storage) != nested_storages->end())
return;
auto new_storages = std::make_shared<Storages>(*nested_storages);
@ -59,7 +60,7 @@ void MultipleAccessStorage::addStorage(const StoragePtr & new_storage)
void MultipleAccessStorage::removeStorage(const StoragePtr & storage_to_remove)
{
std::unique_lock lock{mutex};
std::lock_guard lock{mutex};
auto it = boost::range::find(*nested_storages, storage_to_remove);
if (it == nested_storages->end())
return;
@ -189,10 +190,10 @@ AccessEntityPtr MultipleAccessStorage::readImpl(const UUID & id, bool throw_if_n
}
std::optional<String> MultipleAccessStorage::readNameImpl(const UUID & id, bool throw_if_not_exists) const
std::optional<std::pair<String, AccessEntityType>> MultipleAccessStorage::readNameWithTypeImpl(const UUID & id, bool throw_if_not_exists) const
{
if (auto storage = findStorage(id))
return storage->readName(id, throw_if_not_exists);
return storage->readNameWithType(id, throw_if_not_exists);
if (throw_if_not_exists)
throwNotFound(id);
@ -357,4 +358,65 @@ MultipleAccessStorage::authenticateImpl(const Credentials & credentials, const P
return std::nullopt;
}
bool MultipleAccessStorage::isBackupAllowed() const
{
auto storages = getStoragesInternal();
for (const auto & storage : *storages)
{
if (storage->isBackupAllowed())
return true;
}
return false;
}
bool MultipleAccessStorage::isRestoreAllowed() const
{
auto storages = getStoragesInternal();
for (const auto & storage : *storages)
{
if (storage->isRestoreAllowed())
return true;
}
return false;
}
std::vector<std::pair<UUID, AccessEntityPtr>> MultipleAccessStorage::readAllForBackup(AccessEntityType type, const BackupSettings & backup_settings) const
{
std::vector<std::pair<UUID, AccessEntityPtr>> res;
auto storages = getStoragesInternal();
size_t count = 0;
for (const auto & storage : *storages)
{
if (storage->isBackupAllowed())
{
insertAtEnd(res, storage->readAllForBackup(type, backup_settings));
++count;
}
}
if (!count)
throwBackupNotAllowed();
return res;
}
void MultipleAccessStorage::insertFromBackup(const std::vector<std::pair<UUID, AccessEntityPtr>> & entities_from_backup, const RestoreSettings & restore_settings, std::shared_ptr<IRestoreCoordination> restore_coordination)
{
auto storages = getStoragesInternal();
for (const auto & storage : *storages)
{
if (storage->isRestoreAllowed())
{
storage->insertFromBackup(entities_from_backup, restore_settings, restore_coordination);
return;
}
}
throwRestoreNotAllowed();
}
}
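A compact sketch of the dispatch rule implemented above for nested storages: a backup gathers entities from every nested storage that allows it, while a restore goes to the first storage that accepts it and throws if none does (a simplified, hypothetical Storage type):
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>
struct Storage
{
    std::string name;
    bool writable = false;
    bool isBackupAllowed() const { return writable; }
    bool isRestoreAllowed() const { return writable; }   // simplified: backup and restore permissions coincide here
};
using StoragePtr = std::shared_ptr<Storage>;
bool anyBackupAllowed(const std::vector<StoragePtr> & storages)
{
    for (const auto & storage : storages)
        if (storage->isBackupAllowed())
            return true;
    return false;
}
const std::string & pickRestoreTarget(const std::vector<StoragePtr> & storages)
{
    for (const auto & storage : storages)
        if (storage->isRestoreAllowed())
            return storage->name;                        // the first storage that accepts restored entities wins
    throw std::runtime_error("Restore of access entities is not allowed");
}
int main()
{
    std::vector<StoragePtr> storages{
        std::make_shared<Storage>(Storage{"users_xml", false}),
        std::make_shared<Storage>(Storage{"local_directory", true}),
    };
    std::cout << std::boolalpha << anyBackupAllowed(storages) << ' ' << pickRestoreTarget(storages) << '\n';
    // prints "true local_directory"
}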

View File

@ -1,6 +1,7 @@
#pragma once
#include <Access/IAccessStorage.h>
#include <base/defines.h>
#include <Common/LRUCache.h>
#include <mutex>
@ -42,11 +43,16 @@ public:
bool exists(const UUID & id) const override;
bool isBackupAllowed() const override;
bool isRestoreAllowed() const override;
std::vector<std::pair<UUID, AccessEntityPtr>> readAllForBackup(AccessEntityType type, const BackupSettings & backup_settings) const override;
void insertFromBackup(const std::vector<std::pair<UUID, AccessEntityPtr>> & entities_from_backup, const RestoreSettings & restore_settings, std::shared_ptr<IRestoreCoordination> restore_coordination) override;
protected:
std::optional<UUID> findImpl(AccessEntityType type, const String & name) const override;
std::vector<UUID> findAllImpl(AccessEntityType type) const override;
AccessEntityPtr readImpl(const UUID & id, bool throw_if_not_exists) const override;
std::optional<String> readNameImpl(const UUID & id, bool throw_if_not_exists) const override;
std::optional<std::pair<String, AccessEntityType>> readNameWithTypeImpl(const UUID & id, bool throw_if_not_exists) const override;
std::optional<UUID> insertImpl(const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists) override;
bool removeImpl(const UUID & id, bool throw_if_not_exists) override;
bool updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists) override;
@ -56,8 +62,8 @@ private:
using Storages = std::vector<StoragePtr>;
std::shared_ptr<const Storages> getStoragesInternal() const;
std::shared_ptr<const Storages> nested_storages;
mutable LRUCache<UUID, Storage> ids_cache;
std::shared_ptr<const Storages> nested_storages TSA_GUARDED_BY(mutex);
mutable LRUCache<UUID, Storage> ids_cache TSA_GUARDED_BY(mutex);
mutable std::mutex mutex;
};

View File

@ -19,5 +19,14 @@ bool Quota::equal(const IAccessEntity & other) const
return (all_limits == other_quota.all_limits) && (key_type == other_quota.key_type) && (to_roles == other_quota.to_roles);
}
std::vector<UUID> Quota::findDependencies() const
{
return to_roles.findDependencies();
}
void Quota::replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids)
{
to_roles.replaceDependencies(old_to_new_ids);
}
}

View File

@ -45,6 +45,10 @@ struct Quota : public IAccessEntity
std::shared_ptr<IAccessEntity> clone() const override { return cloneImpl<Quota>(); }
static constexpr const auto TYPE = AccessEntityType::QUOTA;
AccessEntityType getType() const override { return TYPE; }
std::vector<UUID> findDependencies() const override;
void replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids) override;
bool isBackupAllowed() const override { return true; }
};
using QuotaPtr = std::shared_ptr<const Quota>;

View File

@ -2,6 +2,8 @@
#include <Access/MemoryAccessStorage.h>
#include <Access/ReplicatedAccessStorage.h>
#include <Access/AccessChangesNotifier.h>
#include <Backups/RestoreSettings.h>
#include <Backups/IRestoreCoordination.h>
#include <IO/ReadHelpers.h>
#include <boost/container/flat_set.hpp>
#include <Common/ZooKeeper/KeeperException.h>
@ -33,12 +35,14 @@ ReplicatedAccessStorage::ReplicatedAccessStorage(
const String & storage_name_,
const String & zookeeper_path_,
zkutil::GetZooKeeper get_zookeeper_,
AccessChangesNotifier & changes_notifier_)
AccessChangesNotifier & changes_notifier_,
bool allow_backup_)
: IAccessStorage(storage_name_)
, zookeeper_path(zookeeper_path_)
, get_zookeeper(get_zookeeper_)
, watched_queue(std::make_shared<ConcurrentBoundedQueue<UUID>>(std::numeric_limits<size_t>::max()))
, changes_notifier(changes_notifier_)
, backup_allowed(allow_backup_)
{
if (zookeeper_path.empty())
throw Exception("ZooKeeper path must be non-empty", ErrorCodes::BAD_ARGUMENTS);
@ -99,6 +103,15 @@ static void retryOnZooKeeperUserError(size_t attempts, Func && function)
std::optional<UUID> ReplicatedAccessStorage::insertImpl(const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists)
{
const UUID id = generateRandomID();
if (insertWithID(id, new_entity, replace_if_exists, throw_if_exists))
return id;
return std::nullopt;
}
bool ReplicatedAccessStorage::insertWithID(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists)
{
const AccessEntityTypeInfo type_info = AccessEntityTypeInfo::get(new_entity->getType());
const String & name = new_entity->getName();
LOG_DEBUG(getLogger(), "Inserting entity of type {} named {} with id {}", type_info.name, name, toString(id));
@ -108,11 +121,11 @@ std::optional<UUID> ReplicatedAccessStorage::insertImpl(const AccessEntityPtr &
retryOnZooKeeperUserError(10, [&]{ ok = insertZooKeeper(zookeeper, id, new_entity, replace_if_exists, throw_if_exists); });
if (!ok)
return std::nullopt;
return false;
std::lock_guard lock{mutex};
refreshEntityNoLock(zookeeper, id);
return id;
return true;
}
@ -600,4 +613,19 @@ AccessEntityPtr ReplicatedAccessStorage::readImpl(const UUID & id, bool throw_if
return entry.entity;
}
void ReplicatedAccessStorage::insertFromBackup(const std::vector<std::pair<UUID, AccessEntityPtr>> & entities_from_backup, const RestoreSettings & restore_settings, std::shared_ptr<IRestoreCoordination> restore_coordination)
{
if (!isRestoreAllowed())
throwRestoreNotAllowed();
if (!restore_coordination->acquireReplicatedAccessStorage(zookeeper_path))
return;
bool replace_if_exists = (restore_settings.create_access == RestoreAccessCreationMode::kReplace);
bool throw_if_exists = (restore_settings.create_access == RestoreAccessCreationMode::kCreate);
for (const auto & [id, entity] : entities_from_backup)
insertWithID(id, entity, replace_if_exists, throw_if_exists);
}
}

View File

@ -6,6 +6,7 @@
#include <mutex>
#include <unordered_map>
#include <base/defines.h>
#include <base/scope_guard.h>
#include <Common/ThreadPool.h>
@ -26,7 +27,7 @@ class ReplicatedAccessStorage : public IAccessStorage
public:
static constexpr char STORAGE_TYPE[] = "replicated";
ReplicatedAccessStorage(const String & storage_name, const String & zookeeper_path, zkutil::GetZooKeeper get_zookeeper, AccessChangesNotifier & changes_notifier_);
ReplicatedAccessStorage(const String & storage_name, const String & zookeeper_path, zkutil::GetZooKeeper get_zookeeper, AccessChangesNotifier & changes_notifier_, bool allow_backup);
virtual ~ReplicatedAccessStorage() override;
const char * getStorageType() const override { return STORAGE_TYPE; }
@ -36,6 +37,9 @@ public:
bool exists(const UUID & id) const override;
bool isBackupAllowed() const override { return backup_allowed; }
void insertFromBackup(const std::vector<std::pair<UUID, AccessEntityPtr>> & entities_from_backup, const RestoreSettings & restore_settings, std::shared_ptr<IRestoreCoordination> restore_coordination) override;
private:
String zookeeper_path;
zkutil::GetZooKeeper get_zookeeper;
@ -50,6 +54,7 @@ private:
bool removeImpl(const UUID & id, bool throw_if_not_exists) override;
bool updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists) override;
bool insertWithID(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists);
bool insertZooKeeper(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists);
bool removeZooKeeper(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id, bool throw_if_not_exists);
bool updateZooKeeper(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists);
@ -66,10 +71,10 @@ private:
bool refresh();
void refreshEntities(const zkutil::ZooKeeperPtr & zookeeper);
void refreshEntity(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id);
void refreshEntityNoLock(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id);
void refreshEntityNoLock(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id) TSA_REQUIRES(mutex);
void setEntityNoLock(const UUID & id, const AccessEntityPtr & entity);
void removeEntityNoLock(const UUID & id);
void setEntityNoLock(const UUID & id, const AccessEntityPtr & entity) TSA_REQUIRES(mutex);
void removeEntityNoLock(const UUID & id) TSA_REQUIRES(mutex);
struct Entry
{
@ -82,8 +87,9 @@ private:
AccessEntityPtr readImpl(const UUID & id, bool throw_if_not_exists) const override;
mutable std::mutex mutex;
std::unordered_map<UUID, Entry> entries_by_id;
std::unordered_map<String, Entry *> entries_by_name_and_type[static_cast<size_t>(AccessEntityType::MAX)];
std::unordered_map<UUID, Entry> entries_by_id TSA_GUARDED_BY(mutex);
std::unordered_map<String, Entry *> entries_by_name_and_type[static_cast<size_t>(AccessEntityType::MAX)] TSA_GUARDED_BY(mutex);
AccessChangesNotifier & changes_notifier;
bool backup_allowed = false;
};
}

View File

@ -1,4 +1,5 @@
#include <Access/Role.h>
#include <base/insertAtEnd.h>
namespace DB
@ -12,4 +13,18 @@ bool Role::equal(const IAccessEntity & other) const
return (access == other_role.access) && (granted_roles == other_role.granted_roles) && (settings == other_role.settings);
}
std::vector<UUID> Role::findDependencies() const
{
std::vector<UUID> res;
insertAtEnd(res, granted_roles.findDependencies());
insertAtEnd(res, settings.findDependencies());
return res;
}
void Role::replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids)
{
granted_roles.replaceDependencies(old_to_new_ids);
settings.replaceDependencies(old_to_new_ids);
}
}
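A sketch of how an entity aggregates the dependency lists of its parts, assuming a trivial stand-in for the insertAtEnd helper included above; UUIDs are replaced by ints to keep the example standalone:
#include <cassert>
#include <vector>
using UUIDLike = int;   // stand-in for UUID in this sketch
template <typename T>
void insertAtEnd(std::vector<T> & dest, const std::vector<T> & src)
{
    dest.insert(dest.end(), src.begin(), src.end());
}
struct GrantedRoles { std::vector<UUIDLike> findDependencies() const { return {101, 102}; } };
struct SettingsProfileElements { std::vector<UUIDLike> findDependencies() const { return {201}; } };
struct Role
{
    GrantedRoles granted_roles;
    SettingsProfileElements settings;
    std::vector<UUIDLike> findDependencies() const
    {
        std::vector<UUIDLike> res;
        insertAtEnd(res, granted_roles.findDependencies());  // roles this role was granted
        insertAtEnd(res, settings.findDependencies());       // parent settings profiles
        return res;
    }
};
int main()
{
    Role role;
    assert(role.findDependencies().size() == 3);
}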

View File

@ -19,6 +19,10 @@ struct Role : public IAccessEntity
std::shared_ptr<IAccessEntity> clone() const override { return cloneImpl<Role>(); }
static constexpr const auto TYPE = AccessEntityType::ROLE;
AccessEntityType getType() const override { return TYPE; }
std::vector<UUID> findDependencies() const override;
void replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids) override;
bool isBackupAllowed() const override { return settings.isBackupAllowed(); }
};
using RolePtr = std::shared_ptr<const Role>;

View File

@ -7,6 +7,7 @@
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <boost/range/algorithm/set_algorithm.hpp>
#include <boost/range/algorithm/copy.hpp>
#include <boost/range/algorithm_ext/push_back.hpp>
#include <base/sort.h>
@ -286,4 +287,54 @@ bool operator ==(const RolesOrUsersSet & lhs, const RolesOrUsersSet & rhs)
return (lhs.all == rhs.all) && (lhs.ids == rhs.ids) && (lhs.except_ids == rhs.except_ids);
}
std::vector<UUID> RolesOrUsersSet::findDependencies() const
{
std::vector<UUID> res;
boost::range::copy(ids, std::back_inserter(res));
boost::range::copy(except_ids, std::back_inserter(res));
return res;
}
void RolesOrUsersSet::replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids)
{
std::vector<UUID> new_ids;
for (auto it = ids.begin(); it != ids.end();)
{
auto id = *it;
auto it_new_id = old_to_new_ids.find(id);
if (it_new_id != old_to_new_ids.end())
{
auto new_id = it_new_id->second;
new_ids.push_back(new_id);
it = ids.erase(it);
}
else
{
++it;
}
}
boost::range::copy(new_ids, std::inserter(ids, ids.end()));
new_ids.clear();
for (auto it = except_ids.begin(); it != except_ids.end();)
{
auto id = *it;
auto it_new_id = old_to_new_ids.find(id);
if (it_new_id != old_to_new_ids.end())
{
auto new_id = it_new_id->second;
new_ids.push_back(new_id);
it = except_ids.erase(it);
}
else
{
++it;
}
}
boost::range::copy(new_ids, std::inserter(except_ids, except_ids.end()));
}
}

View File

@ -5,6 +5,7 @@
#include <boost/container/flat_set.hpp>
#include <memory>
#include <optional>
#include <unordered_map>
namespace DB
@ -62,6 +63,9 @@ struct RolesOrUsersSet
friend bool operator ==(const RolesOrUsersSet & lhs, const RolesOrUsersSet & rhs);
friend bool operator !=(const RolesOrUsersSet & lhs, const RolesOrUsersSet & rhs) { return !(lhs == rhs); }
std::vector<UUID> findDependencies() const;
void replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids);
bool all = false;
boost::container::flat_set<UUID> ids;
boost::container::flat_set<UUID> except_ids;

View File

@ -58,4 +58,14 @@ bool RowPolicy::equal(const IAccessEntity & other) const
&& restrictive == other_policy.restrictive && (to_roles == other_policy.to_roles);
}
std::vector<UUID> RowPolicy::findDependencies() const
{
return to_roles.findDependencies();
}
void RowPolicy::replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids)
{
to_roles.replaceDependencies(old_to_new_ids);
}
}

View File

@ -46,6 +46,10 @@ struct RowPolicy : public IAccessEntity
static constexpr const auto TYPE = AccessEntityType::ROW_POLICY;
AccessEntityType getType() const override { return TYPE; }
std::vector<UUID> findDependencies() const override;
void replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids) override;
bool isBackupAllowed() const override { return true; }
/// Which roles or users should use this row policy.
RolesOrUsersSet to_roles;

View File

@ -1,4 +1,5 @@
#include <Access/SettingsProfile.h>
#include <base/insertAtEnd.h>
namespace DB
@ -12,4 +13,18 @@ bool SettingsProfile::equal(const IAccessEntity & other) const
return (elements == other_profile.elements) && (to_roles == other_profile.to_roles);
}
std::vector<UUID> SettingsProfile::findDependencies() const
{
std::vector<UUID> res;
insertAtEnd(res, elements.findDependencies());
insertAtEnd(res, to_roles.findDependencies());
return res;
}
void SettingsProfile::replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids)
{
elements.replaceDependencies(old_to_new_ids);
to_roles.replaceDependencies(old_to_new_ids);
}
}

View File

@ -20,6 +20,10 @@ struct SettingsProfile : public IAccessEntity
std::shared_ptr<IAccessEntity> clone() const override { return cloneImpl<SettingsProfile>(); }
static constexpr const auto TYPE = AccessEntityType::SETTINGS_PROFILE;
AccessEntityType getType() const override { return TYPE; }
std::vector<UUID> findDependencies() const override;
void replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids) override;
bool isBackupAllowed() const override { return elements.isBackupAllowed(); }
};
using SettingsProfilePtr = std::shared_ptr<const SettingsProfile>;

View File

@ -12,6 +12,13 @@
namespace DB
{
namespace
{
constexpr const char ALLOW_BACKUP_SETTING_NAME[] = "allow_backup";
}
SettingsProfileElement::SettingsProfileElement(const ASTSettingsProfileElement & ast)
{
init(ast, nullptr);
@ -41,7 +48,10 @@ void SettingsProfileElement::init(const ASTSettingsProfileElement & ast, const A
/// Optionally check if a setting with that name is allowed.
if (access_control)
access_control->checkSettingNameIsAllowed(setting_name);
{
if (setting_name != ALLOW_BACKUP_SETTING_NAME)
access_control->checkSettingNameIsAllowed(setting_name);
}
value = ast.value;
min_value = ast.min_value;
@ -127,6 +137,36 @@ std::shared_ptr<ASTSettingsProfileElements> SettingsProfileElements::toASTWithNa
}
std::vector<UUID> SettingsProfileElements::findDependencies() const
{
std::vector<UUID> res;
for (const auto & element : *this)
{
if (element.parent_profile)
res.push_back(*element.parent_profile);
}
return res;
}
void SettingsProfileElements::replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids)
{
for (auto & element : *this)
{
if (element.parent_profile)
{
auto id = *element.parent_profile;
auto it_new_id = old_to_new_ids.find(id);
if (it_new_id != old_to_new_ids.end())
{
auto new_id = it_new_id->second;
element.parent_profile = new_id;
}
}
}
}
void SettingsProfileElements::merge(const SettingsProfileElements & other)
{
insert(end(), other.begin(), other.end());
@ -138,8 +178,11 @@ Settings SettingsProfileElements::toSettings() const
Settings res;
for (const auto & elem : *this)
{
if (!elem.setting_name.empty() && !elem.value.isNull())
res.set(elem.setting_name, elem.value);
if (!elem.setting_name.empty() && (elem.setting_name != ALLOW_BACKUP_SETTING_NAME))
{
if (!elem.value.isNull())
res.set(elem.setting_name, elem.value);
}
}
return res;
}
@ -149,8 +192,11 @@ SettingsChanges SettingsProfileElements::toSettingsChanges() const
SettingsChanges res;
for (const auto & elem : *this)
{
if (!elem.setting_name.empty() && !elem.value.isNull())
res.push_back({elem.setting_name, elem.value});
if (!elem.setting_name.empty() && (elem.setting_name != ALLOW_BACKUP_SETTING_NAME))
{
if (!elem.value.isNull())
res.push_back({elem.setting_name, elem.value});
}
}
return res;
}
@ -160,7 +206,7 @@ SettingsConstraints SettingsProfileElements::toSettingsConstraints(const AccessC
SettingsConstraints res{access_control};
for (const auto & elem : *this)
{
if (!elem.setting_name.empty())
if (!elem.setting_name.empty() && (elem.setting_name != ALLOW_BACKUP_SETTING_NAME))
{
if (!elem.min_value.isNull())
res.setMinValue(elem.setting_name, elem.min_value);
@ -189,5 +235,14 @@ std::vector<UUID> SettingsProfileElements::toProfileIDs() const
return res;
}
bool SettingsProfileElements::isBackupAllowed() const
{
for (const auto & setting : *this)
{
if (setting.setting_name == ALLOW_BACKUP_SETTING_NAME)
return static_cast<bool>(SettingFieldBool{setting.value});
}
return true;
}
}
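A hedged, standalone sketch of the allow_backup handling above: the element acts as a per-entity marker consumed by isBackupAllowed() and is skipped whenever the elements are materialized into real settings (the ProfileElement type here is a simplification, with a bool where the original stores a Field):
#include <cassert>
#include <string>
#include <vector>
struct ProfileElement
{
    std::string name;
    bool value = true;
};
constexpr const char ALLOW_BACKUP[] = "allow_backup";
bool isBackupAllowed(const std::vector<ProfileElement> & elements)
{
    for (const auto & e : elements)
        if (e.name == ALLOW_BACKUP)
            return e.value;
    return true;   // allowed by default
}
std::vector<ProfileElement> toSettings(const std::vector<ProfileElement> & elements)
{
    std::vector<ProfileElement> res;
    for (const auto & e : elements)
        if (!e.name.empty() && e.name != ALLOW_BACKUP)   // the marker never becomes a real setting
            res.push_back(e);
    return res;
}
int main()
{
    std::vector<ProfileElement> elements{{"max_threads", true}, {ALLOW_BACKUP, false}};
    assert(!isBackupAllowed(elements));
    assert(toSettings(elements).size() == 1);
}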

View File

@ -3,6 +3,7 @@
#include <Core/Field.h>
#include <Core/UUID.h>
#include <optional>
#include <unordered_map>
#include <vector>
@ -57,12 +58,17 @@ public:
std::shared_ptr<ASTSettingsProfileElements> toAST() const;
std::shared_ptr<ASTSettingsProfileElements> toASTWithNames(const AccessControl & access_control) const;
std::vector<UUID> findDependencies() const;
void replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids);
void merge(const SettingsProfileElements & other);
Settings toSettings() const;
SettingsChanges toSettingsChanges() const;
SettingsConstraints toSettingsConstraints(const AccessControl & access_control) const;
std::vector<UUID> toProfileIDs() const;
bool isBackupAllowed() const;
};
}

View File

@ -1,5 +1,6 @@
#include <Access/User.h>
#include <Core/Protocol.h>
#include <base/insertAtEnd.h>
namespace DB
@ -31,4 +32,22 @@ void User::setName(const String & name_)
name = name_;
}
std::vector<UUID> User::findDependencies() const
{
std::vector<UUID> res;
insertAtEnd(res, default_roles.findDependencies());
insertAtEnd(res, granted_roles.findDependencies());
insertAtEnd(res, grantees.findDependencies());
insertAtEnd(res, settings.findDependencies());
return res;
}
void User::replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids)
{
default_roles.replaceDependencies(old_to_new_ids);
granted_roles.replaceDependencies(old_to_new_ids);
grantees.replaceDependencies(old_to_new_ids);
settings.replaceDependencies(old_to_new_ids);
}
}

View File

@ -29,6 +29,10 @@ struct User : public IAccessEntity
static constexpr const auto TYPE = AccessEntityType::USER;
AccessEntityType getType() const override { return TYPE; }
void setName(const String & name_) override;
std::vector<UUID> findDependencies() const override;
void replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids) override;
bool isBackupAllowed() const override { return settings.isBackupAllowed(); }
};
using UserPtr = std::shared_ptr<const User>;

View File

@ -523,8 +523,11 @@ namespace
}
}
UsersConfigAccessStorage::UsersConfigAccessStorage(const String & storage_name_, AccessControl & access_control_)
: IAccessStorage(storage_name_), access_control(access_control_), memory_storage(storage_name_, access_control.getChangesNotifier())
UsersConfigAccessStorage::UsersConfigAccessStorage(const String & storage_name_, AccessControl & access_control_, bool allow_backup_)
: IAccessStorage(storage_name_)
, access_control(access_control_)
, memory_storage(storage_name_, access_control.getChangesNotifier(), false)
, backup_allowed(allow_backup_)
{
}
@ -655,9 +658,9 @@ AccessEntityPtr UsersConfigAccessStorage::readImpl(const UUID & id, bool throw_i
}
std::optional<String> UsersConfigAccessStorage::readNameImpl(const UUID & id, bool throw_if_not_exists) const
std::optional<std::pair<String, AccessEntityType>> UsersConfigAccessStorage::readNameWithTypeImpl(const UUID & id, bool throw_if_not_exists) const
{
return memory_storage.readName(id, throw_if_not_exists);
return memory_storage.readNameWithType(id, throw_if_not_exists);
}
}

View File

@ -22,7 +22,7 @@ public:
static constexpr char STORAGE_TYPE[] = "users.xml";
UsersConfigAccessStorage(const String & storage_name_, AccessControl & access_control_);
UsersConfigAccessStorage(const String & storage_name_, AccessControl & access_control_, bool allow_backup_);
~UsersConfigAccessStorage() override;
const char * getStorageType() const override { return STORAGE_TYPE; }
@ -44,17 +44,20 @@ public:
bool exists(const UUID & id) const override;
bool isBackupAllowed() const override { return backup_allowed; }
private:
void parseFromConfig(const Poco::Util::AbstractConfiguration & config);
std::optional<UUID> findImpl(AccessEntityType type, const String & name) const override;
std::vector<UUID> findAllImpl(AccessEntityType type) const override;
AccessEntityPtr readImpl(const UUID & id, bool throw_if_not_exists) const override;
std::optional<String> readNameImpl(const UUID & id, bool throw_if_not_exists) const override;
std::optional<std::pair<String, AccessEntityType>> readNameWithTypeImpl(const UUID & id, bool throw_if_not_exists) const override;
AccessControl & access_control;
MemoryAccessStorage memory_storage;
String path;
std::unique_ptr<ConfigReloader> config_reloader;
bool backup_allowed = false;
mutable std::mutex load_mutex;
};
}

View File

@ -49,7 +49,7 @@ TEST(AccessRights, Union)
"GRANT INSERT ON *.*, "
"GRANT SHOW, SELECT, ALTER, CREATE DATABASE, CREATE TABLE, CREATE VIEW, "
"CREATE DICTIONARY, DROP DATABASE, DROP TABLE, DROP VIEW, DROP DICTIONARY, "
"TRUNCATE, OPTIMIZE, CREATE ROW POLICY, ALTER ROW POLICY, DROP ROW POLICY, "
"TRUNCATE, OPTIMIZE, BACKUP, CREATE ROW POLICY, ALTER ROW POLICY, DROP ROW POLICY, "
"SHOW ROW POLICIES, SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, "
"SYSTEM MOVES, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, "
"SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, SYSTEM RESTART REPLICA, "

View File

@ -24,7 +24,7 @@ TEST(ReplicatedAccessStorage, ShutdownWithFailedStartup)
try
{
auto storage = ReplicatedAccessStorage("replicated", "/clickhouse/access", get_zk, changes_notifier);
auto storage = ReplicatedAccessStorage("replicated", "/clickhouse/access", get_zk, changes_notifier, false);
}
catch (Exception & e)
{

View File

@ -2,6 +2,7 @@
#include <AggregateFunctions/IAggregateFunction.h>
#include <Common/IFactoryWithAliases.h>
#include <Parsers/ASTFunction.h>
#include <functional>
@ -105,4 +106,12 @@ private:
};
struct AggregateUtils
{
static bool isAggregateFunction(const ASTFunction & node)
{
return AggregateFunctionFactory::instance().isAggregateFunctionName(node.name);
}
};
}

View File

@ -14,6 +14,7 @@ namespace DB
namespace ErrorCodes
{
extern const int UNEXPECTED_NODE_IN_ZOOKEEPER;
extern const int LOGICAL_ERROR;
}
/// zookeeper_path/file_names/file_name->checksum_and_size
@ -27,32 +28,40 @@ namespace
using FileInfo = IBackupCoordination::FileInfo;
using PartNameAndChecksum = IBackupCoordination::PartNameAndChecksum;
String serializePartNamesAndChecksums(const std::vector<PartNameAndChecksum> & part_names_and_checksums)
struct ReplicatedPartNames
{
WriteBufferFromOwnString out;
writeBinary(part_names_and_checksums.size(), out);
for (const auto & part_name_and_checksum : part_names_and_checksums)
{
writeBinary(part_name_and_checksum.part_name, out);
writeBinary(part_name_and_checksum.checksum, out);
}
return out.str();
}
std::vector<PartNameAndChecksum> deserializePartNamesAndChecksums(const String & str)
{
ReadBufferFromString in{str};
std::vector<PartNameAndChecksum> part_names_and_checksums;
size_t num;
readBinary(num, in);
part_names_and_checksums.resize(num);
for (size_t i = 0; i != num; ++i)
String table_name_for_logs;
static String serialize(const std::vector<PartNameAndChecksum> & part_names_and_checksums_, const String & table_name_for_logs_)
{
readBinary(part_names_and_checksums[i].part_name, in);
readBinary(part_names_and_checksums[i].checksum, in);
WriteBufferFromOwnString out;
writeBinary(part_names_and_checksums_.size(), out);
for (const auto & part_name_and_checksum : part_names_and_checksums_)
{
writeBinary(part_name_and_checksum.part_name, out);
writeBinary(part_name_and_checksum.checksum, out);
}
writeBinary(table_name_for_logs_, out);
return out.str();
}
return part_names_and_checksums;
}
static ReplicatedPartNames deserialize(const String & str)
{
ReadBufferFromString in{str};
ReplicatedPartNames res;
size_t num;
readBinary(num, in);
res.part_names_and_checksums.resize(num);
for (size_t i = 0; i != num; ++i)
{
readBinary(res.part_names_and_checksums[i].part_name, in);
readBinary(res.part_names_and_checksums[i].checksum, in);
}
readBinary(res.table_name_for_logs, in);
return res;
}
};
String serializeFileInfo(const FileInfo & info)
{
@ -122,7 +131,7 @@ namespace
BackupCoordinationDistributed::BackupCoordinationDistributed(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_)
: zookeeper_path(zookeeper_path_)
, get_zookeeper(get_zookeeper_)
, preparing_barrier(zookeeper_path_ + "/preparing", get_zookeeper_, "BackupCoordination", "preparing")
, stage_sync(zookeeper_path_ + "/stage", get_zookeeper_, &Poco::Logger::get("BackupCoordination"))
{
createRootNodes();
}
@ -134,8 +143,8 @@ void BackupCoordinationDistributed::createRootNodes()
auto zookeeper = get_zookeeper();
zookeeper->createAncestors(zookeeper_path);
zookeeper->createIfNotExists(zookeeper_path, "");
zookeeper->createIfNotExists(zookeeper_path + "/repl_tables_paths", "");
zookeeper->createIfNotExists(zookeeper_path + "/repl_tables_parts", "");
zookeeper->createIfNotExists(zookeeper_path + "/repl_part_names", "");
zookeeper->createIfNotExists(zookeeper_path + "/repl_data_paths", "");
zookeeper->createIfNotExists(zookeeper_path + "/file_names", "");
zookeeper->createIfNotExists(zookeeper_path + "/file_infos", "");
zookeeper->createIfNotExists(zookeeper_path + "/archive_suffixes", "");
@ -147,101 +156,89 @@ void BackupCoordinationDistributed::removeAllNodes()
zookeeper->removeRecursive(zookeeper_path);
}
void BackupCoordinationDistributed::addReplicatedTableDataPath(const String & table_zk_path, const String & table_data_path)
void BackupCoordinationDistributed::syncStage(const String & current_host, int new_stage, const Strings & wait_hosts, std::chrono::seconds timeout)
{
auto zookeeper = get_zookeeper();
String path = zookeeper_path + "/repl_tables_paths/" + escapeForFileName(table_zk_path);
zookeeper->createIfNotExists(path, "");
path += "/" + escapeForFileName(table_data_path);
zookeeper->createIfNotExists(path, "");
stage_sync.syncStage(current_host, new_stage, wait_hosts, timeout);
}
void BackupCoordinationDistributed::addReplicatedTablePartNames(
const String & host_id,
const DatabaseAndTableName & table_name,
void BackupCoordinationDistributed::syncStageError(const String & current_host, const String & error_message)
{
stage_sync.syncStageError(current_host, error_message);
}
void BackupCoordinationDistributed::addReplicatedPartNames(
const String & table_zk_path,
const String & table_name_for_logs,
const String & replica_name,
const std::vector<PartNameAndChecksum> & part_names_and_checksums)
{
auto zookeeper = get_zookeeper();
String path = zookeeper_path + "/repl_tables_parts/" + escapeForFileName(table_zk_path);
zookeeper->createIfNotExists(path, "");
path += "/" + escapeForFileName(host_id);
zookeeper->createIfNotExists(path, "");
path += "/" + escapeForFileName(table_name.first);
zookeeper->createIfNotExists(path, "");
path += "/" + escapeForFileName(table_name.second);
zookeeper->create(path, serializePartNamesAndChecksums(part_names_and_checksums), zkutil::CreateMode::Persistent);
}
void BackupCoordinationDistributed::finishPreparing(const String & host_id, const String & error_message)
{
preparing_barrier.finish(host_id, error_message);
}
void BackupCoordinationDistributed::waitForAllHostsPrepared(const Strings & host_ids, std::chrono::seconds timeout) const
{
preparing_barrier.waitForAllHostsToFinish(host_ids, timeout);
prepareReplicatedTablesInfo();
}
void BackupCoordinationDistributed::prepareReplicatedTablesInfo() const
{
replicated_tables.emplace();
auto zookeeper = get_zookeeper();
String path = zookeeper_path + "/repl_tables_paths";
for (const String & escaped_table_zk_path : zookeeper->getChildren(path))
{
String table_zk_path = unescapeForFileName(escaped_table_zk_path);
for (const String & escaped_data_path : zookeeper->getChildren(path + "/" + escaped_table_zk_path))
{
String data_path = unescapeForFileName(escaped_data_path);
replicated_tables->addDataPath(table_zk_path, data_path);
}
std::lock_guard lock{mutex};
if (replicated_part_names)
throw Exception(ErrorCodes::LOGICAL_ERROR, "addPartNames() must not be called after getPartNames()");
}
path = zookeeper_path + "/repl_tables_parts";
auto zookeeper = get_zookeeper();
String path = zookeeper_path + "/repl_part_names/" + escapeForFileName(table_zk_path);
zookeeper->createIfNotExists(path, "");
path += "/" + escapeForFileName(replica_name);
zookeeper->create(path, ReplicatedPartNames::serialize(part_names_and_checksums, table_name_for_logs), zkutil::CreateMode::Persistent);
}
Strings BackupCoordinationDistributed::getReplicatedPartNames(const String & table_zk_path, const String & replica_name) const
{
std::lock_guard lock{mutex};
prepareReplicatedPartNames();
return replicated_part_names->getPartNames(table_zk_path, replica_name);
}
void BackupCoordinationDistributed::addReplicatedDataPath(
const String & table_zk_path, const String & data_path)
{
auto zookeeper = get_zookeeper();
String path = zookeeper_path + "/repl_data_paths/" + escapeForFileName(table_zk_path);
zookeeper->createIfNotExists(path, "");
path += "/";
zookeeper->create(path, data_path, zkutil::CreateMode::PersistentSequential);
}
Strings BackupCoordinationDistributed::getReplicatedDataPaths(const String & table_zk_path) const
{
auto zookeeper = get_zookeeper();
String path = zookeeper_path + "/repl_data_paths/" + escapeForFileName(table_zk_path);
Strings children = zookeeper->getChildren(path);
Strings data_paths;
data_paths.reserve(children.size());
for (const String & child : children)
data_paths.push_back(zookeeper->get(path + "/" + child));
return data_paths;
}
void BackupCoordinationDistributed::prepareReplicatedPartNames() const
{
if (replicated_part_names)
return;
replicated_part_names.emplace();
auto zookeeper = get_zookeeper();
String path = zookeeper_path + "/repl_part_names";
for (const String & escaped_table_zk_path : zookeeper->getChildren(path))
{
String table_zk_path = unescapeForFileName(escaped_table_zk_path);
String path2 = path + "/" + escaped_table_zk_path;
for (const String & escaped_host_id : zookeeper->getChildren(path2))
for (const String & escaped_replica_name : zookeeper->getChildren(path2))
{
String host_id = unescapeForFileName(escaped_host_id);
String path3 = path2 + "/" + escaped_host_id;
for (const String & escaped_database_name : zookeeper->getChildren(path3))
{
String database_name = unescapeForFileName(escaped_database_name);
String path4 = path3 + "/" + escaped_database_name;
for (const String & escaped_table_name : zookeeper->getChildren(path4))
{
String table_name = unescapeForFileName(escaped_table_name);
String path5 = path4 + "/" + escaped_table_name;
auto part_names_and_checksums = deserializePartNamesAndChecksums(zookeeper->get(path5));
replicated_tables->addPartNames(host_id, {database_name, table_name}, table_zk_path, part_names_and_checksums);
}
}
String replica_name = unescapeForFileName(escaped_replica_name);
auto part_names = ReplicatedPartNames::deserialize(zookeeper->get(path2 + "/" + escaped_replica_name));
replicated_part_names->addPartNames(table_zk_path, part_names.table_name_for_logs, replica_name, part_names.part_names_and_checksums);
}
}
replicated_tables->preparePartNamesByLocations();
}
Strings BackupCoordinationDistributed::getReplicatedTableDataPaths(const String & table_zk_path) const
{
return replicated_tables->getDataPaths(table_zk_path);
}
Strings BackupCoordinationDistributed::getReplicatedTablePartNames(const String & host_id, const DatabaseAndTableName & table_name, const String & table_zk_path) const
{
return replicated_tables->getPartNames(host_id, table_name, table_zk_path);
}
void BackupCoordinationDistributed::addFileInfo(const FileInfo & file_info, bool & is_data_file_required)
{
@ -305,12 +302,19 @@ std::vector<FileInfo> BackupCoordinationDistributed::getAllFileInfos() const
return file_infos;
}
Strings BackupCoordinationDistributed::listFiles(const String & prefix, const String & terminator) const
Strings BackupCoordinationDistributed::listFiles(const String & directory, bool recursive) const
{
auto zookeeper = get_zookeeper();
Strings escaped_names = zookeeper->getChildren(zookeeper_path + "/file_names");
String prefix = directory;
if (!prefix.empty() && !prefix.ends_with('/'))
prefix += '/';
String terminator = recursive ? "" : "/";
Strings elements;
std::unordered_set<std::string_view> unique_elements;
for (const String & escaped_name : escaped_names)
{
String name = unescapeForFileName(escaped_name);
@ -321,15 +325,35 @@ Strings BackupCoordinationDistributed::listFiles(const String & prefix, const St
if (!terminator.empty())
end_pos = name.find(terminator, start_pos);
std::string_view new_element = std::string_view{name}.substr(start_pos, end_pos - start_pos);
if (!elements.empty() && (elements.back() == new_element))
if (unique_elements.contains(new_element))
continue;
elements.push_back(String{new_element});
unique_elements.emplace(new_element);
}
::sort(elements.begin(), elements.end());
return elements;
}
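A self-contained sketch of the listing rule used above, under simplifying assumptions (plain std::string names, no ZooKeeper); note that this sketch keeps owned strings in the de-duplication set rather than views.

#include <algorithm>
#include <string>
#include <unordered_set>
#include <vector>

// Simplified sketch: list the entries of `directory`, either recursively (full relative paths)
// or only the first path component after the prefix.
std::vector<std::string> listFilesSketch(const std::vector<std::string> & names, std::string prefix, bool recursive)
{
    if (!prefix.empty() && prefix.back() != '/')
        prefix += '/';
    const std::string terminator = recursive ? "" : "/";

    std::vector<std::string> elements;
    std::unordered_set<std::string> unique_elements;
    for (const auto & name : names)
    {
        if (!name.starts_with(prefix))
            continue;
        size_t start_pos = prefix.length();
        size_t end_pos = std::string::npos;
        if (!terminator.empty())
            end_pos = name.find(terminator, start_pos);
        std::string element = name.substr(start_pos, end_pos - start_pos);
        if (unique_elements.insert(element).second)   // only keep the first occurrence
            elements.push_back(std::move(element));
    }
    std::sort(elements.begin(), elements.end());
    return elements;
}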
bool BackupCoordinationDistributed::hasFiles(const String & directory) const
{
auto zookeeper = get_zookeeper();
Strings escaped_names = zookeeper->getChildren(zookeeper_path + "/file_names");
String prefix = directory;
if (!prefix.empty() && !prefix.ends_with('/'))
prefix += '/';
for (const String & escaped_name : escaped_names)
{
String name = unescapeForFileName(escaped_name);
if (name.starts_with(prefix))
return true;
}
return false;
}
std::optional<FileInfo> BackupCoordinationDistributed::getFileInfo(const String & file_name) const
{
auto zookeeper = get_zookeeper();

View File

@ -2,9 +2,6 @@
#include <Backups/IBackupCoordination.h>
#include <Backups/BackupCoordinationHelpers.h>
#include <Common/ZooKeeper/Common.h>
#include <map>
#include <unordered_map>
namespace DB
@ -17,24 +14,26 @@ public:
BackupCoordinationDistributed(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_);
~BackupCoordinationDistributed() override;
void addReplicatedTableDataPath(const String & table_zk_path, const String & table_data_path) override;
void addReplicatedTablePartNames(
const String & host_id,
const DatabaseAndTableName & table_name,
void syncStage(const String & current_host, int new_stage, const Strings & wait_hosts, std::chrono::seconds timeout) override;
void syncStageError(const String & current_host, const String & error_message) override;
void addReplicatedPartNames(
const String & table_zk_path,
const String & table_name_for_logs,
const String & replica_name,
const std::vector<PartNameAndChecksum> & part_names_and_checksums) override;
void finishPreparing(const String & host_id, const String & error_message) override;
void waitForAllHostsPrepared(const Strings & host_ids, std::chrono::seconds timeout) const override;
Strings getReplicatedPartNames(const String & table_zk_path, const String & replica_name) const override;
Strings getReplicatedTableDataPaths(const String & table_zk_path) const override;
Strings getReplicatedTablePartNames(const String & host_id, const DatabaseAndTableName & table_name, const String & table_zk_path) const override;
void addReplicatedDataPath(const String & table_zk_path, const String & data_path) override;
Strings getReplicatedDataPaths(const String & table_zk_path) const override;
void addFileInfo(const FileInfo & file_info, bool & is_data_file_required) override;
void updateFileInfo(const FileInfo & file_info) override;
std::vector<FileInfo> getAllFileInfos() const override;
Strings listFiles(const String & prefix, const String & terminator) const override;
Strings listFiles(const String & directory, bool recursive) const override;
bool hasFiles(const String & directory) const override;
std::optional<FileInfo> getFileInfo(const String & file_name) const override;
std::optional<FileInfo> getFileInfo(const SizeAndChecksum & size_and_checksum) const override;
std::optional<SizeAndChecksum> getFileSizeAndChecksum(const String & file_name) const override;
@ -47,12 +46,15 @@ public:
private:
void createRootNodes();
void removeAllNodes();
void prepareReplicatedTablesInfo() const;
void prepareReplicatedPartNames() const;
const String zookeeper_path;
const zkutil::GetZooKeeper get_zookeeper;
BackupCoordinationDistributedBarrier preparing_barrier;
mutable std::optional<BackupCoordinationReplicatedTablesInfo> replicated_tables;
BackupCoordinationStageSync stage_sync;
mutable std::mutex mutex;
mutable std::optional<BackupCoordinationReplicatedPartNames> replicated_part_names;
};
}

View File

@ -1,6 +1,8 @@
#include <Backups/BackupCoordinationHelpers.h>
#include <Storages/MergeTree/MergeTreePartInfo.h>
#include <Common/Exception.h>
#include <Common/escapeForFileName.h>
#include <IO/ReadHelpers.h>
#include <base/chrono_io.h>
#include <boost/range/adaptor/map.hpp>
@ -16,37 +18,26 @@ namespace ErrorCodes
}
struct BackupCoordinationReplicatedTablesInfo::HostAndTableName
namespace
{
String host_id;
DatabaseAndTableName table_name;
struct Less
struct LessReplicaName
{
bool operator()(const HostAndTableName & lhs, const HostAndTableName & rhs) const
{
return (lhs.host_id < rhs.host_id) || ((lhs.host_id == rhs.host_id) && (lhs.table_name < rhs.table_name));
}
bool operator()(const std::shared_ptr<const HostAndTableName> & lhs, const std::shared_ptr<const HostAndTableName> & rhs) const
{
return operator()(*lhs, *rhs);
}
bool operator()(const std::shared_ptr<const String> & left, const std::shared_ptr<const String> & right) { return *left < *right; }
};
};
}
class BackupCoordinationReplicatedTablesInfo::CoveredPartsFinder
class BackupCoordinationReplicatedPartNames::CoveredPartsFinder
{
public:
CoveredPartsFinder() = default;
explicit CoveredPartsFinder(const String & table_name_for_logs_) : table_name_for_logs(table_name_for_logs_) {}
void addPart(const String & new_part_name, const std::shared_ptr<const HostAndTableName> & host_and_table_name)
void addPartName(const String & new_part_name, const std::shared_ptr<const String> & replica_name)
{
addPart(MergeTreePartInfo::fromPartName(new_part_name, MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING), host_and_table_name);
addPartName(MergeTreePartInfo::fromPartName(new_part_name, MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING), replica_name);
}
void addPart(MergeTreePartInfo && new_part_info, const std::shared_ptr<const HostAndTableName> & host_and_table_name)
void addPartName(MergeTreePartInfo && new_part_info, const std::shared_ptr<const String> & replica_name)
{
auto new_min_block = new_part_info.min_block;
auto new_max_block = new_part_info.max_block;
@ -57,7 +48,7 @@ public:
if (first_it == parts.end())
{
/// All max_blocks < part_info.min_block, so we can safely add the `part_info` to the list of parts.
parts.emplace(new_max_block, PartInfo{std::move(new_part_info), host_and_table_name});
parts.emplace(new_max_block, PartInfo{std::move(new_part_info), replica_name});
return;
}
@ -68,7 +59,7 @@ public:
{
/// (prev_info.max_block < part_info.min_block) AND (part_info.max_block < current_info.min_block),
/// so we can safely add the `part_info` to the list of parts.
parts.emplace(new_max_block, PartInfo{std::move(new_part_info), host_and_table_name});
parts.emplace(new_max_block, PartInfo{std::move(new_part_info), replica_name});
return;
}
@ -92,22 +83,19 @@ public:
{
throw Exception(
ErrorCodes::CANNOT_BACKUP_TABLE,
"Intersected parts detected: {} in the table {}.{}{} and {} in the table {}.{}{}. It should be investigated",
"Intersected parts detected in the table {}: {} on replica {} and {} on replica {}. It should be investigated",
table_name_for_logs,
part.info.getPartName(),
part.host_and_table_name->table_name.first,
part.host_and_table_name->table_name.second,
part.host_and_table_name->host_id.empty() ? "" : (" on the host " + part.host_and_table_name->host_id),
*part.replica_name,
new_part_info.getPartName(),
host_and_table_name->table_name.first,
host_and_table_name->table_name.second,
host_and_table_name->host_id.empty() ? "" : (" on the host " + host_and_table_name->host_id));
*replica_name);
}
++last_it;
}
/// `part_info` will replace multiple parts [first_it..last_it)
parts.erase(first_it, last_it);
parts.emplace(new_max_block, PartInfo{std::move(new_part_info), host_and_table_name});
parts.emplace(new_max_block, PartInfo{std::move(new_part_info), replica_name});
}
bool isCoveredByAnotherPart(const String & part_name) const
@ -156,185 +144,175 @@ private:
struct PartInfo
{
MergeTreePartInfo info;
std::shared_ptr<const HostAndTableName> host_and_table_name;
std::shared_ptr<const String> replica_name;
};
using Parts = std::map<Int64 /* max_block */, PartInfo>;
std::unordered_map<String, Parts> partitions;
const String table_name_for_logs;
};
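Illustrative only: the containment rule the finder relies on, assuming parts within one partition are compared by their [min_block, max_block] ranges; PartRange is a simplified stand-in for MergeTreePartInfo.

#include <cstdint>
#include <string>

struct PartRange
{
    std::string partition_id;
    int64_t min_block = 0;
    int64_t max_block = 0;
};

// `outer` covers `inner` when they belong to one partition and the inner block range
// lies entirely within the outer one.
bool covers(const PartRange & outer, const PartRange & inner)
{
    return outer.partition_id == inner.partition_id
        && outer.min_block <= inner.min_block
        && inner.max_block <= outer.max_block;
}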
void BackupCoordinationReplicatedTablesInfo::addDataPath(const String & table_zk_path, const String & table_data_path)
{
tables[table_zk_path].data_paths.push_back(table_data_path);
}
BackupCoordinationReplicatedPartNames::BackupCoordinationReplicatedPartNames() = default;
BackupCoordinationReplicatedPartNames::~BackupCoordinationReplicatedPartNames() = default;
Strings BackupCoordinationReplicatedTablesInfo::getDataPaths(const String & table_zk_path) const
{
auto it = tables.find(table_zk_path);
if (it == tables.end())
throw Exception(ErrorCodes::LOGICAL_ERROR, "getDataPaths() called for unknown table_zk_path: {}", table_zk_path);
const auto & replicated_table = it->second;
return replicated_table.data_paths;
}
void BackupCoordinationReplicatedTablesInfo::addPartNames(
const String & host_id,
const DatabaseAndTableName & table_name,
void BackupCoordinationReplicatedPartNames::addPartNames(
const String & table_zk_path,
const String & table_name_for_logs,
const String & replica_name,
const std::vector<PartNameAndChecksum> & part_names_and_checksums)
{
auto & table = tables[table_zk_path];
auto & part_locations_by_names = table.part_locations_by_names;
auto host_and_table_name = std::make_shared<HostAndTableName>();
host_and_table_name->host_id = host_id;
host_and_table_name->table_name = table_name;
if (part_names_prepared)
throw Exception(ErrorCodes::LOGICAL_ERROR, "addPartNames() must not be called after getPartNames()");
auto & table_info = table_infos[table_zk_path];
if (!table_info.covered_parts_finder)
table_info.covered_parts_finder = std::make_unique<CoveredPartsFinder>(table_name_for_logs);
auto replica_name_ptr = std::make_shared<String>(replica_name);
for (const auto & part_name_and_checksum : part_names_and_checksums)
{
const auto & part_name = part_name_and_checksum.part_name;
const auto & checksum = part_name_and_checksum.checksum;
auto it = part_locations_by_names.find(part_name);
if (it == part_locations_by_names.end())
auto it = table_info.parts_replicas.find(part_name);
if (it == table_info.parts_replicas.end())
{
it = part_locations_by_names.emplace(part_name, PartLocations{}).first;
it = table_info.parts_replicas.emplace(part_name, PartReplicas{}).first;
it->second.checksum = checksum;
}
else
{
const auto & existing = it->second;
if (existing.checksum != checksum)
const auto & other = it->second;
if (other.checksum != checksum)
{
const auto & existing_host_and_table_name = **existing.host_and_table_names.begin();
const String & other_replica_name = **other.replica_names.begin();
throw Exception(
ErrorCodes::CANNOT_BACKUP_TABLE,
"Table {}.{} has part {} which is different from the part of table {}.{}. Must be the same",
table_name.first,
table_name.second,
"Table {} on replica {} has part {} which is different from the part on replica {}. Must be the same",
table_name_for_logs,
replica_name,
part_name,
existing_host_and_table_name.table_name.first,
existing_host_and_table_name.table_name.second);
other_replica_name);
}
}
auto & host_and_table_names = it->second.host_and_table_names;
auto & replica_names = it->second.replica_names;
/// `host_and_table_names` should be ordered because we need this vector to be in the same order on every replica.
host_and_table_names.insert(
std::upper_bound(host_and_table_names.begin(), host_and_table_names.end(), host_and_table_name, HostAndTableName::Less{}),
host_and_table_name);
/// `replica_names` should be ordered because we need this vector to be in the same order on every replica.
replica_names.insert(
std::upper_bound(replica_names.begin(), replica_names.end(), replica_name_ptr, LessReplicaName{}), replica_name_ptr);
table_info.covered_parts_finder->addPartName(part_name, replica_name_ptr);
}
}
Strings BackupCoordinationReplicatedTablesInfo::getPartNames(const String & host_id, const DatabaseAndTableName & table_name, const String & table_zk_path) const
Strings BackupCoordinationReplicatedPartNames::getPartNames(const String & table_zk_path, const String & replica_name) const
{
if (!part_names_by_locations_prepared)
throw Exception(ErrorCodes::LOGICAL_ERROR, "preparePartNamesByLocations() was not called before getPartNames()");
auto it = tables.find(table_zk_path);
if (it == tables.end())
throw Exception(ErrorCodes::LOGICAL_ERROR, "getPartNames() called for unknown table_zk_path: {}", table_zk_path);
const auto & table = it->second;
auto it2 = table.part_names_by_locations.find(host_id);
if (it2 == table.part_names_by_locations.end())
preparePartNames();
auto it = table_infos.find(table_zk_path);
if (it == table_infos.end())
return {};
const auto & part_names_by_host_id = it2->second;
auto it3 = part_names_by_host_id.find(table_name);
if (it3 == part_names_by_host_id.end())
const auto & replicas_parts = it->second.replicas_parts;
auto it2 = replicas_parts.find(replica_name);
if (it2 == replicas_parts.end())
return {};
return it3->second;
return it2->second;
}
void BackupCoordinationReplicatedTablesInfo::preparePartNamesByLocations()
void BackupCoordinationReplicatedPartNames::preparePartNames() const
{
if (part_names_by_locations_prepared)
if (part_names_prepared)
return;
part_names_by_locations_prepared = true;
size_t counter = 0;
for (auto & table : tables | boost::adaptors::map_values)
for (const auto & table_info : table_infos | boost::adaptors::map_values)
{
CoveredPartsFinder covered_parts_finder;
for (const auto & [part_name, part_locations] : table.part_locations_by_names)
covered_parts_finder.addPart(part_name, *part_locations.host_and_table_names.begin());
table.part_names_by_locations.clear();
for (const auto & [part_name, part_locations] : table.part_locations_by_names)
for (const auto & [part_name, part_replicas] : table_info.parts_replicas)
{
if (covered_parts_finder.isCoveredByAnotherPart(part_name))
if (table_info.covered_parts_finder->isCoveredByAnotherPart(part_name))
continue;
size_t chosen_index = (counter++) % part_locations.host_and_table_names.size();
const auto & chosen_host_id = part_locations.host_and_table_names[chosen_index]->host_id;
const auto & chosen_table_name = part_locations.host_and_table_names[chosen_index]->table_name;
table.part_names_by_locations[chosen_host_id][chosen_table_name].push_back(part_name);
size_t chosen_index = (counter++) % part_replicas.replica_names.size();
const auto & chosen_replica_name = *part_replicas.replica_names[chosen_index];
table_info.replicas_parts[chosen_replica_name].push_back(part_name);
}
}
part_names_prepared = true;
}
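A minimal sketch of the assignment step above, with simplified types and the covered parts already filtered out; because std::map iterates in the same order everywhere, every host computes the same assignment independently.

#include <map>
#include <string>
#include <vector>

// Each part is assigned to exactly one of the replicas that own it, chosen round-robin.
std::map<std::string, std::string> assignPartsToReplicas(
    const std::map<std::string, std::vector<std::string>> & part_to_replicas)
{
    std::map<std::string, std::string> chosen_replica_for_part;
    size_t counter = 0;
    for (const auto & [part_name, replica_names] : part_to_replicas)   // ordered => deterministic
        chosen_replica_for_part[part_name] = replica_names[(counter++) % replica_names.size()];
    return chosen_replica_for_part;
}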
BackupCoordinationDistributedBarrier::BackupCoordinationDistributedBarrier(
const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_, const String & logger_name_, const String & operation_name_)
/// Helps to wait until all hosts come to a specified stage.
BackupCoordinationStageSync::BackupCoordinationStageSync(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_, Poco::Logger * log_)
: zookeeper_path(zookeeper_path_)
, get_zookeeper(get_zookeeper_)
, log(&Poco::Logger::get(logger_name_))
, operation_name(operation_name_)
, log(log_)
{
createRootNodes();
}
void BackupCoordinationDistributedBarrier::createRootNodes()
void BackupCoordinationStageSync::createRootNodes()
{
auto zookeeper = get_zookeeper();
zookeeper->createAncestors(zookeeper_path);
zookeeper->createIfNotExists(zookeeper_path, "");
}
void BackupCoordinationDistributedBarrier::finish(const String & host_id, const String & error_message)
void BackupCoordinationStageSync::syncStage(const String & current_host, int new_stage, const Strings & wait_hosts, std::chrono::seconds timeout)
{
if (error_message.empty())
LOG_TRACE(log, "Host {} has finished {}", host_id, operation_name);
else
LOG_ERROR(log, "Host {} has failed {} with message: {}", host_id, operation_name, error_message);
/// Put new stage to ZooKeeper.
auto zookeeper = get_zookeeper();
if (error_message.empty())
zookeeper->create(zookeeper_path + "/" + host_id + ":ready", "", zkutil::CreateMode::Persistent);
else
zookeeper->create(zookeeper_path + "/" + host_id + ":error", error_message, zkutil::CreateMode::Persistent);
}
zookeeper->createIfNotExists(zookeeper_path + "/" + current_host + "|" + std::to_string(new_stage), "");
void BackupCoordinationDistributedBarrier::waitForAllHostsToFinish(const Strings & host_ids, const std::chrono::seconds timeout) const
{
auto zookeeper = get_zookeeper();
if (wait_hosts.empty() || ((wait_hosts.size() == 1) && (wait_hosts.front() == current_host)))
return;
bool all_hosts_ready = false;
String not_ready_host_id;
String error_host_id;
String error_message;
/// Wait for other hosts.
/// Returns true if everything's ready, or false if we need to wait more.
auto process_nodes = [&](const Strings & nodes)
/// Current stages of all hosts.
std::optional<String> host_with_error;
std::optional<String> error_message;
std::map<String, std::optional<int>> unready_hosts;
for (const String & host : wait_hosts)
unready_hosts.emplace(host, std::optional<int>{});
/// Process ZooKeeper's nodes and update `unready_hosts`, `host_with_error` and `error_message`.
auto process_zk_nodes = [&](const Strings & zk_nodes)
{
std::unordered_set<std::string_view> set{nodes.begin(), nodes.end()};
for (const String & host_id : host_ids)
for (const String & zk_node : zk_nodes)
{
if (set.contains(host_id + ":error"))
if (zk_node == "error")
{
error_host_id = host_id;
error_message = zookeeper->get(zookeeper_path + "/" + host_id + ":error");
String str = zookeeper->get(zookeeper_path + "/" + zk_node);
size_t separator_pos = str.find('|');
if (separator_pos == String::npos)
throw Exception(ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE, "Unexpected value of zk node {}: {}", zookeeper_path + "/" + zk_node, str);
host_with_error = str.substr(0, separator_pos);
error_message = str.substr(separator_pos + 1);
return;
}
if (!set.contains(host_id + ":ready"))
else if (!zk_node.starts_with("remove_watch-"))
{
LOG_TRACE(log, "Waiting for host {} {}", host_id, operation_name);
not_ready_host_id = host_id;
return;
size_t separator_pos = zk_node.find('|');
if (separator_pos == String::npos)
throw Exception(ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE, "Unexpected zk node {}", zookeeper_path + "/" + zk_node);
String host = zk_node.substr(0, separator_pos);
int found_stage = parseFromString<int>(zk_node.substr(separator_pos + 1));
auto it = unready_hosts.find(host);
if (it != unready_hosts.end())
{
auto & stage = it->second;
if (!stage || (stage < found_stage))
stage = found_stage;
if (stage >= new_stage)
unready_hosts.erase(it);
}
}
}
all_hosts_ready = true;
};
/// Wait until all hosts are ready or an error happens or time is out.
std::atomic<bool> watch_set = false;
std::condition_variable watch_triggered_event;
@ -347,33 +325,25 @@ void BackupCoordinationDistributedBarrier::waitForAllHostsToFinish(const Strings
auto watch_triggered = [&] { return !watch_set; };
bool use_timeout = (timeout.count() >= 0);
std::chrono::steady_clock::duration time_left = timeout;
std::chrono::steady_clock::time_point start_time = std::chrono::steady_clock::now();
std::chrono::steady_clock::duration elapsed;
std::mutex dummy_mutex;
while (true)
while (!unready_hosts.empty() && !error_message)
{
if (use_timeout && (time_left.count() <= 0))
{
Strings children = zookeeper->getChildren(zookeeper_path);
process_nodes(children);
break;
}
watch_set = true;
Strings children = zookeeper->getChildrenWatch(zookeeper_path, nullptr, watch_callback);
process_nodes(children);
if (!error_message.empty() || all_hosts_ready)
break;
Strings nodes = zookeeper->getChildrenWatch(zookeeper_path, nullptr, watch_callback);
process_zk_nodes(nodes);
if (!unready_hosts.empty() && !error_message)
{
LOG_TRACE(log, "Waiting for host {}", unready_hosts.begin()->first);
std::unique_lock dummy_lock{dummy_mutex};
if (use_timeout)
{
std::chrono::steady_clock::time_point start_time = std::chrono::steady_clock::now();
if (!watch_triggered_event.wait_for(dummy_lock, time_left, watch_triggered))
elapsed = std::chrono::steady_clock::now() - start_time;
if ((elapsed > timeout) || !watch_triggered_event.wait_for(dummy_lock, timeout - elapsed, watch_triggered))
break;
time_left -= (std::chrono::steady_clock::now() - start_time);
}
else
watch_triggered_event.wait(dummy_lock, watch_triggered);
@ -385,32 +355,26 @@ void BackupCoordinationDistributedBarrier::waitForAllHostsToFinish(const Strings
/// Remove watch by triggering it.
zookeeper->create(zookeeper_path + "/remove_watch-", "", zkutil::CreateMode::EphemeralSequential);
std::unique_lock dummy_lock{dummy_mutex};
watch_triggered_event.wait_for(dummy_lock, timeout, watch_triggered);
watch_triggered_event.wait(dummy_lock, watch_triggered);
}
if (!error_message.empty())
if (error_message)
throw Exception(ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE, "Error occurred on host {}: {}", *host_with_error, *error_message);
if (!unready_hosts.empty())
{
throw Exception(
ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE,
"Host {} failed {} with message: {}",
error_host_id,
operation_name,
error_message);
"Waited for host {} too long ({})",
unready_hosts.begin()->first,
to_string(elapsed));
}
}
if (all_hosts_ready)
{
LOG_TRACE(log, "All hosts have finished {}", operation_name);
return;
}
throw Exception(
ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE,
"Host {} has failed {}: Time ({}) is out",
not_ready_host_id,
operation_name,
to_string(timeout));
void BackupCoordinationStageSync::syncStageError(const String & current_host, const String & error_message)
{
auto zookeeper = get_zookeeper();
zookeeper->createIfNotExists(zookeeper_path + "/error", current_host + "|" + error_message);
}
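Sketch of the node-naming convention these two functions agree on (illustrative helpers, not part of the diff): a per-host stage node is named "<host>|<stage>" and the shared "error" node stores "<host>|<message>".

#include <string>

std::string stageNodeName(const std::string & host, int stage)
{
    return host + "|" + std::to_string(stage);
}

std::string errorNodeValue(const std::string & host, const std::string & message)
{
    return host + "|" + message;
}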
}

View File

@ -1,6 +1,7 @@
#pragma once
#include <Backups/IBackupCoordination.h>
#include <Backups/IRestoreCoordination.h>
#include <Common/ZooKeeper/Common.h>
#include <map>
#include <unordered_map>
@ -10,81 +11,67 @@ namespace DB
{
/// Helper designed to be used in an implementation of the IBackupCoordination interface in the part related to replicated tables.
class BackupCoordinationReplicatedTablesInfo
class BackupCoordinationReplicatedPartNames
{
public:
BackupCoordinationReplicatedTablesInfo() = default;
/// Adds a data path in backup for a replicated table.
/// Multiple replicas of the replicated table call this function and then all the added paths can be returned by call of the function
/// getReplicatedTableDataPaths().
void addDataPath(const String & table_zk_path, const String & table_data_path);
/// Returns all the data paths in backup added for a replicated table (see also addReplicatedTableDataPath()).
Strings getDataPaths(const String & table_zk_path) const;
BackupCoordinationReplicatedPartNames();
~BackupCoordinationReplicatedPartNames();
using PartNameAndChecksum = IBackupCoordination::PartNameAndChecksum;
/// Adds part names which a specified replica of a replicated table is going to put to the backup.
/// Multiple replicas of the replicated table call this function and then the added part names can be returned by calling
/// getReplicatedTablePartNames().
/// getPartNames().
/// Checksums are used only to check that parts under the same names on different replicas are the same.
void addPartNames(
const String & host_id,
const DatabaseAndTableName & table_name,
const String & table_zk_path,
const String & table_name_for_logs,
const String & replica_name,
const std::vector<PartNameAndChecksum> & part_names_and_checksums);
void preparePartNamesByLocations();
/// Returns the names of the parts which a specified replica of a replicated table should put to the backup.
/// This is the same list as was added by calling addReplicatedTablePartNames() but without duplications and without
/// This is the same list as was added by calling addPartNames() but without duplications and without
/// parts covered by other parts.
Strings getPartNames(const String & host_id, const DatabaseAndTableName & table_name, const String & table_zk_path) const;
Strings getPartNames(const String & table_zk_path, const String & replica_name) const;
private:
class CoveredPartsFinder;
struct HostAndTableName;
void preparePartNames() const;
struct PartLocations
class CoveredPartsFinder;
struct PartReplicas
{
std::vector<std::shared_ptr<const HostAndTableName>> host_and_table_names;
std::vector<std::shared_ptr<const String>> replica_names;
UInt128 checksum;
};
struct TableInfo
{
Strings data_paths;
std::map<String /* part_name */, PartLocations> part_locations_by_names; /// Should be ordered because we need this map to be in the same order on every replica.
std::unordered_map<String /* host_id */, std::map<DatabaseAndTableName, Strings /* part_names */>> part_names_by_locations;
std::map<String /* part_name */, PartReplicas> parts_replicas; /// Should be ordered because we need this map to be in the same order on every replica.
mutable std::unordered_map<String /* replica_name */, Strings> replicas_parts;
std::unique_ptr<CoveredPartsFinder> covered_parts_finder;
};
std::unordered_map<String /* zk_path */, TableInfo> tables;
bool part_names_by_locations_prepared = false;
std::map<String /* table_zk_path */, TableInfo> table_infos; /// Should be ordered because we need this map to be in the same order on every replica.
mutable bool part_names_prepared = false;
};
/// Helper designed to be used in the implementation of the BackupCoordinationDistributed and RestoreCoordinationDistributed classes
/// to implement synchronization when we need all hosts to finish a specific task and then continue.
class BackupCoordinationDistributedBarrier
/// Helps to wait until all hosts come to a specified stage.
class BackupCoordinationStageSync
{
public:
BackupCoordinationDistributedBarrier(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_, const String & logger_name_, const String & operation_name_);
BackupCoordinationStageSync(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_, Poco::Logger * log_);
/// Sets that a specified host has finished the specific task, successfully or with an error.
/// In the latter case `error_message` should be set.
void finish(const String & host_id, const String & error_message = {});
/// Waits for a specified list of hosts to finish the specific task.
void waitForAllHostsToFinish(const Strings & host_ids, const std::chrono::seconds timeout = std::chrono::seconds(-1) /* no timeout */) const;
void syncStage(const String & current_host, int stage, const Strings & wait_hosts, std::chrono::seconds timeout);
void syncStageError(const String & current_host, const String & error_message);
private:
void createRootNodes();
String zookeeper_path;
zkutil::GetZooKeeper get_zookeeper;
const Poco::Logger * log;
String operation_name;
Poco::Logger * log;
};
}

View File

@ -10,47 +10,43 @@ namespace DB
using SizeAndChecksum = IBackupCoordination::SizeAndChecksum;
using FileInfo = IBackupCoordination::FileInfo;
BackupCoordinationLocal::BackupCoordinationLocal() : log(&Poco::Logger::get("BackupCoordination"))
{
}
BackupCoordinationLocal::BackupCoordinationLocal() = default;
BackupCoordinationLocal::~BackupCoordinationLocal() = default;
void BackupCoordinationLocal::addReplicatedTableDataPath(const String & table_zk_path, const String & table_data_path)
void BackupCoordinationLocal::syncStage(const String &, int, const Strings &, std::chrono::seconds)
{
}
void BackupCoordinationLocal::syncStageError(const String &, const String &)
{
}
void BackupCoordinationLocal::addReplicatedPartNames(const String & table_zk_path, const String & table_name_for_logs, const String & replica_name, const std::vector<PartNameAndChecksum> & part_names_and_checksums)
{
std::lock_guard lock{mutex};
replicated_tables.addDataPath(table_zk_path, table_data_path);
replicated_part_names.addPartNames(table_zk_path, table_name_for_logs, replica_name, part_names_and_checksums);
}
void BackupCoordinationLocal::addReplicatedTablePartNames(const String & /* host_id */, const DatabaseAndTableName & table_name, const String & table_zk_path, const std::vector<PartNameAndChecksum> & part_names_and_checksums)
Strings BackupCoordinationLocal::getReplicatedPartNames(const String & table_zk_path, const String & replica_name) const
{
std::lock_guard lock{mutex};
replicated_tables.addPartNames("", table_name, table_zk_path, part_names_and_checksums);
return replicated_part_names.getPartNames(table_zk_path, replica_name);
}
void BackupCoordinationLocal::finishPreparing(const String & /* host_id */, const String & error_message)
{
LOG_TRACE(log, "Finished preparing{}", (error_message.empty() ? "" : (" with error " + error_message)));
if (!error_message.empty())
return;
replicated_tables.preparePartNamesByLocations();
}
void BackupCoordinationLocal::waitForAllHostsPrepared(const Strings & /* host_ids */, std::chrono::seconds /* timeout */) const
{
}
Strings BackupCoordinationLocal::getReplicatedTableDataPaths(const String & table_zk_path) const
void BackupCoordinationLocal::addReplicatedDataPath(const String & table_zk_path, const String & data_path)
{
std::lock_guard lock{mutex};
return replicated_tables.getDataPaths(table_zk_path);
replicated_data_paths[table_zk_path].push_back(data_path);
}
Strings BackupCoordinationLocal::getReplicatedTablePartNames(const String & /* host_id */, const DatabaseAndTableName & table_name, const String & table_zk_path) const
Strings BackupCoordinationLocal::getReplicatedDataPaths(const String & table_zk_path) const
{
std::lock_guard lock{mutex};
return replicated_tables.getPartNames("", table_name, table_zk_path);
auto it = replicated_data_paths.find(table_zk_path);
if (it == replicated_data_paths.end())
return {};
return it->second;
}
@ -93,9 +89,14 @@ std::vector<FileInfo> BackupCoordinationLocal::getAllFileInfos() const
return res;
}
Strings BackupCoordinationLocal::listFiles(const String & prefix, const String & terminator) const
Strings BackupCoordinationLocal::listFiles(const String & directory, bool recursive) const
{
std::lock_guard lock{mutex};
String prefix = directory;
if (!prefix.empty() && !prefix.ends_with('/'))
prefix += '/';
String terminator = recursive ? "" : "/";
Strings elements;
for (auto it = file_names.lower_bound(prefix); it != file_names.end(); ++it)
{
@ -111,9 +112,25 @@ Strings BackupCoordinationLocal::listFiles(const String & prefix, const String &
continue;
elements.push_back(String{new_element});
}
return elements;
}
bool BackupCoordinationLocal::hasFiles(const String & directory) const
{
std::lock_guard lock{mutex};
String prefix = directory;
if (!prefix.empty() && !prefix.ends_with('/'))
prefix += '/';
auto it = file_names.lower_bound(prefix);
if (it == file_names.end())
return false;
const String & name = it->first;
return name.starts_with(prefix);
}
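A self-contained sketch of the lower_bound idiom above: in a map ordered by key, the first key not less than the prefix is the only candidate that can start with it.

#include <map>
#include <string>

bool anyKeyStartsWith(const std::map<std::string, int> & m, const std::string & prefix)
{
    auto it = m.lower_bound(prefix);
    return it != m.end() && it->first.compare(0, prefix.size(), prefix) == 0;
}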
std::optional<FileInfo> BackupCoordinationLocal::getFileInfo(const String & file_name) const
{
std::lock_guard lock{mutex};

View File

@ -2,6 +2,7 @@
#include <Backups/IBackupCoordination.h>
#include <Backups/BackupCoordinationHelpers.h>
#include <base/defines.h>
#include <map>
#include <mutex>
@ -18,24 +19,22 @@ public:
BackupCoordinationLocal();
~BackupCoordinationLocal() override;
void addReplicatedTableDataPath(const String & table_zk_path, const String & table_data_path) override;
void addReplicatedTablePartNames(
const String & host_id,
const DatabaseAndTableName & table_name,
const String & table_zk_path,
const std::vector<PartNameAndChecksum> & part_names_and_checksums) override;
void syncStage(const String & current_host, int stage, const Strings & wait_hosts, std::chrono::seconds timeout) override;
void syncStageError(const String & current_host, const String & error_message) override;
void finishPreparing(const String & host_id, const String & error_message) override;
void waitForAllHostsPrepared(const Strings & host_ids, std::chrono::seconds timeout) const override;
void addReplicatedPartNames(const String & table_zk_path, const String & table_name_for_logs, const String & replica_name,
const std::vector<PartNameAndChecksum> & part_names_and_checksums) override;
Strings getReplicatedPartNames(const String & table_zk_path, const String & replica_name) const override;
Strings getReplicatedTableDataPaths(const String & table_zk_path) const override;
Strings getReplicatedTablePartNames(const String & host_id, const DatabaseAndTableName & table_name, const String & table_zk_path) const override;
void addReplicatedDataPath(const String & table_zk_path, const String & data_path) override;
Strings getReplicatedDataPaths(const String & table_zk_path) const override;
void addFileInfo(const FileInfo & file_info, bool & is_data_file_required) override;
void updateFileInfo(const FileInfo & file_info) override;
std::vector<FileInfo> getAllFileInfos() const override;
Strings listFiles(const String & prefix, const String & terminator) const override;
Strings listFiles(const String & directory, bool recursive) const override;
bool hasFiles(const String & directory) const override;
std::optional<FileInfo> getFileInfo(const String & file_name) const override;
std::optional<FileInfo> getFileInfo(const SizeAndChecksum & size_and_checksum) const override;
@ -46,13 +45,12 @@ public:
private:
mutable std::mutex mutex;
BackupCoordinationReplicatedTablesInfo replicated_tables;
std::map<String /* file_name */, SizeAndChecksum> file_names; /// Should be ordered alphabetically, see listFiles(). For empty files we assume checksum = 0.
std::map<SizeAndChecksum, FileInfo> file_infos; /// Information about files. Without empty files.
Strings archive_suffixes;
size_t current_archive_suffix = 0;
const Poco::Logger * log;
BackupCoordinationReplicatedPartNames replicated_part_names TSA_GUARDED_BY(mutex);
std::unordered_map<String, Strings> replicated_data_paths TSA_GUARDED_BY(mutex);
std::map<String /* file_name */, SizeAndChecksum> file_names TSA_GUARDED_BY(mutex); /// Should be ordered alphabetically, see listFiles(). For empty files we assume checksum = 0.
std::map<SizeAndChecksum, FileInfo> file_infos TSA_GUARDED_BY(mutex); /// Information about files. Without empty files.
Strings archive_suffixes TSA_GUARDED_BY(mutex);
size_t current_archive_suffix TSA_GUARDED_BY(mutex) = 0;
};
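A minimal sketch of how a member annotated like the fields above is meant to be accessed, assuming the TSA_GUARDED_BY macro from base/defines.h is in scope and the build runs Clang's thread safety analysis.

#include <mutex>

struct GuardedCounter
{
    std::mutex mutex;
    int value TSA_GUARDED_BY(mutex) = 0;   // warned about when touched without holding `mutex`

    void increment()
    {
        std::lock_guard lock{mutex};       // the analysis sees the lock and allows the access
        ++value;
    }
};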

View File

@ -0,0 +1,550 @@
#include <Backups/BackupEntriesCollector.h>
#include <Backups/BackupEntryFromMemory.h>
#include <Backups/IBackupCoordination.h>
#include <Backups/BackupUtils.h>
#include <Databases/IDatabase.h>
#include <Interpreters/Context.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/formatAST.h>
#include <Storages/IStorage.h>
#include <base/chrono_io.h>
#include <base/insertAtEnd.h>
#include <Common/escapeForFileName.h>
#include <boost/range/algorithm/copy.hpp>
#include <filesystem>
namespace fs = std::filesystem;
namespace DB
{
namespace ErrorCodes
{
extern const int CANNOT_COLLECT_OBJECTS_FOR_BACKUP;
extern const int CANNOT_BACKUP_TABLE;
extern const int TABLE_IS_DROPPED;
extern const int LOGICAL_ERROR;
}
bool BackupEntriesCollector::TableKey::operator ==(const TableKey & right) const
{
return (name == right.name) && (is_temporary == right.is_temporary);
}
bool BackupEntriesCollector::TableKey::operator <(const TableKey & right) const
{
return (name < right.name) || ((name == right.name) && (is_temporary < right.is_temporary));
}
std::string_view BackupEntriesCollector::toString(Stage stage)
{
switch (stage)
{
case Stage::kPreparing: return "Preparing";
case Stage::kFindingTables: return "Finding tables";
case Stage::kExtractingDataFromTables: return "Extracting data from tables";
case Stage::kRunningPostTasks: return "Running post tasks";
case Stage::kWritingBackup: return "Writing backup";
case Stage::kError: return "Error";
}
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown backup stage: {}", static_cast<int>(stage));
}
BackupEntriesCollector::BackupEntriesCollector(
const ASTBackupQuery::Elements & backup_query_elements_,
const BackupSettings & backup_settings_,
std::shared_ptr<IBackupCoordination> backup_coordination_,
const ContextPtr & context_,
std::chrono::seconds timeout_)
: backup_query_elements(backup_query_elements_)
, backup_settings(backup_settings_)
, backup_coordination(backup_coordination_)
, context(context_)
, timeout(timeout_)
, log(&Poco::Logger::get("BackupEntriesCollector"))
{
}
BackupEntriesCollector::~BackupEntriesCollector() = default;
BackupEntries BackupEntriesCollector::getBackupEntries()
{
try
{
/// getBackupEntries() must not be called multiple times.
if (current_stage != Stage::kPreparing)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Already making backup entries");
/// Calculate the root path for collecting backup entries; it's either empty or has the format "shards/<shard_num>/replicas/<replica_num>/".
calculateRootPathInBackup();
/// Do renaming in the create queries according to the renaming config.
renaming_map = makeRenamingMapFromBackupQuery(backup_query_elements);
/// Find databases and tables which we're going to put to the backup.
setStage(Stage::kFindingTables);
collectDatabasesAndTablesInfo();
/// Make backup entries for the definitions of the found databases.
makeBackupEntriesForDatabasesDefs();
/// Make backup entries for the definitions of the found tables.
makeBackupEntriesForTablesDefs();
/// Make backup entries for the data of the found tables.
setStage(Stage::kExtractingDataFromTables);
makeBackupEntriesForTablesData();
/// Run all the tasks added with addPostCollectingTask().
setStage(Stage::kRunningPostTasks);
runPostCollectingTasks();
/// No more backup entries or tasks are allowed after this point.
setStage(Stage::kWritingBackup);
return std::move(backup_entries);
}
catch (...)
{
try
{
setStage(Stage::kError, getCurrentExceptionMessage(false));
}
catch (...)
{
}
throw;
}
}
void BackupEntriesCollector::setStage(Stage new_stage, const String & error_message)
{
if (new_stage == Stage::kError)
LOG_ERROR(log, "{} failed with error: {}", toString(current_stage), error_message);
else
LOG_TRACE(log, "{}", toString(new_stage));
current_stage = new_stage;
if (new_stage == Stage::kError)
{
backup_coordination->syncStageError(backup_settings.host_id, error_message);
}
else
{
auto all_hosts
= BackupSettings::Util::filterHostIDs(backup_settings.cluster_host_ids, backup_settings.shard_num, backup_settings.replica_num);
backup_coordination->syncStage(backup_settings.host_id, static_cast<int>(new_stage), all_hosts, timeout);
}
}
/// Calculates the root path for collecting backup entries;
/// it's either empty or has the format "shards/<shard_num>/replicas/<replica_num>/".
void BackupEntriesCollector::calculateRootPathInBackup()
{
root_path_in_backup = "/";
if (!backup_settings.host_id.empty())
{
auto [shard_num, replica_num]
= BackupSettings::Util::findShardNumAndReplicaNum(backup_settings.cluster_host_ids, backup_settings.host_id);
root_path_in_backup = root_path_in_backup / fs::path{"shards"} / std::to_string(shard_num) / "replicas" / std::to_string(replica_num);
}
LOG_TRACE(log, "Will use path in backup: {}", doubleQuoteString(String{root_path_in_backup}));
}
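Illustrative only, with hypothetical shard/replica numbers: the path produced above for shard 2, replica 1.

#include <filesystem>
#include <string>

std::filesystem::path exampleRootPathInBackup()
{
    std::filesystem::path root = "/";
    root = root / "shards" / std::to_string(2) / "replicas" / std::to_string(1);
    return root;   // "/shards/2/replicas/1"
}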
/// Finds databases and tables which we will put to the backup.
void BackupEntriesCollector::collectDatabasesAndTablesInfo()
{
bool use_timeout = (timeout.count() >= 0);
auto start_time = std::chrono::steady_clock::now();
int pass = 0;
do
{
database_infos.clear();
table_infos.clear();
consistent = true;
/// Collect information about databases and tables specified in the BACKUP query.
for (const auto & element : backup_query_elements)
{
switch (element.type)
{
case ASTBackupQuery::ElementType::TABLE:
{
collectTableInfo({element.database_name, element.table_name}, false, element.partitions, true);
break;
}
case ASTBackupQuery::ElementType::TEMPORARY_TABLE:
{
collectTableInfo({"", element.table_name}, true, element.partitions, true);
break;
}
case ASTBackupQuery::ElementType::DATABASE:
{
collectDatabaseInfo(element.database_name, element.except_tables, true);
break;
}
case ASTBackupQuery::ElementType::ALL:
{
collectAllDatabasesInfo(element.except_databases, element.except_tables);
break;
}
}
}
/// We have to check the consistency of the collected information to protect from the case when some table or database is
/// renamed while we are collecting, which would make the collected information invalid.
checkConsistency();
/// Two passes are the absolute minimum (see `previous_table_names` & `previous_database_names`).
auto elapsed = std::chrono::steady_clock::now() - start_time;
if (!consistent && (pass >= 2) && use_timeout)
{
if (elapsed > timeout)
throw Exception(
ErrorCodes::CANNOT_COLLECT_OBJECTS_FOR_BACKUP,
"Couldn't collect tables and databases to make a backup (pass #{}, elapsed {})",
pass,
to_string(elapsed));
}
if (pass >= 2)
LOG_WARNING(log, "Couldn't collect tables and databases to make a backup (pass #{}, elapsed {})", pass, to_string(elapsed));
++pass;
} while (!consistent);
LOG_INFO(log, "Will backup {} databases and {} tables", database_infos.size(), table_infos.size());
}
void BackupEntriesCollector::collectTableInfo(
const QualifiedTableName & table_name, bool is_temporary_table, const std::optional<ASTs> & partitions, bool throw_if_not_found)
{
/// Gather information about the table.
DatabasePtr database;
StoragePtr storage;
TableLockHolder table_lock;
ASTPtr create_table_query;
TableKey table_key{table_name, is_temporary_table};
if (throw_if_not_found)
{
auto resolved_id = is_temporary_table
? context->resolveStorageID(StorageID{"", table_name.table}, Context::ResolveExternal)
: context->resolveStorageID(StorageID{table_name.database, table_name.table}, Context::ResolveGlobal);
std::tie(database, storage) = DatabaseCatalog::instance().getDatabaseAndTable(resolved_id, context);
table_lock = storage->lockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout);
create_table_query = storage->getCreateQueryForBackup(*this);
}
else
{
auto resolved_id = is_temporary_table
? context->tryResolveStorageID(StorageID{"", table_name.table}, Context::ResolveExternal)
: context->tryResolveStorageID(StorageID{table_name.database, table_name.table}, Context::ResolveGlobal);
if (!resolved_id.empty())
std::tie(database, storage) = DatabaseCatalog::instance().tryGetDatabaseAndTable(resolved_id, context);
if (storage)
{
try
{
table_lock = storage->lockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout);
create_table_query = storage->getCreateQueryForBackup(*this);
}
catch (Exception & e)
{
if (e.code() != ErrorCodes::TABLE_IS_DROPPED)
throw;
}
}
if (!create_table_query)
{
consistent &= !table_infos.contains(table_key);
return;
}
}
fs::path data_path_in_backup;
if (is_temporary_table)
{
auto table_name_in_backup = renaming_map.getNewTemporaryTableName(table_name.table);
data_path_in_backup = root_path_in_backup / "temporary_tables" / "data" / escapeForFileName(table_name_in_backup);
}
else
{
auto table_name_in_backup = renaming_map.getNewTableName(table_name);
data_path_in_backup
= root_path_in_backup / "data" / escapeForFileName(table_name_in_backup.database) / escapeForFileName(table_name_in_backup.table);
}
/// Check that information is consistent.
const auto & create = create_table_query->as<const ASTCreateQuery &>();
if ((create.getTable() != table_name.table) || (is_temporary_table != create.temporary) || (create.getDatabase() != table_name.database))
{
/// Table was renamed recently.
consistent = false;
return;
}
if (auto it = table_infos.find(table_key); it != table_infos.end())
{
const auto & table_info = it->second;
if ((table_info.database != database) || (table_info.storage != storage))
{
/// Table was renamed recently.
consistent = false;
return;
}
}
/// Add information to `table_infos`.
auto & res_table_info = table_infos[table_key];
res_table_info.database = database;
res_table_info.storage = storage;
res_table_info.table_lock = table_lock;
res_table_info.create_table_query = create_table_query;
res_table_info.data_path_in_backup = data_path_in_backup;
if (partitions)
{
if (!res_table_info.partitions)
res_table_info.partitions.emplace();
insertAtEnd(*res_table_info.partitions, *partitions);
}
}
void BackupEntriesCollector::collectDatabaseInfo(const String & database_name, const std::set<DatabaseAndTableName> & except_table_names, bool throw_if_not_found)
{
/// Gather information about the database.
DatabasePtr database;
ASTPtr create_database_query;
if (throw_if_not_found)
{
database = DatabaseCatalog::instance().getDatabase(database_name);
create_database_query = database->getCreateDatabaseQueryForBackup();
}
else
{
database = DatabaseCatalog::instance().tryGetDatabase(database_name);
if (!database)
{
consistent &= !database_infos.contains(database_name);
return;
}
try
{
create_database_query = database->getCreateDatabaseQueryForBackup();
}
catch (...)
{
/// The database has been dropped recently.
consistent &= !database_infos.contains(database_name);
return;
}
}
/// Check that information is consistent.
const auto & create = create_database_query->as<const ASTCreateQuery &>();
if (create.getDatabase() != database_name)
{
/// Database was renamed recently.
consistent = false;
return;
}
if (auto it = database_infos.find(database_name); it != database_infos.end())
{
const auto & database_info = it->second;
if (database_info.database != database)
{
/// Database was renamed recently.
consistent = false;
return;
}
}
/// Add information to `database_infos`.
auto & res_database_info = database_infos[database_name];
res_database_info.database = database;
res_database_info.create_database_query = create_database_query;
/// Add information about tables too.
for (auto it = database->getTablesIteratorForBackup(*this); it->isValid(); it->next())
{
if (except_table_names.contains({database_name, it->name()}))
continue;
collectTableInfo({database_name, it->name()}, /* is_temporary_table= */ false, {}, /* throw_if_not_found= */ false);
if (!consistent)
return;
}
}
void BackupEntriesCollector::collectAllDatabasesInfo(const std::set<String> & except_database_names, const std::set<DatabaseAndTableName> & except_table_names)
{
for (const auto & [database_name, database] : DatabaseCatalog::instance().getDatabases())
{
if (except_database_names.contains(database_name))
continue;
collectDatabaseInfo(database_name, except_table_names, false);
if (!consistent)
return;
}
}
/// Check for consistency of collected information about databases and tables.
void BackupEntriesCollector::checkConsistency()
{
if (!consistent)
return; /// Already inconsistent, no more checks necessary
/// Databases found while we were scanning tables and while we were scanning databases must be the same.
for (const auto & [key, table_info] : table_infos)
{
auto it = database_infos.find(key.name.database);
if (it != database_infos.end())
{
const auto & database_info = it->second;
if (database_info.database != table_info.database)
{
consistent = false;
return;
}
}
}
/// We need to scan tables at least twice to be sure that we haven't missed any table which could be renamed
/// while we were scanning.
std::set<String> database_names;
std::set<TableKey> table_names;
boost::range::copy(database_infos | boost::adaptors::map_keys, std::inserter(database_names, database_names.end()));
boost::range::copy(table_infos | boost::adaptors::map_keys, std::inserter(table_names, table_names.end()));
if (!previous_database_names || !previous_table_names || (*previous_database_names != database_names)
|| (*previous_table_names != table_names))
{
previous_database_names = std::move(database_names);
previous_table_names = std::move(table_names);
consistent = false;
}
}
/// Make backup entries for all the definitions of all the databases found.
void BackupEntriesCollector::makeBackupEntriesForDatabasesDefs()
{
for (const auto & [database_name, database_info] : database_infos)
{
LOG_TRACE(log, "Adding definition of database {}", backQuoteIfNeed(database_name));
ASTPtr new_create_query = database_info.create_database_query;
renameDatabaseAndTableNameInCreateQuery(context->getGlobalContext(), renaming_map, new_create_query);
String new_database_name = renaming_map.getNewDatabaseName(database_name);
auto metadata_path_in_backup = root_path_in_backup / "metadata" / (escapeForFileName(new_database_name) + ".sql");
backup_entries.emplace_back(metadata_path_in_backup, std::make_shared<BackupEntryFromMemory>(serializeAST(*new_create_query)));
}
}
/// Makes backup entries for the definitions of all the tables found.
void BackupEntriesCollector::makeBackupEntriesForTablesDefs()
{
for (const auto & [key, table_info] : table_infos)
{
LOG_TRACE(log, "Adding definition of {}table {}", (key.is_temporary ? "temporary " : ""), key.name.getFullName());
ASTPtr new_create_query = table_info.create_table_query;
renameDatabaseAndTableNameInCreateQuery(context->getGlobalContext(), renaming_map, new_create_query);
fs::path metadata_path_in_backup;
if (key.is_temporary)
{
auto new_name = renaming_map.getNewTemporaryTableName(key.name.table);
metadata_path_in_backup = root_path_in_backup / "temporary_tables" / "metadata" / (escapeForFileName(new_name) + ".sql");
}
else
{
auto new_name = renaming_map.getNewTableName(key.name);
metadata_path_in_backup
= root_path_in_backup / "metadata" / escapeForFileName(new_name.database) / (escapeForFileName(new_name.table) + ".sql");
}
backup_entries.emplace_back(metadata_path_in_backup, std::make_shared<BackupEntryFromMemory>(serializeAST(*new_create_query)));
}
}
void BackupEntriesCollector::makeBackupEntriesForTablesData()
{
if (backup_settings.structure_only)
return;
for (const auto & [key, table_info] : table_infos)
{
LOG_TRACE(log, "Adding data of {}table {}", (key.is_temporary ? "temporary " : ""), key.name.getFullName());
const auto & storage = table_info.storage;
const auto & data_path_in_backup = table_info.data_path_in_backup;
const auto & partitions = table_info.partitions;
storage->backupData(*this, data_path_in_backup, partitions);
}
}
void BackupEntriesCollector::addBackupEntry(const String & file_name, BackupEntryPtr backup_entry)
{
if (current_stage == Stage::kWritingBackup)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Adding backup entries is not allowed");
backup_entries.emplace_back(file_name, backup_entry);
}
void BackupEntriesCollector::addBackupEntries(const BackupEntries & backup_entries_)
{
if (current_stage == Stage::kWritingBackup)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Adding backup entries is not allowed");
insertAtEnd(backup_entries, backup_entries_);
}
void BackupEntriesCollector::addBackupEntries(BackupEntries && backup_entries_)
{
if (current_stage == Stage::kWritingBackup)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Adding backup entries is not allowed");
insertAtEnd(backup_entries, std::move(backup_entries_));
}
void BackupEntriesCollector::addPostCollectingTask(std::function<void()> task)
{
if (current_stage == Stage::kWritingBackup)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Adding post tasks is not allowed");
post_collecting_tasks.push(std::move(task));
}
/// Runs all the tasks added with addPostCollectingTask().
void BackupEntriesCollector::runPostCollectingTasks()
{
/// Post-collecting tasks can add other post-collecting tasks; our code is fine with that.
while (!post_collecting_tasks.empty())
{
auto task = std::move(post_collecting_tasks.front());
post_collecting_tasks.pop();
std::move(task)();
}
}
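A simplified, runnable sketch of the task-queue behaviour this relies on: tasks run in FIFO order and may enqueue further tasks while the loop is draining the queue.

#include <functional>
#include <iostream>
#include <queue>

int main()
{
    std::queue<std::function<void()>> post_collecting_tasks;

    post_collecting_tasks.push([&]
    {
        std::cout << "first task\n";
        post_collecting_tasks.push([] { std::cout << "task added by another task\n"; });
    });

    // Drain the queue; tasks pushed during execution are picked up too.
    while (!post_collecting_tasks.empty())
    {
        auto task = std::move(post_collecting_tasks.front());
        post_collecting_tasks.pop();
        std::move(task)();
    }
}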
void BackupEntriesCollector::throwPartitionsNotSupported(const StorageID & storage_id, const String & table_engine)
{
throw Exception(
ErrorCodes::CANNOT_BACKUP_TABLE,
"Table engine {} doesn't support partitions, cannot backup table {}",
table_engine,
storage_id.getFullTableName());
}
}

View File

@ -0,0 +1,138 @@
#pragma once
#include <Backups/BackupSettings.h>
#include <Databases/DDLRenamingVisitor.h>
#include <Core/QualifiedTableName.h>
#include <Parsers/ASTBackupQuery.h>
#include <Storages/IStorage_fwd.h>
#include <Storages/TableLockHolder.h>
#include <filesystem>
namespace DB
{
class IBackupEntry;
using BackupEntryPtr = std::shared_ptr<const IBackupEntry>;
using BackupEntries = std::vector<std::pair<String, BackupEntryPtr>>;
class IBackupCoordination;
class IDatabase;
using DatabasePtr = std::shared_ptr<IDatabase>;
struct StorageID;
/// Collects backup entries for all databases and tables which should be put to a backup.
class BackupEntriesCollector : private boost::noncopyable
{
public:
BackupEntriesCollector(const ASTBackupQuery::Elements & backup_query_elements_,
const BackupSettings & backup_settings_,
std::shared_ptr<IBackupCoordination> backup_coordination_,
const ContextPtr & context_,
std::chrono::seconds timeout_ = std::chrono::seconds(-1) /* no timeout */);
~BackupEntriesCollector();
/// Collects backup entries and returns the result.
/// This function first generates a list of databases and then calls IDatabase::backup() for each database from this list.
/// At this moment IDatabase::backup() calls IStorage::backup() and they both call addBackupEntry() to build a list of backup entries.
BackupEntries getBackupEntries();
const BackupSettings & getBackupSettings() const { return backup_settings; }
std::shared_ptr<IBackupCoordination> getBackupCoordination() const { return backup_coordination; }
ContextPtr getContext() const { return context; }
/// Adds a backup entry which will be later returned by getBackupEntries().
/// These functions can be called by implementations of IStorage::backup() in inherited storage classes.
void addBackupEntry(const String & file_name, BackupEntryPtr backup_entry);
void addBackupEntries(const BackupEntries & backup_entries_);
void addBackupEntries(BackupEntries && backup_entries_);
/// Adds a function which must be called after all IStorage::backup() have finished their work on all hosts.
/// This function is designed to help make the backup consistent in some complex cases, e.g. when
/// 1) we need to join (in a backup) the data of replicated tables gathered on different hosts.
void addPostCollectingTask(std::function<void()> task);
/// Writing a backup includes a few stages:
enum class Stage
{
/// Initial stage.
kPreparing,
/// Finding all tables and databases which we're going to put to the backup.
kFindingTables,
/// Making temporary hard links and preparing backup entries.
kExtractingDataFromTables,
/// Running special tasks for replicated databases or tables which can also prepare some backup entries.
kRunningPostTasks,
/// Writing backup entries to the backup and removing temporary hard links.
kWritingBackup,
/// An error happened during one of the stages above; the backup won't be written.
kError,
};
static std::string_view toString(Stage stage);
/// Throws an exception that a specified table engine doesn't support partitions.
[[noreturn]] static void throwPartitionsNotSupported(const StorageID & storage_id, const String & table_engine);
private:
void setStage(Stage new_stage, const String & error_message = {});
void calculateRootPathInBackup();
void collectDatabasesAndTablesInfo();
void collectTableInfo(const QualifiedTableName & table_name, bool is_temporary_table, const std::optional<ASTs> & partitions, bool throw_if_not_found);
void collectDatabaseInfo(const String & database_name, const std::set<DatabaseAndTableName> & except_table_names, bool throw_if_not_found);
void collectAllDatabasesInfo(const std::set<String> & except_database_names, const std::set<DatabaseAndTableName> & except_table_names);
void checkConsistency();
void makeBackupEntriesForDatabasesDefs();
void makeBackupEntriesForTablesDefs();
void makeBackupEntriesForTablesData();
void runPostCollectingTasks();
const ASTBackupQuery::Elements backup_query_elements;
const BackupSettings backup_settings;
std::shared_ptr<IBackupCoordination> backup_coordination;
ContextPtr context;
std::chrono::seconds timeout;
Poco::Logger * log;
Stage current_stage = Stage::kPreparing;
std::filesystem::path root_path_in_backup;
DDLRenamingMap renaming_map;
struct DatabaseInfo
{
DatabasePtr database;
ASTPtr create_database_query;
};
struct TableInfo
{
DatabasePtr database;
StoragePtr storage;
TableLockHolder table_lock;
ASTPtr create_table_query;
std::filesystem::path data_path_in_backup;
std::optional<ASTs> partitions;
};
struct TableKey
{
QualifiedTableName name;
bool is_temporary = false;
bool operator ==(const TableKey & right) const;
bool operator <(const TableKey & right) const;
};
std::unordered_map<String, DatabaseInfo> database_infos;
std::map<TableKey, TableInfo> table_infos;
std::optional<std::set<String>> previous_database_names;
std::optional<std::set<TableKey>> previous_table_names;
bool consistent = false;
BackupEntries backup_entries;
std::queue<std::function<void()>> post_collecting_tasks;
};
}
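For orientation, here is a minimal self-contained sketch of the collect-then-run-post-tasks flow described in the comments above. It is not the real BackupEntriesCollector: the ToyCollector type, the string payloads and the entry names are invented; only the shape of addBackupEntry() / addPostCollectingTask() / getBackupEntries() mirrors the header.

#include <functional>
#include <iostream>
#include <queue>
#include <string>
#include <utility>
#include <vector>

/// Simplified stand-in for the collector above: an entry is just a (file_name, payload) pair,
/// and post-collecting tasks run once all ordinary entries have been added.
struct ToyCollector
{
    void addBackupEntry(std::string file_name, std::string payload)
    {
        entries.emplace_back(std::move(file_name), std::move(payload));
    }

    void addPostCollectingTask(std::function<void()> task)
    {
        post_tasks.push(std::move(task));
    }

    std::vector<std::pair<std::string, std::string>> getBackupEntries()
    {
        while (!post_tasks.empty()) /// mirrors runPostCollectingTasks()
        {
            post_tasks.front()();
            post_tasks.pop();
        }
        return std::move(entries);
    }

    std::vector<std::pair<std::string, std::string>> entries;
    std::queue<std::function<void()>> post_tasks;
};

int main()
{
    ToyCollector collector;

    /// What an IStorage::backup()-like implementation might do: register its data files...
    collector.addBackupEntry("data/db/table/part_1.bin", "<part data>");

    /// ...and defer work that must happen after every table on every host has been collected,
    /// e.g. joining information about replicated tables.
    collector.addPostCollectingTask([&collector]
    {
        collector.addBackupEntry("data/db/table/replicated_parts.txt", "<joined part list>");
    });

    for (const auto & [name, payload] : collector.getBackupEntries())
        std::cout << name << " -> " << payload << '\n';
}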

View File

@ -1,6 +1,7 @@
#pragma once
#include <Backups/IBackupEntry.h>
#include <base/defines.h>
#include <mutex>
namespace Poco { class TemporaryFile; }
@ -41,7 +42,7 @@ public:
private:
const DiskPtr disk;
const String file_path;
mutable std::optional<UInt64> file_size;
mutable std::optional<UInt64> file_size TSA_GUARDED_BY(get_file_size_mutex);
mutable std::mutex get_file_size_mutex;
const std::optional<UInt128> checksum;
const std::shared_ptr<Poco::TemporaryFile> temporary_file;

View File

@ -36,7 +36,7 @@ namespace ErrorCodes
extern const int WRONG_BASE_BACKUP;
extern const int BACKUP_ENTRY_ALREADY_EXISTS;
extern const int BACKUP_ENTRY_NOT_FOUND;
extern const int BAD_ARGUMENTS;
extern const int BACKUP_IS_EMPTY;
extern const int LOGICAL_ERROR;
}
@ -65,6 +65,14 @@ namespace
{
return hexChecksum(size_and_checksum.second) + std::to_string(size_and_checksum.first);
}
/// We store entries' file names in the backup without leading slashes.
String removeLeadingSlash(const String & path)
{
if (path.starts_with('/'))
return path.substr(1);
return path;
}
}
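A tiny standalone check of the helper above; the function body is copied from the diff, the sample paths are invented, and std::string::starts_with requires C++20.

#include <cassert>
#include <string>

using String = std::string;

/// Copy of the helper above: backup entry names are stored without a leading slash.
String removeLeadingSlash(const String & path)
{
    if (path.starts_with('/'))
        return path.substr(1);
    return path;
}

int main()
{
    assert(removeLeadingSlash("/data/db/table/part_1.bin") == "data/db/table/part_1.bin");
    assert(removeLeadingSlash("data/db/table/part_1.bin") == "data/db/table/part_1.bin");
    assert(removeLeadingSlash("") == "");
}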
@ -151,7 +159,7 @@ BackupImpl::BackupImpl(
, uuid(backup_uuid_)
, version(CURRENT_BACKUP_VERSION)
, base_backup_info(base_backup_info_)
, log(&Poco::Logger::get("Backup"))
, log(&Poco::Logger::get("BackupImpl"))
{
open(context_);
}
@ -218,13 +226,6 @@ void BackupImpl::close()
{
std::lock_guard lock{mutex};
if (!is_internal_backup && writing_finalized)
{
LOG_TRACE(log, "Finalizing backup {}", backup_name);
writeBackupMetadata();
LOG_INFO(log, "Finalized backup {}", backup_name);
}
archive_readers.clear();
for (auto & archive_writer : archive_writers)
archive_writer = {"", nullptr};
@ -249,10 +250,12 @@ void BackupImpl::writeBackupMetadata()
config->setString("timestamp", toString(LocalDateTime{timestamp}));
config->setString("uuid", toString(*uuid));
auto all_file_infos = coordination->getAllFileInfos();
if (base_backup_info)
{
bool base_backup_in_use = false;
for (const auto & info : coordination->getAllFileInfos())
for (const auto & info : all_file_infos)
{
if (info.base_size)
base_backup_in_use = true;
@ -266,13 +269,13 @@ void BackupImpl::writeBackupMetadata()
}
size_t index = 0;
for (const auto & info : coordination->getAllFileInfos())
for (const auto & info : all_file_infos)
{
String prefix = index ? "contents.file[" + std::to_string(index) + "]." : "contents.file.";
config->setString(prefix + "name", info.file_name);
config->setUInt(prefix + "size", info.size);
if (info.size)
{
config->setString(prefix + "name", info.file_name);
config->setString(prefix + "checksum", hexChecksum(info.checksum));
if (info.base_size)
{
@ -303,6 +306,7 @@ void BackupImpl::writeBackupMetadata()
else
out = writer->writeFile(".backup");
out->write(str.data(), str.size());
out->finalize();
}
void BackupImpl::readBackupMetadata()
@ -375,18 +379,25 @@ void BackupImpl::readBackupMetadata()
}
}
Strings BackupImpl::listFiles(const String & prefix, const String & terminator) const
Strings BackupImpl::listFiles(const String & directory, bool recursive) const
{
std::lock_guard lock{mutex};
if (!prefix.ends_with('/') && !prefix.empty())
throw Exception("prefix should end with '/'", ErrorCodes::BAD_ARGUMENTS);
return coordination->listFiles(prefix, terminator);
auto adjusted_dir = removeLeadingSlash(directory);
return coordination->listFiles(adjusted_dir, recursive);
}
bool BackupImpl::hasFiles(const String & directory) const
{
std::lock_guard lock{mutex};
auto adjusted_dir = removeLeadingSlash(directory);
return coordination->hasFiles(adjusted_dir);
}
bool BackupImpl::fileExists(const String & file_name) const
{
std::lock_guard lock{mutex};
return coordination->getFileInfo(file_name).has_value();
auto adjusted_path = removeLeadingSlash(file_name);
return coordination->getFileInfo(adjusted_path).has_value();
}
bool BackupImpl::fileExists(const SizeAndChecksum & size_and_checksum) const
@ -398,7 +409,8 @@ bool BackupImpl::fileExists(const SizeAndChecksum & size_and_checksum) const
UInt64 BackupImpl::getFileSize(const String & file_name) const
{
std::lock_guard lock{mutex};
auto info = coordination->getFileInfo(file_name);
auto adjusted_path = removeLeadingSlash(file_name);
auto info = coordination->getFileInfo(adjusted_path);
if (!info)
throw Exception(
ErrorCodes::BACKUP_ENTRY_NOT_FOUND, "Backup {}: Entry {} not found in the backup", backup_name, quoteString(file_name));
@ -408,7 +420,8 @@ UInt64 BackupImpl::getFileSize(const String & file_name) const
UInt128 BackupImpl::getFileChecksum(const String & file_name) const
{
std::lock_guard lock{mutex};
auto info = coordination->getFileInfo(file_name);
auto adjusted_path = removeLeadingSlash(file_name);
auto info = coordination->getFileInfo(adjusted_path);
if (!info)
throw Exception(
ErrorCodes::BACKUP_ENTRY_NOT_FOUND, "Backup {}: Entry {} not found in the backup", backup_name, quoteString(file_name));
@ -418,7 +431,8 @@ UInt128 BackupImpl::getFileChecksum(const String & file_name) const
SizeAndChecksum BackupImpl::getFileSizeAndChecksum(const String & file_name) const
{
std::lock_guard lock{mutex};
auto info = coordination->getFileInfo(file_name);
auto adjusted_path = removeLeadingSlash(file_name);
auto info = coordination->getFileInfo(adjusted_path);
if (!info)
throw Exception(
ErrorCodes::BACKUP_ENTRY_NOT_FOUND, "Backup {}: Entry {} not found in the backup", backup_name, quoteString(file_name));
@ -436,17 +450,18 @@ BackupEntryPtr BackupImpl::readFile(const SizeAndChecksum & size_and_checksum) c
if (open_mode != OpenMode::READ)
throw Exception("Backup is not opened for reading", ErrorCodes::LOGICAL_ERROR);
if (!size_and_checksum.first)
{
/// Entry's data is empty.
return std::make_unique<BackupEntryFromMemory>(nullptr, 0, UInt128{0, 0});
}
auto info_opt = coordination->getFileInfo(size_and_checksum);
if (!info_opt)
throw Exception(
ErrorCodes::BACKUP_ENTRY_NOT_FOUND, "Backup {}: Entry {} not found in the backup", backup_name, formatSizeAndChecksum(size_and_checksum));
const auto & info = *info_opt;
if (!info.size)
{
/// Entry's data is empty.
return std::make_unique<BackupEntryFromMemory>(nullptr, 0, UInt128{0, 0});
}
if (!info.base_size)
{
@ -494,12 +509,16 @@ void BackupImpl::writeFile(const String & file_name, BackupEntryPtr entry)
if (open_mode != OpenMode::WRITE)
throw Exception("Backup is not opened for writing", ErrorCodes::LOGICAL_ERROR);
if (coordination->getFileInfo(file_name))
if (writing_finalized)
throw Exception("Backup is already finalized", ErrorCodes::LOGICAL_ERROR);
auto adjusted_path = removeLeadingSlash(file_name);
if (coordination->getFileInfo(adjusted_path))
throw Exception(
ErrorCodes::BACKUP_ENTRY_ALREADY_EXISTS, "Backup {}: Entry {} already exists", backup_name, quoteString(file_name));
FileInfo info;
info.file_name = file_name;
info.file_name = adjusted_path;
size_t size = entry->getSize();
info.size = size;
@ -520,13 +539,13 @@ void BackupImpl::writeFile(const String & file_name, BackupEntryPtr entry)
}
/// Check if an entry with the same name exists in the base backup.
bool base_exists = (base_backup && base_backup->fileExists(file_name));
bool base_exists = (base_backup && base_backup->fileExists(adjusted_path));
UInt64 base_size = 0;
UInt128 base_checksum{0, 0};
if (base_exists)
{
base_size = base_backup->getFileSize(file_name);
base_checksum = base_backup->getFileChecksum(file_name);
base_size = base_backup->getFileSize(adjusted_path);
base_checksum = base_backup->getFileChecksum(adjusted_path);
}
std::unique_ptr<SeekableReadBuffer> read_buffer; /// We'll set that later.
@ -647,6 +666,7 @@ void BackupImpl::writeFile(const String & file_name, BackupEntryPtr entry)
}
copyData(*read_buffer, *out);
out->finalize();
}
@ -656,6 +676,19 @@ void BackupImpl::finalizeWriting()
if (open_mode != OpenMode::WRITE)
throw Exception("Backup is not opened for writing", ErrorCodes::LOGICAL_ERROR);
if (writing_finalized)
throw Exception("Backup is already finalized", ErrorCodes::LOGICAL_ERROR);
if (!coordination->hasFiles(""))
throw Exception("Backup must not be empty", ErrorCodes::BACKUP_IS_EMPTY);
if (!is_internal_backup)
{
LOG_TRACE(log, "Finalizing backup {}", backup_name);
writeBackupMetadata();
LOG_TRACE(log, "Finalized backup {}", backup_name);
}
writing_finalized = true;
}

View File

@ -57,7 +57,8 @@ public:
OpenMode getOpenMode() const override { return open_mode; }
time_t getTimestamp() const override;
UUID getUUID() const override { return *uuid; }
Strings listFiles(const String & prefix, const String & terminator) const override;
Strings listFiles(const String & directory, bool recursive) const override;
bool hasFiles(const String & directory) const override;
bool fileExists(const String & file_name) const override;
bool fileExists(const SizeAndChecksum & size_and_checksum) const override;
UInt64 getFileSize(const String & file_name) const override;

View File

@ -1,437 +1,60 @@
#include <Backups/BackupUtils.h>
#include <Backups/BackupEntryFromMemory.h>
#include <Backups/BackupSettings.h>
#include <Backups/DDLCompareUtils.h>
#include <Backups/DDLRenamingVisitor.h>
#include <Backups/IBackup.h>
#include <Backups/IBackupCoordination.h>
#include <Backups/formatTableNameOrTemporaryTableName.h>
#include <Backups/replaceTableUUIDWithMacroInReplicatedTableDef.h>
#include <Common/escapeForFileName.h>
#include <Backups/RestoreSettings.h>
#include <Access/Common/AccessRightsElement.h>
#include <Databases/IDatabase.h>
#include <Interpreters/Context.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/formatAST.h>
#include <Storages/IStorage.h>
#include <Storages/StorageReplicatedMergeTree.h>
#include <Databases/DDLRenamingVisitor.h>
#include <Interpreters/DatabaseCatalog.h>
namespace DB
{
namespace ErrorCodes
{
extern const int CANNOT_BACKUP_TABLE;
extern const int CANNOT_BACKUP_DATABASE;
extern const int BACKUP_IS_EMPTY;
extern const int LOGICAL_ERROR;
}
namespace
DDLRenamingMap makeRenamingMapFromBackupQuery(const ASTBackupQuery::Elements & elements)
{
/// Helper to calculate paths inside a backup.
class PathsInBackup
DDLRenamingMap map;
for (const auto & element : elements)
{
public:
/// Returns the path to metadata in backup.
static String getMetadataPath(const DatabaseAndTableName & table_name, size_t shard_index, size_t replica_index)
switch (element.type)
{
if (table_name.first.empty() || table_name.second.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Database name and table name must not be empty");
return getPathForShardAndReplica(shard_index, replica_index) + String{"metadata/"} + escapeForFileName(table_name.first) + "/"
+ escapeForFileName(table_name.second) + ".sql";
}
static String getMetadataPath(const String & database_name, size_t shard_index, size_t replica_index)
{
if (database_name.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Database name must not be empty");
return getPathForShardAndReplica(shard_index, replica_index) + String{"metadata/"} + escapeForFileName(database_name) + ".sql";
}
static String getMetadataPath(const IAST & create_query, size_t shard_index, size_t replica_index)
{
const auto & create = create_query.as<const ASTCreateQuery &>();
if (!create.table)
return getMetadataPath(create.getDatabase(), shard_index, replica_index);
if (create.temporary)
return getMetadataPath({DatabaseCatalog::TEMPORARY_DATABASE, create.getTable()}, shard_index, replica_index);
return getMetadataPath({create.getDatabase(), create.getTable()}, shard_index, replica_index);
}
/// Returns the path to table's data in backup.
static String getDataPath(const DatabaseAndTableName & table_name, size_t shard_index, size_t replica_index)
{
if (table_name.first.empty() || table_name.second.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Database name and table name must not be empty");
assert(!table_name.first.empty() && !table_name.second.empty());
return getPathForShardAndReplica(shard_index, replica_index) + String{"data/"} + escapeForFileName(table_name.first) + "/"
+ escapeForFileName(table_name.second) + "/";
}
static String getDataPath(const IAST & create_query, size_t shard_index, size_t replica_index)
{
const auto & create = create_query.as<const ASTCreateQuery &>();
if (!create.table)
return {};
if (create.temporary)
return getDataPath({DatabaseCatalog::TEMPORARY_DATABASE, create.getTable()}, shard_index, replica_index);
return getDataPath({create.getDatabase(), create.getTable()}, shard_index, replica_index);
}
private:
static String getPathForShardAndReplica(size_t shard_index, size_t replica_index)
{
if (shard_index || replica_index)
return fmt::format("shards/{}/replicas/{}/", shard_index, replica_index);
else
return "";
}
};
using Kind = ASTBackupQuery::Kind;
using Element = ASTBackupQuery::Element;
using Elements = ASTBackupQuery::Elements;
using ElementType = ASTBackupQuery::ElementType;
/// Makes backup entries to backup databases and tables according to the elements of ASTBackupQuery.
/// Keep this class consistent with RestoreTasksBuilder.
class BackupEntriesBuilder
{
public:
BackupEntriesBuilder(const ContextPtr & context_, const BackupSettings & backup_settings_, std::shared_ptr<IBackupCoordination> backup_coordination_)
: context(context_), backup_settings(backup_settings_), backup_coordination(backup_coordination_)
{
}
/// Prepares internal structures for making backup entries.
void prepare(const ASTBackupQuery::Elements & elements, std::chrono::seconds timeout_for_other_nodes_to_prepare)
{
try
case ASTBackupQuery::TABLE:
{
prepareImpl(elements);
}
catch (...)
{
backup_coordination->finishPreparing(backup_settings.host_id, getCurrentExceptionMessage(false));
throw;
const String & table_name = element.table_name;
const String & database_name = element.database_name;
const String & new_table_name = element.new_table_name;
const String & new_database_name = element.new_database_name;
assert(!table_name.empty());
assert(!new_table_name.empty());
assert(!database_name.empty());
assert(!new_database_name.empty());
map.setNewTableName({database_name, table_name}, {new_database_name, new_table_name});
break;
}
/// We've finished restoring metadata, now we will wait for other replicas and shards to finish too.
/// We need this waiting because we're going to call some functions which requires data collected from other nodes too,
/// see IRestoreCoordination::checkTablesNotExistedInReplicatedDBs(), IRestoreCoordination::getReplicatedTableDataPath().
backup_coordination->finishPreparing(backup_settings.host_id);
backup_coordination->waitForAllHostsPrepared(
BackupSettings::Util::filterHostIDs(
backup_settings.cluster_host_ids, backup_settings.shard_num, backup_settings.replica_num),
timeout_for_other_nodes_to_prepare);
}
/// Makes backup entries, should be called after prepare().
BackupEntries makeBackupEntries() const
{
BackupEntries res;
for (const auto & info : databases | boost::adaptors::map_values)
res.push_back(makeBackupEntryForMetadata(*info.create_query));
for (const auto & info : tables | boost::adaptors::map_values)
case ASTBackupQuery::TEMPORARY_TABLE:
{
res.push_back(makeBackupEntryForMetadata(*info.create_query));
appendBackupEntriesForData(res, info);
const String & table_name = element.table_name;
const String & new_table_name = element.new_table_name;
assert(!table_name.empty());
assert(!new_table_name.empty());
map.setNewTemporaryTableName(table_name, new_table_name);
break;
}
/// A backup cannot be empty.
if (res.empty())
throw Exception("Backup must not be empty", ErrorCodes::BACKUP_IS_EMPTY);
return res;
}
private:
void prepareImpl(const ASTBackupQuery::Elements & elements)
{
calculateShardNumAndReplicaNumInBackup();
renaming_settings.setFromBackupQuery(elements);
for (const auto & element : elements)
case ASTBackupQuery::DATABASE:
{
switch (element.type)
{
case ElementType::TABLE:
{
prepareToBackupTable(element.name, element.partitions);
break;
}
case ElementType::DATABASE:
{
const String & database_name = element.name.first;
prepareToBackupDatabase(database_name, element.except_list);
break;
}
case ElementType::ALL_DATABASES:
{
prepareToBackupAllDatabases(element.except_list);
break;
}
}
}
}
void calculateShardNumAndReplicaNumInBackup()
{
size_t shard_num = 0;
size_t replica_num = 0;
if (!backup_settings.host_id.empty())
{
std::tie(shard_num, replica_num)
= BackupSettings::Util::findShardNumAndReplicaNum(backup_settings.cluster_host_ids, backup_settings.host_id);
}
shard_num_in_backup = shard_num;
replica_num_in_backup = replica_num;
}
/// Prepares to backup a single table and probably its database's definition.
void prepareToBackupTable(const DatabaseAndTableName & table_name_, const ASTs & partitions_)
{
auto [database, storage] = DatabaseCatalog::instance().getDatabaseAndTable({table_name_.first, table_name_.second}, context);
prepareToBackupTable(table_name_, {database, storage}, partitions_);
}
void prepareToBackupTable(const DatabaseAndTableName & table_name_, const DatabaseAndTable & table_, const ASTs & partitions_)
{
const auto & database = table_.first;
const auto & storage = table_.second;
if (!database->hasTablesToBackup())
throw Exception(
ErrorCodes::CANNOT_BACKUP_TABLE,
"Cannot backup the {} because it's contained in a hollow database (engine: {})",
formatTableNameOrTemporaryTableName(table_name_),
database->getEngineName());
/// Check that we are not trying to backup the same table again.
DatabaseAndTableName name_in_backup = renaming_settings.getNewTableName(table_name_);
if (tables.contains(name_in_backup))
throw Exception(ErrorCodes::CANNOT_BACKUP_TABLE, "Cannot backup the {} twice", formatTableNameOrTemporaryTableName(name_in_backup));
/// Make a create query for this table.
auto create_query = prepareCreateQueryForBackup(database->getCreateTableQuery(table_name_.second, context));
String data_path = PathsInBackup::getDataPath(*create_query, shard_num_in_backup, replica_num_in_backup);
String zk_path;
BackupEntries data = prepareToBackupTableData(table_name_, storage, partitions_, data_path, zk_path);
TableInfo info;
info.table_name = table_name_;
info.create_query = create_query;
info.storage = storage;
info.data = std::move(data);
info.data_path = std::move(data_path);
info.zk_path = std::move(zk_path);
tables[name_in_backup] = std::move(info);
}
BackupEntries prepareToBackupTableData(const DatabaseAndTableName & table_name_, const StoragePtr & storage_, const ASTs & partitions_, const String & data_path, String & zk_path)
{
zk_path.clear();
const StorageReplicatedMergeTree * replicated_table = typeid_cast<const StorageReplicatedMergeTree *>(storage_.get());
bool has_data = (storage_->hasDataToBackup() || replicated_table) && !backup_settings.structure_only;
if (!has_data)
return {};
BackupEntries data = storage_->backupData(context, partitions_);
if (!replicated_table)
return data;
zk_path = replicated_table->getZooKeeperName() + replicated_table->getZooKeeperPath();
backup_coordination->addReplicatedTableDataPath(zk_path, data_path);
std::unordered_map<String, SipHash> parts;
for (const auto & [relative_path, backup_entry] : data)
{
size_t slash_pos = relative_path.find('/');
if (slash_pos != String::npos)
{
String part_name = relative_path.substr(0, slash_pos);
if (MergeTreePartInfo::tryParsePartName(part_name, MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING))
{
auto & hash = parts[part_name];
if (relative_path.ends_with(".bin"))
{
auto checksum = backup_entry->getChecksum();
hash.update(relative_path);
hash.update(backup_entry->getSize());
hash.update(*checksum);
}
}
}
const String & database_name = element.database_name;
const String & new_database_name = element.new_database_name;
assert(!database_name.empty());
assert(!new_database_name.empty());
map.setNewDatabaseName(database_name, new_database_name);
break;
}
std::vector<IBackupCoordination::PartNameAndChecksum> part_names_and_checksums;
part_names_and_checksums.reserve(parts.size());
for (auto & [part_name, hash] : parts)
{
UInt128 checksum;
hash.get128(checksum);
auto & part_name_and_checksum = part_names_and_checksums.emplace_back();
part_name_and_checksum.part_name = part_name;
part_name_and_checksum.checksum = checksum;
}
backup_coordination->addReplicatedTablePartNames(backup_settings.host_id, table_name_, zk_path, part_names_and_checksums);
return data;
case ASTBackupQuery::ALL: break;
}
/// Prepares to restore a database and all tables in it.
void prepareToBackupDatabase(const String & database_name_, const std::set<String> & except_list_)
{
auto database = DatabaseCatalog::instance().getDatabase(database_name_, context);
prepareToBackupDatabase(database_name_, database, except_list_);
}
void prepareToBackupDatabase(const String & database_name_, const DatabasePtr & database_, const std::set<String> & except_list_)
{
/// Check that we are not trying to restore the same database again.
String name_in_backup = renaming_settings.getNewDatabaseName(database_name_);
if (databases.contains(name_in_backup))
throw Exception(ErrorCodes::CANNOT_BACKUP_DATABASE, "Cannot backup the database {} twice", backQuoteIfNeed(name_in_backup));
/// Of course we're not going to backup the definition of the system or the temporary database.
if (!isSystemOrTemporaryDatabase(database_name_))
{
/// Make a create query for this database.
auto create_query = prepareCreateQueryForBackup(database_->getCreateDatabaseQuery());
DatabaseInfo info;
info.create_query = create_query;
databases[name_in_backup] = std::move(info);
}
/// Backup tables in this database.
if (database_->hasTablesToBackup())
{
for (auto it = database_->getTablesIterator(context); it->isValid(); it->next())
{
if (except_list_.contains(it->name()))
continue;
prepareToBackupTable({database_name_, it->name()}, {database_, it->table()}, {});
}
}
}
/// Prepares to backup all the databases contained in the backup.
void prepareToBackupAllDatabases(const std::set<String> & except_list_)
{
for (const auto & [database_name, database] : DatabaseCatalog::instance().getDatabases())
{
if (except_list_.contains(database_name))
continue;
if (isSystemOrTemporaryDatabase(database_name))
continue;
prepareToBackupDatabase(database_name, database, {});
}
}
/// Do renaming in the create query according to the renaming config.
std::shared_ptr<ASTCreateQuery> prepareCreateQueryForBackup(const ASTPtr & ast) const
{
ASTPtr query = ast;
::DB::renameInCreateQuery(query, context, renaming_settings);
auto create_query = typeid_cast<std::shared_ptr<ASTCreateQuery>>(query);
replaceTableUUIDWithMacroInReplicatedTableDef(*create_query, create_query->uuid);
create_query->uuid = UUIDHelpers::Nil;
create_query->to_inner_uuid = UUIDHelpers::Nil;
return create_query;
}
static bool isSystemOrTemporaryDatabase(const String & database_name)
{
return (database_name == DatabaseCatalog::SYSTEM_DATABASE) || (database_name == DatabaseCatalog::TEMPORARY_DATABASE);
}
std::pair<String, BackupEntryPtr> makeBackupEntryForMetadata(const IAST & create_query) const
{
auto metadata_entry = std::make_unique<BackupEntryFromMemory>(serializeAST(create_query));
String metadata_path = PathsInBackup::getMetadataPath(create_query, shard_num_in_backup, replica_num_in_backup);
return {metadata_path, std::move(metadata_entry)};
}
struct TableInfo;
void appendBackupEntriesForData(BackupEntries & res, const TableInfo & info) const
{
if (info.zk_path.empty())
{
for (const auto & [relative_path, backup_entry] : info.data)
res.emplace_back(info.data_path + relative_path, backup_entry);
return;
}
Strings data_paths = backup_coordination->getReplicatedTableDataPaths(info.zk_path);
Strings part_names = backup_coordination->getReplicatedTablePartNames(backup_settings.host_id, info.table_name, info.zk_path);
std::unordered_set<std::string_view> part_names_set{part_names.begin(), part_names.end()};
for (const auto & [relative_path, backup_entry] : info.data)
{
size_t slash_pos = relative_path.find('/');
if (slash_pos != String::npos)
{
String part_name = relative_path.substr(0, slash_pos);
if (MergeTreePartInfo::tryParsePartName(part_name, MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING))
{
if (!part_names_set.contains(part_name))
continue;
for (const auto & data_path : data_paths)
res.emplace_back(data_path + relative_path, backup_entry);
continue;
}
}
res.emplace_back(info.data_path + relative_path, backup_entry);
}
}
/// Information which is used to make an instance of RestoreTableFromBackupTask.
struct TableInfo
{
DatabaseAndTableName table_name;
ASTPtr create_query;
StoragePtr storage;
BackupEntries data;
String data_path;
String zk_path;
};
/// Information which is used to make an instance of RestoreDatabaseFromBackupTask.
struct DatabaseInfo
{
ASTPtr create_query;
};
ContextPtr context;
BackupSettings backup_settings;
std::shared_ptr<IBackupCoordination> backup_coordination;
size_t shard_num_in_backup = 0;
size_t replica_num_in_backup = 0;
DDLRenamingSettings renaming_settings;
std::unordered_map<String /* db_name_in_backup */, DatabaseInfo> databases;
std::map<DatabaseAndTableName /* table_name_in_backup */, TableInfo> tables;
};
}
BackupEntries makeBackupEntries(
const ContextPtr & context,
const Elements & elements,
const BackupSettings & backup_settings,
std::shared_ptr<IBackupCoordination> backup_coordination,
std::chrono::seconds timeout_for_other_nodes_to_prepare)
{
BackupEntriesBuilder builder{context, backup_settings, backup_coordination};
builder.prepare(elements, timeout_for_other_nodes_to_prepare);
return builder.makeBackupEntries();
}
return map;
}
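For orientation, a simplified stand-in for the map that makeRenamingMapFromBackupQuery() fills above. It is not the real DDLRenamingMap: the lookup method and its fallback behaviour are assumptions; only the three setters mirror what the switch above registers.

#include <map>
#include <string>
#include <utility>

using String = std::string;
using QualifiedName = std::pair<String, String>; /// {database, table}

/// Simplified stand-in: stores the renamings registered by makeRenamingMapFromBackupQuery()
/// and falls back to the old name when no rule matches.
struct ToyRenamingMap
{
    void setNewTableName(QualifiedName old_name, QualifiedName new_name) { tables[std::move(old_name)] = std::move(new_name); }
    void setNewTemporaryTableName(String old_name, String new_name) { temporary_tables[std::move(old_name)] = std::move(new_name); }
    void setNewDatabaseName(String old_name, String new_name) { databases[std::move(old_name)] = std::move(new_name); }

    QualifiedName getNewTableName(const QualifiedName & old_name) const
    {
        if (auto it = tables.find(old_name); it != tables.end())
            return it->second;
        /// No table-level rule: rename only the database part if there is a database-level rule.
        if (auto it = databases.find(old_name.first); it != databases.end())
            return {it->second, old_name.second};
        return old_name;
    }

    std::map<QualifiedName, QualifiedName> tables;
    std::map<String, String> temporary_tables;
    std::map<String, String> databases;
};

int main()
{
    ToyRenamingMap map;
    map.setNewDatabaseName("shop", "shop_backup");                       /// BACKUP DATABASE shop AS shop_backup
    map.setNewTableName({"shop", "orders"}, {"archive", "orders_2022"}); /// BACKUP TABLE shop.orders AS archive.orders_2022
    map.setNewTemporaryTableName("tmp_orders", "tmp_orders_2022");

    auto renamed = map.getNewTableName({"shop", "customers"});           /// -> {"shop_backup", "customers"}
    (void)renamed;
}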
@ -499,13 +122,73 @@ void writeBackupEntries(BackupMutablePtr backup, BackupEntries && backup_entries
/// And IBackup's implementation should remove the backup in its destructor if finalizeWriting() hasn't been called before.
std::rethrow_exception(exception);
}
}
backup->finalizeWriting();
void restoreTablesData(DataRestoreTasks && tasks, ThreadPool & thread_pool)
{
size_t num_active_jobs = 0;
std::mutex mutex;
std::condition_variable event;
std::exception_ptr exception;
for (auto & task : tasks)
{
{
std::unique_lock lock{mutex};
if (exception)
break;
++num_active_jobs;
}
auto job = [&]()
{
SCOPE_EXIT({
std::lock_guard lock{mutex};
if (!--num_active_jobs)
event.notify_all();
});
{
std::lock_guard lock{mutex};
if (exception)
return;
}
try
{
std::move(task)();
}
catch (...)
{
std::lock_guard lock{mutex};
if (!exception)
exception = std::current_exception();
}
};
if (!thread_pool.trySchedule(job))
job();
}
{
std::unique_lock lock{mutex};
event.wait(lock, [&] { return !num_active_jobs; });
}
tasks.clear();
if (exception)
{
/// We don't call finalizeWriting() if an error occurs.
/// And IBackup's implementation should remove the backup in its destructor if finalizeWriting() hasn't been called before.
std::rethrow_exception(exception);
}
}
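The scheduling pattern of restoreTablesData() above, isolated as a self-contained sketch: count the in-flight jobs, remember only the first exception, stop launching new work once something failed, wait for everything to drain, then rethrow. Plain std::thread stands in for ClickHouse's ThreadPool and the task type is simplified, so treat this as an illustration of the pattern rather than the actual implementation.

#include <condition_variable>
#include <cstddef>
#include <exception>
#include <functional>
#include <mutex>
#include <thread>
#include <vector>

/// Runs tasks concurrently; remembers the first exception, skips the remaining tasks after a failure,
/// waits for all started jobs to finish, then rethrows - mirroring restoreTablesData() above.
void runTasks(std::vector<std::function<void()>> tasks)
{
    size_t num_active_jobs = 0;
    std::mutex mutex;
    std::condition_variable event;
    std::exception_ptr exception;
    std::vector<std::thread> threads;

    for (auto & task : tasks)
    {
        {
            std::unique_lock lock{mutex};
            if (exception)
                break; /// don't start new work after a failure
            ++num_active_jobs;
        }

        threads.emplace_back([&, task = std::move(task)]
        {
            try
            {
                task();
            }
            catch (...)
            {
                std::lock_guard lock{mutex};
                if (!exception)
                    exception = std::current_exception();
            }

            std::lock_guard lock{mutex};
            if (!--num_active_jobs)
                event.notify_all();
        });
    }

    {
        std::unique_lock lock{mutex};
        event.wait(lock, [&] { return !num_active_jobs; });
    }

    for (auto & thread : threads)
        thread.join();

    if (exception)
        std::rethrow_exception(exception);
}

int main()
{
    runTasks({
        [] { /* restore data of table A */ },
        [] { /* restore data of table B */ },
    });
}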
/// Returns access required to execute BACKUP query.
AccessRightsElements getRequiredAccessToBackup(const ASTBackupQuery::Elements & elements, const BackupSettings & backup_settings)
AccessRightsElements getRequiredAccessToBackup(const ASTBackupQuery::Elements & elements)
{
AccessRightsElements required_access;
for (const auto & element : elements)
@ -514,32 +197,27 @@ AccessRightsElements getRequiredAccessToBackup(const ASTBackupQuery::Elements &
{
case ASTBackupQuery::TABLE:
{
if (element.is_temp_db)
break;
AccessFlags flags = AccessType::SHOW_TABLES;
if (!backup_settings.structure_only)
flags |= AccessType::SELECT;
required_access.emplace_back(flags, element.name.first, element.name.second);
required_access.emplace_back(AccessType::BACKUP, element.database_name, element.table_name);
break;
}
case ASTBackupQuery::TEMPORARY_TABLE:
{
/// It's always allowed to backup temporary tables.
break;
}
case ASTBackupQuery::DATABASE:
{
if (element.is_temp_db)
break;
AccessFlags flags = AccessType::SHOW_TABLES | AccessType::SHOW_DATABASES;
if (!backup_settings.structure_only)
flags |= AccessType::SELECT;
required_access.emplace_back(flags, element.name.first);
/// TODO: It's better to process `element.except_list` somehow.
/// TODO: It's better to process `element.except_tables` somehow.
required_access.emplace_back(AccessType::BACKUP, element.database_name);
break;
}
case ASTBackupQuery::ALL_DATABASES:
case ASTBackupQuery::ALL:
{
AccessFlags flags = AccessType::SHOW_TABLES | AccessType::SHOW_DATABASES;
if (!backup_settings.structure_only)
flags |= AccessType::SELECT;
required_access.emplace_back(flags);
/// TODO: It's better to process `element.except_list` somehow.
/// TODO: It's better to process `element.except_databases` & `element.except_tables` somehow.
required_access.emplace_back(AccessType::BACKUP);
break;
}
}

View File

@ -7,29 +7,23 @@
namespace DB
{
class IBackup;
using BackupPtr = std::shared_ptr<const IBackup>;
using BackupMutablePtr = std::shared_ptr<IBackup>;
class IBackupEntry;
using BackupEntryPtr = std::shared_ptr<const IBackupEntry>;
using BackupEntries = std::vector<std::pair<String, BackupEntryPtr>>;
struct BackupSettings;
class IBackupCoordination;
using BackupEntries = std::vector<std::pair<String, std::shared_ptr<const IBackupEntry>>>;
using DataRestoreTasks = std::vector<std::function<void()>>;
class AccessRightsElements;
class Context;
using ContextPtr = std::shared_ptr<const Context>;
class DDLRenamingMap;
/// Prepares backup entries.
BackupEntries makeBackupEntries(
const ContextPtr & context,
const ASTBackupQuery::Elements & elements,
const BackupSettings & backup_settings,
std::shared_ptr<IBackupCoordination> backup_coordination,
std::chrono::seconds timeout_for_other_nodes_to_prepare = std::chrono::seconds::zero());
/// Initializes a DDLRenamingMap from a BACKUP or RESTORE query.
DDLRenamingMap makeRenamingMapFromBackupQuery(const ASTBackupQuery::Elements & elements);
/// Write backup entries to an opened backup.
void writeBackupEntries(BackupMutablePtr backup, BackupEntries && backup_entries, ThreadPool & thread_pool);
/// Run data restoring tasks which insert data to tables.
void restoreTablesData(DataRestoreTasks && tasks, ThreadPool & thread_pool);
/// Returns access required to execute BACKUP query.
AccessRightsElements getRequiredAccessToBackup(const ASTBackupQuery::Elements & elements, const BackupSettings & backup_settings);
AccessRightsElements getRequiredAccessToBackup(const ASTBackupQuery::Elements & elements);
}

View File

@ -4,13 +4,13 @@
#include <Backups/BackupSettings.h>
#include <Backups/BackupUtils.h>
#include <Backups/IBackupEntry.h>
#include <Backups/BackupEntriesCollector.h>
#include <Backups/BackupCoordinationDistributed.h>
#include <Backups/BackupCoordinationLocal.h>
#include <Backups/IRestoreTask.h>
#include <Backups/RestoreCoordinationDistributed.h>
#include <Backups/RestoreCoordinationLocal.h>
#include <Backups/RestoreSettings.h>
#include <Backups/RestoreUtils.h>
#include <Backups/RestorerFromBackup.h>
#include <Interpreters/Cluster.h>
#include <Interpreters/Context.h>
#include <Interpreters/executeDDLQueryOnCluster.h>
@ -51,119 +51,121 @@ UUID BackupsWorker::startMakingBackup(const ASTPtr & query, const ContextPtr & c
{
UUID backup_uuid = UUIDHelpers::generateV4();
auto backup_query = std::static_pointer_cast<ASTBackupQuery>(query->clone());
auto backup_info = BackupInfo::fromAST(*backup_query->backup_name);
auto backup_settings = BackupSettings::fromBackupQuery(*backup_query);
auto backup_info = BackupInfo::fromAST(*backup_query->backup_name);
bool on_cluster = !backup_query->cluster.empty();
ContextPtr context_in_use = context;
ContextMutablePtr mutable_context;
if (on_cluster || backup_settings.async)
context_in_use = mutable_context = Context::createCopy(context);
addInfo(backup_uuid, backup_info.toString(), BackupStatus::MAKING_BACKUP, backup_settings.internal);
std::shared_ptr<IBackupCoordination> backup_coordination;
SCOPE_EXIT({
if (backup_coordination && !backup_settings.internal)
backup_coordination->drop();
});
BackupMutablePtr backup;
ContextPtr cloned_context;
bool on_cluster = !backup_query->cluster.empty();
std::shared_ptr<BlockIO> on_cluster_io;
try
{
auto access_to_check = getRequiredAccessToBackup(backup_query->elements, backup_settings);
if (!on_cluster)
context->checkAccess(access_to_check);
ClusterPtr cluster;
if (on_cluster)
{
backup_query->cluster = context->getMacros()->expand(backup_query->cluster);
cluster = context->getCluster(backup_query->cluster);
backup_settings.cluster_host_ids = cluster->getHostIDs();
if (backup_settings.coordination_zk_path.empty())
{
String root_zk_path = context->getConfigRef().getString("backups.zookeeper_path", "/clickhouse/backups");
backup_settings.coordination_zk_path = root_zk_path + "/backup-" + toString(backup_uuid);
}
backup_settings.copySettingsToQuery(*backup_query);
}
if (!backup_settings.coordination_zk_path.empty())
backup_coordination = std::make_shared<BackupCoordinationDistributed>(
backup_settings.coordination_zk_path,
[global_context = context->getGlobalContext()] { return global_context->getZooKeeper(); });
else
backup_coordination = std::make_shared<BackupCoordinationLocal>();
BackupFactory::CreateParams backup_create_params;
backup_create_params.open_mode = IBackup::OpenMode::WRITE;
backup_create_params.context = context;
backup_create_params.backup_info = backup_info;
backup_create_params.base_backup_info = backup_settings.base_backup_info;
backup_create_params.compression_method = backup_settings.compression_method;
backup_create_params.compression_level = backup_settings.compression_level;
backup_create_params.password = backup_settings.password;
backup_create_params.backup_uuid = backup_uuid;
backup_create_params.is_internal_backup = backup_settings.internal;
backup_create_params.backup_coordination = backup_coordination;
backup = BackupFactory::instance().createBackup(backup_create_params);
ContextMutablePtr mutable_context;
if (on_cluster || backup_settings.async)
cloned_context = mutable_context = Context::createCopy(context);
else
cloned_context = context; /// No need to clone context
if (on_cluster)
{
DDLQueryOnClusterParams params;
params.cluster = cluster;
params.only_shard_num = backup_settings.shard_num;
params.only_replica_num = backup_settings.replica_num;
params.access_to_check = access_to_check;
mutable_context->setSetting("distributed_ddl_task_timeout", -1); // No timeout
mutable_context->setSetting("distributed_ddl_output_mode", Field{"throw"});
auto res = executeDDLQueryOnCluster(backup_query, mutable_context, params);
on_cluster_io = std::make_shared<BlockIO>(std::move(res));
}
}
catch (...)
{
setStatus(backup_uuid, BackupStatus::FAILED_TO_BACKUP);
throw;
}
auto job = [this,
backup,
backup_uuid,
backup_query,
backup_settings,
backup_coordination,
on_cluster_io,
cloned_context](bool in_separate_thread)
backup_info,
on_cluster,
context_in_use,
mutable_context](bool in_separate_thread) mutable
{
try
{
if (on_cluster_io)
/// Checks access rights if this is not an ON CLUSTER query.
/// (If this is an ON CLUSTER query, executeDDLQueryOnCluster() will check access rights later.)
auto required_access = getRequiredAccessToBackup(backup_query->elements);
if (!on_cluster)
context_in_use->checkAccess(required_access);
/// Make a backup coordination.
std::shared_ptr<IBackupCoordination> backup_coordination;
SCOPE_EXIT({
if (backup_coordination && !backup_settings.internal)
backup_coordination->drop();
});
ClusterPtr cluster;
if (on_cluster)
{
backup_query->cluster = context_in_use->getMacros()->expand(backup_query->cluster);
cluster = context_in_use->getCluster(backup_query->cluster);
backup_settings.cluster_host_ids = cluster->getHostIDs();
if (backup_settings.coordination_zk_path.empty())
{
String root_zk_path = context_in_use->getConfigRef().getString("backups.zookeeper_path", "/clickhouse/backups");
backup_settings.coordination_zk_path = root_zk_path + "/backup-" + toString(backup_uuid);
}
}
if (!backup_settings.coordination_zk_path.empty())
{
backup_coordination = std::make_shared<BackupCoordinationDistributed>(
backup_settings.coordination_zk_path,
[global_context = context_in_use->getGlobalContext()] { return global_context->getZooKeeper(); });
}
else
{
backup_coordination = std::make_shared<BackupCoordinationLocal>();
}
/// Opens a backup for writing.
BackupFactory::CreateParams backup_create_params;
backup_create_params.open_mode = IBackup::OpenMode::WRITE;
backup_create_params.context = context_in_use;
backup_create_params.backup_info = backup_info;
backup_create_params.base_backup_info = backup_settings.base_backup_info;
backup_create_params.compression_method = backup_settings.compression_method;
backup_create_params.compression_level = backup_settings.compression_level;
backup_create_params.password = backup_settings.password;
backup_create_params.backup_uuid = backup_uuid;
backup_create_params.is_internal_backup = backup_settings.internal;
backup_create_params.backup_coordination = backup_coordination;
BackupMutablePtr backup = BackupFactory::instance().createBackup(backup_create_params);
/// Write the backup.
if (on_cluster)
{
DDLQueryOnClusterParams params;
params.cluster = cluster;
params.only_shard_num = backup_settings.shard_num;
params.only_replica_num = backup_settings.replica_num;
params.access_to_check = required_access;
mutable_context->setSetting("distributed_ddl_task_timeout", -1); // No timeout
mutable_context->setSetting("distributed_ddl_output_mode", Field{"throw"});
backup_settings.copySettingsToQuery(*backup_query);
auto res = executeDDLQueryOnCluster(backup_query, mutable_context, params);
auto on_cluster_io = std::make_shared<BlockIO>(std::move(res));
PullingPipelineExecutor executor(on_cluster_io->pipeline);
Block block;
while (executor.pull(block))
;
backup->finalizeWriting();
while (executor.pull(block));
}
else
{
std::optional<CurrentThread::QueryScope> query_scope;
if (in_separate_thread)
query_scope.emplace(cloned_context);
query_scope.emplace(context_in_use);
backup_query->setDatabase(cloned_context->getCurrentDatabase());
backup_query->setCurrentDatabase(context_in_use->getCurrentDatabase());
BackupEntries backup_entries;
{
auto timeout = std::chrono::seconds{context_in_use->getConfigRef().getInt("backups.backup_prepare_timeout", -1)};
BackupEntriesCollector backup_entries_collector{backup_query->elements, backup_settings, backup_coordination, context_in_use, timeout};
backup_entries = backup_entries_collector.getBackupEntries();
}
auto timeout_for_preparing = std::chrono::seconds{cloned_context->getConfigRef().getInt("backups.backup_prepare_timeout", -1)};
auto backup_entries
= makeBackupEntries(cloned_context, backup_query->elements, backup_settings, backup_coordination, timeout_for_preparing);
writeBackupEntries(backup, std::move(backup_entries), backups_thread_pool);
}
/// Finalize backup (write its metadata).
if (!backup_settings.internal)
backup->finalizeWriting();
/// Close the backup.
backup.reset();
setStatus(backup_uuid, BackupStatus::BACKUP_COMPLETE);
}
catch (...)
@ -175,7 +177,7 @@ UUID BackupsWorker::startMakingBackup(const ASTPtr & query, const ContextPtr & c
};
if (backup_settings.async)
backups_thread_pool.scheduleOrThrowOnError([job] { job(true); });
backups_thread_pool.scheduleOrThrowOnError([job]() mutable { job(true); });
else
job(false);
@ -187,85 +189,99 @@ UUID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePtr conte
{
UUID restore_uuid = UUIDHelpers::generateV4();
auto restore_query = std::static_pointer_cast<ASTBackupQuery>(query->clone());
auto backup_info = BackupInfo::fromAST(*restore_query->backup_name);
auto restore_settings = RestoreSettings::fromRestoreQuery(*restore_query);
auto backup_info = BackupInfo::fromAST(*restore_query->backup_name);
bool on_cluster = !restore_query->cluster.empty();
ContextMutablePtr context_in_use = context;
if (restore_settings.async || on_cluster)
context_in_use = Context::createCopy(context);
addInfo(restore_uuid, backup_info.toString(), BackupStatus::RESTORING, restore_settings.internal);
std::shared_ptr<IRestoreCoordination> restore_coordination;
SCOPE_EXIT({
if (restore_coordination && !restore_settings.internal)
restore_coordination->drop();
});
ContextMutablePtr cloned_context;
std::shared_ptr<BlockIO> on_cluster_io;
bool on_cluster = !restore_query->cluster.empty();
try
{
auto access_to_check = getRequiredAccessToRestore(restore_query->elements, restore_settings);
if (!on_cluster)
context->checkAccess(access_to_check);
ClusterPtr cluster;
if (on_cluster)
{
restore_query->cluster = context->getMacros()->expand(restore_query->cluster);
cluster = context->getCluster(restore_query->cluster);
restore_settings.cluster_host_ids = cluster->getHostIDs();
if (restore_settings.coordination_zk_path.empty())
{
String root_zk_path = context->getConfigRef().getString("backups.zookeeper_path", "/clickhouse/backups");
restore_settings.coordination_zk_path = root_zk_path + "/restore-" + toString(restore_uuid);
}
restore_settings.copySettingsToQuery(*restore_query);
}
if (!restore_settings.coordination_zk_path.empty())
restore_coordination = std::make_shared<RestoreCoordinationDistributed>(
restore_settings.coordination_zk_path,
[global_context = context->getGlobalContext()] { return global_context->getZooKeeper(); });
else
restore_coordination = std::make_shared<RestoreCoordinationLocal>();
if (on_cluster || restore_settings.async)
cloned_context = Context::createCopy(context);
else
cloned_context = context; /// No need to clone context
if (on_cluster)
{
DDLQueryOnClusterParams params;
params.cluster = cluster;
params.only_shard_num = restore_settings.shard_num;
params.only_replica_num = restore_settings.replica_num;
params.access_to_check = access_to_check;
cloned_context->setSetting("distributed_ddl_task_timeout", -1); // No timeout
cloned_context->setSetting("distributed_ddl_output_mode", Field{"throw"});
auto res = executeDDLQueryOnCluster(restore_query, cloned_context, params);
on_cluster_io = std::make_shared<BlockIO>(std::move(res));
}
}
catch (...)
{
setStatus(restore_uuid, BackupStatus::FAILED_TO_RESTORE);
throw;
}
auto job = [this,
backup_info,
restore_uuid,
restore_query,
restore_settings,
restore_coordination,
on_cluster_io,
cloned_context](bool in_separate_thread)
backup_info,
on_cluster,
context_in_use](bool in_separate_thread) mutable
{
try
{
if (on_cluster_io)
/// Open the backup for reading.
BackupFactory::CreateParams backup_open_params;
backup_open_params.open_mode = IBackup::OpenMode::READ;
backup_open_params.context = context_in_use;
backup_open_params.backup_info = backup_info;
backup_open_params.base_backup_info = restore_settings.base_backup_info;
backup_open_params.password = restore_settings.password;
BackupPtr backup = BackupFactory::instance().createBackup(backup_open_params);
String current_database = context_in_use->getCurrentDatabase();
/// Checks access rights if this is an ON CLUSTER query.
/// (If this isn't an ON CLUSTER query, RestorerFromBackup will check access rights later.)
ClusterPtr cluster;
if (on_cluster)
{
restore_query->cluster = context_in_use->getMacros()->expand(restore_query->cluster);
cluster = context_in_use->getCluster(restore_query->cluster);
restore_settings.cluster_host_ids = cluster->getHostIDs();
/// We cannot just use the access checking provided by executeDDLQueryOnCluster(): it would be incorrect
/// because different replicas can contain different sets of tables, so the required access rights can differ too.
/// So the right way is to go through the entire cluster and check access for each host.
auto addresses = cluster->filterAddressesByShardOrReplica(restore_settings.shard_num, restore_settings.replica_num);
for (const auto * address : addresses)
{
restore_settings.host_id = address->toString();
auto restore_elements = restore_query->elements;
String addr_database = address->default_database.empty() ? current_database : address->default_database;
for (auto & element : restore_elements)
element.setCurrentDatabase(addr_database);
RestorerFromBackup dummy_restorer{restore_elements, restore_settings, nullptr, backup, context_in_use, {}};
dummy_restorer.checkAccessOnly();
}
}
/// Make a restore coordination.
std::shared_ptr<IRestoreCoordination> restore_coordination;
SCOPE_EXIT({
if (restore_coordination && !restore_settings.internal)
restore_coordination->drop();
});
if (on_cluster && restore_settings.coordination_zk_path.empty())
{
String root_zk_path = context_in_use->getConfigRef().getString("backups.zookeeper_path", "/clickhouse/backups");
restore_settings.coordination_zk_path = root_zk_path + "/restore-" + toString(restore_uuid);
}
if (!restore_settings.coordination_zk_path.empty())
{
restore_coordination = std::make_shared<RestoreCoordinationDistributed>(
restore_settings.coordination_zk_path,
[global_context = context_in_use->getGlobalContext()] { return global_context->getZooKeeper(); });
}
else
{
restore_coordination = std::make_shared<RestoreCoordinationLocal>();
}
/// Do RESTORE.
if (on_cluster)
{
DDLQueryOnClusterParams params;
params.cluster = cluster;
params.only_shard_num = restore_settings.shard_num;
params.only_replica_num = restore_settings.replica_num;
context_in_use->setSetting("distributed_ddl_task_timeout", -1); // No timeout
context_in_use->setSetting("distributed_ddl_output_mode", Field{"throw"});
restore_settings.copySettingsToQuery(*restore_query);
auto res = executeDDLQueryOnCluster(restore_query, context_in_use, params);
auto on_cluster_io = std::make_shared<BlockIO>(std::move(res));
PullingPipelineExecutor executor(on_cluster_io->pipeline);
Block block;
while (executor.pull(block))
@ -275,24 +291,20 @@ UUID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePtr conte
{
std::optional<CurrentThread::QueryScope> query_scope;
if (in_separate_thread)
query_scope.emplace(cloned_context);
query_scope.emplace(context_in_use);
restore_query->setDatabase(cloned_context->getCurrentDatabase());
restore_query->setCurrentDatabase(current_database);
BackupFactory::CreateParams backup_open_params;
backup_open_params.open_mode = IBackup::OpenMode::READ;
backup_open_params.context = cloned_context;
backup_open_params.backup_info = backup_info;
backup_open_params.base_backup_info = restore_settings.base_backup_info;
backup_open_params.password = restore_settings.password;
BackupPtr backup = BackupFactory::instance().createBackup(backup_open_params);
DataRestoreTasks data_restore_tasks;
{
auto timeout = std::chrono::seconds{context_in_use->getConfigRef().getInt("backups.restore_metadata_timeout", -1)};
RestorerFromBackup restorer{restore_query->elements, restore_settings, restore_coordination,
backup, context_in_use, timeout};
restorer.restoreMetadata();
data_restore_tasks = restorer.getDataRestoreTasks();
}
auto timeout_for_restoring_metadata
= std::chrono::seconds{cloned_context->getConfigRef().getInt("backups.restore_metadata_timeout", -1)};
auto restore_tasks = makeRestoreTasks(
cloned_context, backup, restore_query->elements, restore_settings, restore_coordination, timeout_for_restoring_metadata);
restoreMetadata(restore_tasks, restore_settings, restore_coordination, timeout_for_restoring_metadata);
restoreData(restore_tasks, restores_thread_pool);
restoreTablesData(std::move(data_restore_tasks), restores_thread_pool);
}
setStatus(restore_uuid, BackupStatus::RESTORED);
@ -306,7 +318,7 @@ UUID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePtr conte
};
if (restore_settings.async)
backups_thread_pool.scheduleOrThrowOnError([job] { job(true); });
backups_thread_pool.scheduleOrThrowOnError([job]() mutable { job(true); });
else
job(false);
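The dispatch pattern used above for both BACKUP and RESTORE, reduced to a self-contained sketch: the same job runs either on a pool (here a plain std::thread, a stand-in for the real thread pool) with in_separate_thread = true, or inline with false. It also illustrates why the scheduled copy has to be declared mutable: the job itself is a mutable lambda, so invoking a by-value copy of it requires a mutable wrapper. The async flag, the status string and the job body are placeholders.

#include <iostream>
#include <string>
#include <thread>

int main()
{
    bool async = true; /// placeholder for backup_settings.async / restore_settings.async
    std::string status = "MAKING_BACKUP";

    /// The job captures state by value and modifies its copy, so it must be 'mutable';
    /// any wrapper that calls a by-value copy of it must therefore be 'mutable' too.
    auto job = [status](bool in_separate_thread) mutable
    {
        if (in_separate_thread)
            std::cout << "running in a separate thread\n";
        status = "BACKUP_COMPLETE";
        std::cout << status << '\n';
    };

    if (async)
    {
        std::thread worker([job]() mutable { job(true); }); /// same shape as scheduleOrThrowOnError([job]() mutable { job(true); })
        worker.join();
    }
    else
        job(false);
}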

View File

@ -1,87 +0,0 @@
#include <Backups/DDLCompareUtils.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/formatAST.h>
namespace DB
{
namespace
{
std::shared_ptr<const ASTCreateQuery> prepareDDLToCompare(const ASTCreateQuery & ast)
{
auto res = typeid_cast<std::shared_ptr<const ASTCreateQuery>>(ast.shared_from_this());
std::shared_ptr<ASTCreateQuery> clone;
auto get_clone = [&]
{
if (!clone)
{
clone = typeid_cast<std::shared_ptr<ASTCreateQuery>>(res->clone());
res = clone;
}
return clone;
};
/// Remove UUIDs.
if (res->uuid != UUIDHelpers::Nil)
get_clone()->uuid = UUIDHelpers::Nil;
if (res->to_inner_uuid != UUIDHelpers::Nil)
get_clone()->to_inner_uuid = UUIDHelpers::Nil;
/// Clear IF NOT EXISTS flag.
if (res->if_not_exists)
get_clone()->if_not_exists = false;
return res;
}
}
bool areTableDefinitionsSame(const IAST & table1, const IAST & table2)
{
auto ast1 = typeid_cast<std::shared_ptr<const ASTCreateQuery>>(table1.shared_from_this());
if (!ast1 || !ast1->table)
return false;
auto ast2 = typeid_cast<std::shared_ptr<const ASTCreateQuery>>(table2.shared_from_this());
if (!ast2 || !ast2->table)
return false;
if ((ast1->uuid != ast2->uuid) || (ast1->to_inner_uuid != ast2->to_inner_uuid) ||
(ast1->if_not_exists != ast2->if_not_exists))
{
ast1 = prepareDDLToCompare(*ast1);
ast2 = prepareDDLToCompare(*ast2);
}
return serializeAST(*ast1) == serializeAST(*ast1);
}
bool areDatabaseDefinitionsSame(const IAST & database1, const IAST & database2)
{
auto ast1 = typeid_cast<std::shared_ptr<const ASTCreateQuery>>(database1.shared_from_this());
if (!ast1 || ast1->table || !ast1->database)
return false;
auto ast2 = typeid_cast<std::shared_ptr<const ASTCreateQuery>>(database2.shared_from_this());
if (!ast2 || ast2->table || !ast2->database)
return false;
if ((ast1->uuid != ast2->uuid) || (ast1->if_not_exists != ast2->if_not_exists))
{
ast1 = prepareDDLToCompare(*ast1);
ast2 = prepareDDLToCompare(*ast2);
}
return serializeAST(*ast1) == serializeAST(*ast1);
}
bool areTableDataCompatible(const IAST & src_table, const IAST & dest_table)
{
return areTableDefinitionsSame(src_table, dest_table);
}
}

View File

@ -1,17 +0,0 @@
#pragma once
namespace DB
{
class IAST;
/// Checks that two table definitions are actually the same.
bool areTableDefinitionsSame(const IAST & table1, const IAST & table2);
/// Checks that two database definitions are actually the same.
bool areDatabaseDefinitionsSame(const IAST & database1, const IAST & database2);
/// Whether the data from the first table can be attached to the second table.
bool areTableDataCompatible(const IAST & src_table, const IAST & dest_table);
}

View File

@ -1,387 +0,0 @@
#include <Backups/DDLRenamingVisitor.h>
#include <Interpreters/DatabaseCatalog.h>
#include <Interpreters/InDepthNodeVisitor.h>
#include <Interpreters/evaluateConstantExpression.h>
#include <Parsers/ASTBackupQuery.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <TableFunctions/TableFunctionFactory.h>
namespace DB
{
namespace ErrorCodes
{
extern const int WRONG_DDL_RENAMING_SETTINGS;
extern const int LOGICAL_ERROR;
}
namespace
{
/// Replaces names of tables and databases used in a CREATE query, which can be either CREATE TABLE or
/// CREATE DICTIONARY or CREATE VIEW or CREATE TEMPORARY TABLE or CREATE DATABASE query.
void visitCreateQuery(ASTCreateQuery & create, const DDLRenamingVisitor::Data & data)
{
if (create.table)
{
DatabaseAndTableName table_name;
table_name.second = create.getTable();
if (create.temporary)
table_name.first = DatabaseCatalog::TEMPORARY_DATABASE;
else if (create.database)
table_name.first = create.getDatabase();
else
throw Exception(ErrorCodes::LOGICAL_ERROR, "Database name specified in the CREATE TABLE query must not be empty");
table_name = data.renaming_settings.getNewTableName(table_name);
if (table_name.first == DatabaseCatalog::TEMPORARY_DATABASE)
{
create.temporary = true;
create.setDatabase("");
}
else
{
create.temporary = false;
create.setDatabase(table_name.first);
}
create.setTable(table_name.second);
}
else if (create.database)
{
String database_name = create.getDatabase();
database_name = data.renaming_settings.getNewDatabaseName(database_name);
create.setDatabase(database_name);
}
else
throw Exception(ErrorCodes::LOGICAL_ERROR, "Database name specified in the CREATE DATABASE query must not be empty");
if (!create.as_table.empty() && !create.as_database.empty())
std::tie(create.as_database, create.as_table) = data.renaming_settings.getNewTableName({create.as_database, create.as_table});
if (!create.to_table_id.table_name.empty() && !create.to_table_id.database_name.empty())
{
auto to_table = data.renaming_settings.getNewTableName({create.to_table_id.database_name, create.to_table_id.table_name});
create.to_table_id = StorageID{to_table.first, to_table.second};
}
}
/// Replaces names of a database and a table in a expression like `db`.`table`
void visitTableExpression(ASTTableExpression & expr, const DDLRenamingVisitor::Data & data)
{
if (!expr.database_and_table_name)
return;
ASTIdentifier * id = expr.database_and_table_name->as<ASTIdentifier>();
if (!id)
return;
auto table_id = id->createTable();
if (!table_id)
return;
const String & db_name = table_id->getDatabaseName();
const String & table_name = table_id->shortName();
if (db_name.empty() || table_name.empty())
return;
String new_db_name, new_table_name;
std::tie(new_db_name, new_table_name) = data.renaming_settings.getNewTableName({db_name, table_name});
if ((new_db_name == db_name) && (new_table_name == table_name))
return;
expr.database_and_table_name = std::make_shared<ASTIdentifier>(Strings{new_db_name, new_table_name});
expr.children.push_back(expr.database_and_table_name);
}
/// Replaces a database's name passed via an argument of the function merge() or the table engine Merge.
void visitFunctionMerge(ASTFunction & function, const DDLRenamingVisitor::Data & data)
{
if (!function.arguments)
return;
/// The first argument is a database's name and we can rename it.
/// The second argument is a regular expression and we can do nothing about it.
auto & args = function.arguments->as<ASTExpressionList &>().children;
size_t db_name_arg_index = 0;
if (args.size() <= db_name_arg_index)
return;
String db_name = evaluateConstantExpressionForDatabaseName(args[db_name_arg_index], data.context)->as<ASTLiteral &>().value.safeGet<String>();
if (db_name.empty())
return;
String new_db_name = data.renaming_settings.getNewDatabaseName(db_name);
if (new_db_name == db_name)
return;
args[db_name_arg_index] = std::make_shared<ASTLiteral>(new_db_name);
}
/// Replaces names of a table and a database passed via arguments of the function remote() or cluster() or the table engine Distributed.
void visitFunctionRemote(ASTFunction & function, const DDLRenamingVisitor::Data & data)
{
if (!function.arguments)
return;
/// The first argument is an address or cluster's name, so we skip it.
/// The second argument can be either 'db.name' or just 'db' followed by the third argument 'table'.
auto & args = function.arguments->as<ASTExpressionList &>().children;
const auto * second_arg_as_function = args[1]->as<ASTFunction>();
if (second_arg_as_function && TableFunctionFactory::instance().isTableFunctionName(second_arg_as_function->name))
return;
size_t db_name_index = 1;
if (args.size() <= db_name_index)
return;
String name = evaluateConstantExpressionForDatabaseName(args[db_name_index], data.context)->as<ASTLiteral &>().value.safeGet<String>();
size_t table_name_index = static_cast<size_t>(-1);
QualifiedTableName qualified_name;
if (function.name == "Distributed")
qualified_name.table = name;
else
qualified_name = QualifiedTableName::parseFromString(name);
if (qualified_name.database.empty())
{
std::swap(qualified_name.database, qualified_name.table);
table_name_index = 2;
if (args.size() <= table_name_index)
return;
qualified_name.table = evaluateConstantExpressionForDatabaseName(args[table_name_index], data.context)->as<ASTLiteral &>().value.safeGet<String>();
}
const String & db_name = qualified_name.database;
const String & table_name = qualified_name.table;
if (db_name.empty() || table_name.empty())
return;
String new_db_name, new_table_name;
std::tie(new_db_name, new_table_name) = data.renaming_settings.getNewTableName({db_name, table_name});
if ((new_db_name == db_name) && (new_table_name == table_name))
return;
if (table_name_index != static_cast<size_t>(-1))
{
if (new_db_name != db_name)
args[db_name_index] = std::make_shared<ASTLiteral>(new_db_name);
if (new_table_name != table_name)
args[table_name_index] = std::make_shared<ASTLiteral>(new_table_name);
}
else
{
args[db_name_index] = std::make_shared<ASTLiteral>(new_db_name);
args.insert(args.begin() + db_name_index + 1, std::make_shared<ASTLiteral>(new_table_name));
}
}
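/// Illustrative sketch (not part of this diff): how the second argument of remote()/cluster() is
/// interpreted by the function above, simplified. The real code uses evaluateConstantExpressionForDatabaseName
/// and QualifiedTableName::parseFromString, which also handle quoting; the helper name here is hypothetical.
std::pair<String, String> parseRemoteNameArgSketch(const String & second_arg, const String & third_arg)
{
    auto dot = second_arg.find('.');
    if (dot != String::npos)
        return {second_arg.substr(0, dot), second_arg.substr(dot + 1)};  /// 'db.table' passed in one argument
    return {second_arg, third_arg};                                      /// 'db' followed by a separate 'table' argument
}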
/// Replaces names of tables and databases used in arguments of a table function or a table engine.
void visitFunction(ASTFunction & function, const DDLRenamingVisitor::Data & data)
{
if ((function.name == "merge") || (function.name == "Merge"))
{
visitFunctionMerge(function, data);
}
else if ((function.name == "remote") || (function.name == "remoteSecure") || (function.name == "cluster") ||
(function.name == "clusterAllReplicas") || (function.name == "Distributed"))
{
visitFunctionRemote(function, data);
}
}
/// Replaces names of a table and a database used in source parameters of a dictionary.
void visitDictionary(ASTDictionary & dictionary, const DDLRenamingVisitor::Data & data)
{
if (!dictionary.source || dictionary.source->name != "clickhouse" || !dictionary.source->elements)
return;
auto & elements = dictionary.source->elements->as<ASTExpressionList &>().children;
String db_name, table_name;
size_t db_name_index = static_cast<size_t>(-1);
size_t table_name_index = static_cast<size_t>(-1);
for (size_t i = 0; i != elements.size(); ++i)
{
auto & pair = elements[i]->as<ASTPair &>();
if (pair.first == "db")
{
if (db_name_index != static_cast<size_t>(-1))
return;
db_name = pair.second->as<ASTLiteral &>().value.safeGet<String>();
db_name_index = i;
}
else if (pair.first == "table")
{
if (table_name_index != static_cast<size_t>(-1))
return;
table_name = pair.second->as<ASTLiteral &>().value.safeGet<String>();
table_name_index = i;
}
}
if (db_name.empty() || table_name.empty())
return;
String new_db_name, new_table_name;
std::tie(new_db_name, new_table_name) = data.renaming_settings.getNewTableName({db_name, table_name});
if ((new_db_name == db_name) && (new_table_name == table_name))
return;
if (new_db_name != db_name)
{
auto & pair = elements[db_name_index]->as<ASTPair &>();
pair.replace(pair.second, std::make_shared<ASTLiteral>(new_db_name));
}
if (new_table_name != table_name)
{
auto & pair = elements[table_name_index]->as<ASTPair &>();
pair.replace(pair.second, std::make_shared<ASTLiteral>(new_table_name));
}
}
}
void DDLRenamingSettings::setNewTableName(const DatabaseAndTableName & old_table_name, const DatabaseAndTableName & new_table_name)
{
if (old_table_name.first.empty() || old_table_name.second.empty() || new_table_name.first.empty() || new_table_name.second.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Empty names are not allowed for DDLRenamingSettings::setNewTableName");
auto it = old_to_new_table_names.find(old_table_name);
if ((it != old_to_new_table_names.end()))
{
if (it->second == new_table_name)
return;
throw Exception(ErrorCodes::WRONG_DDL_RENAMING_SETTINGS, "Wrong renaming: it's specified that table {}.{} should be renamed to {}.{} and to {}.{} at the same time",
backQuoteIfNeed(old_table_name.first), backQuoteIfNeed(old_table_name.second),
backQuoteIfNeed(it->second.first), backQuoteIfNeed(it->second.second),
backQuoteIfNeed(new_table_name.first), backQuoteIfNeed(new_table_name.second));
}
old_to_new_table_names[old_table_name] = new_table_name;
}
void DDLRenamingSettings::setNewDatabaseName(const String & old_database_name, const String & new_database_name)
{
if (old_database_name.empty() || new_database_name.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Empty names are not allowed for DDLRenamingSettings::setNewDatabaseName");
auto it = old_to_new_database_names.find(old_database_name);
if ((it != old_to_new_database_names.end()))
{
if (it->second == new_database_name)
return;
throw Exception(ErrorCodes::WRONG_DDL_RENAMING_SETTINGS, "Wrong renaming: it's specified that database {} should be renamed to {} and to {} at the same time",
backQuoteIfNeed(old_database_name), backQuoteIfNeed(it->second), backQuoteIfNeed(new_database_name));
}
old_to_new_database_names[old_database_name] = new_database_name;
}
void DDLRenamingSettings::setFromBackupQuery(const ASTBackupQuery & backup_query)
{
setFromBackupQuery(backup_query.elements);
}
void DDLRenamingSettings::setFromBackupQuery(const ASTBackupQuery::Elements & backup_query_elements)
{
old_to_new_table_names.clear();
old_to_new_database_names.clear();
using ElementType = ASTBackupQuery::ElementType;
for (const auto & element : backup_query_elements)
{
switch (element.type)
{
case ElementType::TABLE:
{
const String & table_name = element.name.second;
String database_name = element.name.first;
if (element.is_temp_db)
database_name = DatabaseCatalog::TEMPORARY_DATABASE;
assert(!table_name.empty());
assert(!database_name.empty());
const String & new_table_name = element.new_name.second;
String new_database_name = element.new_name.first;
if (element.is_temp_db)
new_database_name = DatabaseCatalog::TEMPORARY_DATABASE;
assert(!new_table_name.empty());
assert(!new_database_name.empty());
setNewTableName({database_name, table_name}, {new_database_name, new_table_name});
break;
}
case ASTBackupQuery::DATABASE:
{
String database_name = element.name.first;
if (element.is_temp_db)
database_name = DatabaseCatalog::TEMPORARY_DATABASE;
assert(!database_name.empty());
String new_database_name = element.new_name.first;
if (element.is_temp_db)
new_database_name = DatabaseCatalog::TEMPORARY_DATABASE;
assert(!new_database_name.empty());
setNewDatabaseName(database_name, new_database_name);
break;
}
case ASTBackupQuery::ALL_DATABASES: break;
}
}
}
DatabaseAndTableName DDLRenamingSettings::getNewTableName(const DatabaseAndTableName & old_table_name) const
{
auto it = old_to_new_table_names.find(old_table_name);
if (it != old_to_new_table_names.end())
return it->second;
return {getNewDatabaseName(old_table_name.first), old_table_name.second};
}
const String & DDLRenamingSettings::getNewDatabaseName(const String & old_database_name) const
{
auto it = old_to_new_database_names.find(old_database_name);
if (it != old_to_new_database_names.end())
return it->second;
return old_database_name;
}
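/// Illustrative usage sketch (not part of this diff), using only the public DDLRenamingSettings
/// methods implemented above. The function name is hypothetical.
void ddlRenamingSettingsExampleSketch()
{
    DDLRenamingSettings renaming;
    renaming.setNewDatabaseName("db1", "db2");
    renaming.setNewTableName({"db1", "events"}, {"db2", "events_restored"});

    /// An explicitly mapped table gets its explicit new name:
    auto explicit_mapping = renaming.getNewTableName({"db1", "events"});  /// -> {"db2", "events_restored"}
    /// Any other table in db1 falls back to the database-level renaming:
    auto implicit_mapping = renaming.getNewTableName({"db1", "other"});   /// -> {"db2", "other"}
    (void)explicit_mapping;
    (void)implicit_mapping;
}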
bool DDLRenamingVisitor::needChildVisit(ASTPtr &, const ASTPtr &) { return true; }
void DDLRenamingVisitor::visit(ASTPtr & ast, const Data & data)
{
if (auto * create = ast->as<ASTCreateQuery>())
visitCreateQuery(*create, data);
else if (auto * expr = ast->as<ASTTableExpression>())
visitTableExpression(*expr, data);
else if (auto * function = ast->as<ASTFunction>())
visitFunction(*function, data);
else if (auto * dictionary = ast->as<ASTDictionary>())
visitDictionary(*dictionary, data);
}
void renameInCreateQuery(ASTPtr & ast, const ContextPtr & global_context, const DDLRenamingSettings & renaming_settings)
{
try
{
DDLRenamingVisitor::Data data{renaming_settings, global_context};
DDLRenamingVisitor::Visitor{data}.visit(ast);
}
catch (...)
{
tryLogCurrentException("Backup", "Error while renaming in AST");
}
}
}

View File

@ -1,61 +0,0 @@
#pragma once
#include <Core/Types.h>
#include <Interpreters/InDepthNodeVisitor.h>
#include <Parsers/ASTBackupQuery.h>
#include <map>
#include <memory>
#include <unordered_map>
namespace DB
{
using DatabaseAndTableName = std::pair<String, String>;
class IAST;
using ASTPtr = std::shared_ptr<IAST>;
class Context;
using ContextPtr = std::shared_ptr<const Context>;
/// Keeps information about renamings of databases or tables being processed
/// while we're making a backup or while we're restoring from a backup.
class DDLRenamingSettings
{
public:
DDLRenamingSettings() = default;
void setNewTableName(const DatabaseAndTableName & old_table_name, const DatabaseAndTableName & new_table_name);
void setNewDatabaseName(const String & old_database_name, const String & new_database_name);
void setFromBackupQuery(const ASTBackupQuery & backup_query);
void setFromBackupQuery(const ASTBackupQuery::Elements & backup_query_elements);
/// Changes names according to the renaming.
DatabaseAndTableName getNewTableName(const DatabaseAndTableName & old_table_name) const;
const String & getNewDatabaseName(const String & old_database_name) const;
private:
std::map<DatabaseAndTableName, DatabaseAndTableName> old_to_new_table_names;
std::unordered_map<String, String> old_to_new_database_names;
};
/// Changes names in AST according to the renaming settings.
void renameInCreateQuery(ASTPtr & ast, const ContextPtr & global_context, const DDLRenamingSettings & renaming_settings);
/// Visits ASTCreateQuery and changes names of tables and databases according to passed DDLRenamingConfig.
class DDLRenamingVisitor
{
public:
struct Data
{
const DDLRenamingSettings & renaming_settings;
ContextPtr context;
};
using Visitor = InDepthNodeVisitor<DDLRenamingVisitor, false>;
static bool needChildVisit(ASTPtr &, const ASTPtr &);
static void visit(ASTPtr & ast, const Data & data);
};
}

View File

@ -36,18 +36,19 @@ public:
/// Returns UUID of the backup.
virtual UUID getUUID() const = 0;
/// Returns names of entries stored in the backup.
/// If `prefix` isn't empty the function will return only the names starting with
/// the prefix (but without the prefix itself).
/// If the `terminator` isn't empty the function will return only the parts of the names
/// before the terminator. For example, list("", "") returns the names of all the entries
/// in the backup; and list("data/", "/") returns a kind of list of folders and
/// files stored in the "data/" directory inside the backup.
virtual Strings listFiles(const String & prefix = "", const String & terminator = "/") const = 0; /// NOLINT
/// Returns names of entries stored in a specified directory in the backup.
/// If `directory` is empty or '/' the function returns entries in the backup's root.
virtual Strings listFiles(const String & directory, bool recursive = false) const = 0;
/// Checks if a specified directory contains any files.
/// The function returns the same as `!listFiles(directory).empty()`.
virtual bool hasFiles(const String & directory) const = 0;
using SizeAndChecksum = std::pair<UInt64, UInt128>;
/// Checks if an entry with a specified name exists.
virtual bool fileExists(const String & file_name) const = 0;
virtual bool fileExists(const std::pair<UInt64, UInt128> & size_and_checksum) const = 0;
virtual bool fileExists(const SizeAndChecksum & size_and_checksum) const = 0;
/// Returns the size of the entry's data.
/// This function does the same as `read(file_name)->getSize()` but faster.
@ -57,8 +58,6 @@ public:
/// This function does the same as `read(file_name)->getChecksum()` but faster.
virtual UInt128 getFileChecksum(const String & file_name) const = 0;
using SizeAndChecksum = std::pair<UInt64, UInt128>;
/// Returns both the size and checksum in one call.
virtual SizeAndChecksum getFileSizeAndChecksum(const String & file_name) const = 0;
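/// Illustrative usage sketch (not part of this diff), assuming the enclosing interface above is
/// IBackup and using only the methods shown in this excerpt. The helper name is hypothetical.
Strings topLevelEntriesSketch(const IBackup & backup)
{
    /// With the new directory-based API, listFiles("/", false) returns the entries
    /// directly under the backup's root, one directory level deep.
    return backup.listFiles("/", /* recursive = */ false);
}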

View File

@ -6,7 +6,6 @@
namespace DB
{
using DatabaseAndTableName = std::pair<String, String>;
/// Keeps information about files contained in a backup.
class IBackupCoordination
@ -14,10 +13,11 @@ class IBackupCoordination
public:
virtual ~IBackupCoordination() = default;
/// Adds a data path in backup for a replicated table.
/// Multiple replicas of the replicated table call this function and then all the added paths can be returned by call of the function
/// getReplicatedTableDataPaths().
virtual void addReplicatedTableDataPath(const String & table_zk_path, const String & table_data_path) = 0;
/// Sets the current stage and waits for other hosts to come to this stage too.
virtual void syncStage(const String & current_host, int stage, const Strings & wait_hosts, std::chrono::seconds timeout) = 0;
/// Sets that the current host encountered an error, so other hosts should know that and stop waiting in syncStage().
virtual void syncStageError(const String & current_host, const String & error_message) = 0;
struct PartNameAndChecksum
{
@ -27,30 +27,23 @@ public:
/// Adds part names which a specified replica of a replicated table is going to put to the backup.
/// Multiple replicas of the replicated table call this function and then the added part names can be returned by call of the function
/// getReplicatedTablePartNames().
/// getReplicatedPartNames().
/// Checksums are used only to control that parts under the same names on different replicas are the same.
virtual void addReplicatedTablePartNames(
const String & host_id,
const DatabaseAndTableName & table_name,
const String & table_zk_path,
const std::vector<PartNameAndChecksum> & part_names_and_checksums)
= 0;
/// Sets that a specified host finished preparations for copying the backup's files, successfully or not.
/// `error_message` should be set if it was not successful.
virtual void finishPreparing(const String & host_id, const String & error_message = {}) = 0;
/// Waits for a specified time for specified hosts to finish preparation for copying the backup's files.
virtual void
waitForAllHostsPrepared(const Strings & host_ids, std::chrono::seconds timeout = std::chrono::seconds(-1) /* no timeout */) const = 0;
/// Returns all the data paths in backup added for a replicated table (see also addReplicatedTableDataPath()).
virtual Strings getReplicatedTableDataPaths(const String & table_zk_path) const = 0;
virtual void addReplicatedPartNames(const String & table_zk_path, const String & table_name_for_logs, const String & replica_name,
const std::vector<PartNameAndChecksum> & part_names_and_checksums) = 0;
/// Returns the names of the parts which a specified replica of a replicated table should put to the backup.
/// This is the same list as was added by calls of the function addReplicatedTablePartNames() but without duplications and without
/// This is the same list as was added by calls of the function addReplicatedPartNames() but without duplications and without
/// parts covered by other parts.
virtual Strings getReplicatedTablePartNames(const String & host_id, const DatabaseAndTableName & table_name, const String & table_zk_path) const = 0;
virtual Strings getReplicatedPartNames(const String & table_zk_path, const String & replica_name) const = 0;
/// Adds a data path in backup for a replicated table.
/// Multiple replicas of the replicated table call this function and then all the added paths can be returned by call of the function
/// getReplicatedDataPaths().
virtual void addReplicatedDataPath(const String & table_zk_path, const String & data_path) = 0;
/// Returns all the data paths in backup added for a replicated table (see also addReplicatedDataPath()).
virtual Strings getReplicatedDataPaths(const String & table_zk_path) const = 0;
struct FileInfo
{
@ -87,7 +80,8 @@ public:
virtual void updateFileInfo(const FileInfo & file_info) = 0;
virtual std::vector<FileInfo> getAllFileInfos() const = 0;
virtual Strings listFiles(const String & prefix, const String & terminator) const = 0;
virtual Strings listFiles(const String & directory, bool recursive) const = 0;
virtual bool hasFiles(const String & directory) const = 0;
using SizeAndChecksum = std::pair<UInt64, UInt128>;

View File

@ -13,53 +13,22 @@ class IRestoreCoordination
public:
virtual ~IRestoreCoordination() = default;
/// Sets the current stage and waits for other hosts to come to this stage too.
virtual void syncStage(const String & current_host, int stage, const Strings & wait_hosts, std::chrono::seconds timeout) = 0;
/// Sets that the current host encountered an error, so other hosts should know that and stop waiting in syncStage().
virtual void syncStageError(const String & current_host, const String & error_message) = 0;
/// Starts creating a table in a replicated database. Returns false if there is another host which is already creating this table.
virtual bool startCreatingTableInReplicatedDB(
const String & host_id, const String & database_name, const String & database_zk_path, const String & table_name)
= 0;
/// Sets that we have either created a table in a replicated database or failed to do so.
/// In the latter case `error_message` should be set.
/// Calling this function unblocks other hosts waiting for this table to be created (see waitForCreatingTableInReplicatedDB()).
virtual void finishCreatingTableInReplicatedDB(
const String & host_id,
const String & database_name,
const String & database_zk_path,
const String & table_name,
const String & error_message = {})
= 0;
/// Wait for another host to create a table in a replicated database.
virtual void waitForTableCreatedInReplicatedDB(
const String & database_name,
const String & database_zk_path,
const String & table_name,
std::chrono::seconds timeout = std::chrono::seconds(-1) /* no timeout */)
= 0;
/// Adds a path in backup used by a replicated table.
/// This function can be called multiple times for the same table with different `host_id`, and in that case
/// getReplicatedTableDataPath() will choose `data_path_in_backup` with the lexicographically first `host_id`.
virtual void addReplicatedTableDataPath(
const String & host_id, const DatabaseAndTableName & table_name, const String & table_zk_path, const String & data_path_in_backup)
= 0;
/// Sets that a specified host has finished restoring metadata, successfully or with an error.
/// In the latter case `error_message` should be set.
virtual void finishRestoringMetadata(const String & host_id, const String & error_message = {}) = 0;
/// Waits for a specified list of hosts to finish restoring their metadata.
virtual void waitForAllHostsRestoredMetadata(
const Strings & host_ids, std::chrono::seconds timeout = std::chrono::seconds(-1) /* no timeout */) const = 0;
/// Gets path in backup used by a replicated table.
virtual String getReplicatedTableDataPath(const String & table_zk_path) const = 0;
virtual bool acquireCreatingTableInReplicatedDatabase(const String & database_zk_path, const String & table_name) = 0;
/// Sets that this replica is going to restore a partition in a replicated table.
/// The function returns false if this partition is already being restored by another replica.
virtual bool startInsertingDataToPartitionInReplicatedTable(
const String & host_id, const DatabaseAndTableName & table_name, const String & table_zk_path, const String & partition_name)
= 0;
virtual bool acquireInsertingDataIntoReplicatedTable(const String & table_zk_path) = 0;
/// Sets that this replica is going to restore a ReplicatedAccessStorage.
/// The function returns false if this access storage is already being restored by another replica.
virtual bool acquireReplicatedAccessStorage(const String & access_storage_zk_path) = 0;
/// Removes remotely stored information.
virtual void drop() {}
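/// Illustrative usage sketch (not part of this diff): the intended contract of syncStage() and
/// syncStageError() above. A host announces the stage it has reached and waits for the other hosts;
/// on failure it reports the error so the waiters can stop early. The helper name is hypothetical.
void advanceToStageSketch(IRestoreCoordination & coordination, const String & current_host,
                          int stage, const Strings & all_hosts, std::chrono::seconds timeout)
{
    try
    {
        coordination.syncStage(current_host, stage, all_hosts, timeout);
    }
    catch (...)
    {
        coordination.syncStageError(current_host, getCurrentExceptionMessage(false));
        throw;
    }
}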

View File

@ -1,36 +0,0 @@
#pragma once
#include <memory>
#include <vector>
namespace DB
{
/// Represents a task of restoring something (database / table / table's part) from backup.
class IRestoreTask
{
public:
IRestoreTask() = default;
virtual ~IRestoreTask() = default;
enum class RestoreKind
{
/// This task restores metadata (definitions of databases and tables).
/// Tasks restoring metadata are executed first and strictly in one thread.
METADATA,
/// This task restores tables' data. Such tasks can be executed in parallel.
DATA,
};
virtual RestoreKind getRestoreKind() const { return RestoreKind::DATA; }
/// Performs restoring; the function can also return a list of nested tasks that should be run later.
virtual std::vector<std::unique_ptr<IRestoreTask>> run() = 0;
};
using RestoreTaskPtr = std::unique_ptr<IRestoreTask>;
using RestoreTasks = std::vector<RestoreTaskPtr>;
}

View File

@ -1,248 +1,15 @@
#include <Backups/RestoreCoordinationDistributed.h>
#include <Backups/formatTableNameOrTemporaryTableName.h>
#include <Common/ZooKeeper/KeeperException.h>
#include <Common/escapeForFileName.h>
#include <Common/logger_useful.h>
#include <IO/WriteHelpers.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteBufferFromString.h>
#include <IO/ReadBufferFromString.h>
#include <base/chrono_io.h>
namespace DB
{
namespace ErrorCodes
{
extern const int FAILED_TO_SYNC_BACKUP_OR_RESTORE;
}
namespace
{
struct ReplicatedTableDataPath
{
String host_id;
DatabaseAndTableName table_name;
String data_path_in_backup;
String serialize() const
{
WriteBufferFromOwnString out;
writeBinary(host_id, out);
writeBinary(table_name.first, out);
writeBinary(table_name.second, out);
writeBinary(data_path_in_backup, out);
return out.str();
}
static ReplicatedTableDataPath deserialize(const String & str)
{
ReadBufferFromString in{str};
ReplicatedTableDataPath res;
readBinary(res.host_id, in);
readBinary(res.table_name.first, in);
readBinary(res.table_name.second, in);
readBinary(res.data_path_in_backup, in);
return res;
}
};
}
class RestoreCoordinationDistributed::ReplicatedDatabasesMetadataSync
{
public:
ReplicatedDatabasesMetadataSync(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_)
: zookeeper_path(zookeeper_path_), get_zookeeper(get_zookeeper_), log(&Poco::Logger::get("RestoreCoordination"))
{
createRootNodes();
}
/// Starts creating a table in a replicated database. Returns false if there is another host which is already creating this table.
bool startCreatingTable(
const String & host_id_, const String & database_name_, const String & database_zk_path_, const String & table_name_)
{
auto zookeeper = get_zookeeper();
String path = zookeeper_path + "/" + escapeForFileName(database_zk_path_);
zookeeper->createIfNotExists(path, "");
TableStatus status;
status.host_id = host_id_;
status.table_name = DatabaseAndTableName{database_name_, table_name_};
path += "/" + escapeForFileName(table_name_);
auto code = zookeeper->tryCreate(path, status.serialize(), zkutil::CreateMode::Persistent);
if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNODEEXISTS))
throw zkutil::KeeperException(code, path);
return (code == Coordination::Error::ZOK);
}
/// Sets that we have either created a table in a replicated database or failed to do so.
/// In the latter case `error_message` should be set.
/// Calling this function unblocks other hosts waiting for this table to be created (see waitForCreatingTableInReplicatedDB()).
void finishCreatingTable(
const String & /* host_id_ */,
const String & database_name_,
const String & database_zk_path_,
const String & table_name_,
const String & error_message_)
{
if (error_message_.empty())
LOG_TRACE(log, "Created table {}.{}", database_name_, table_name_);
else
LOG_TRACE(log, "Failed to created table {}.{}: {}", database_name_, table_name_, error_message_);
auto zookeeper = get_zookeeper();
String path = zookeeper_path + "/" + escapeForFileName(database_zk_path_) + "/" + escapeForFileName(table_name_);
auto status = TableStatus::deserialize(zookeeper->get(path));
status.error_message = error_message_;
status.ready = error_message_.empty();
zookeeper->set(path, status.serialize());
}
/// Wait for another host to create a table in a replicated database.
void waitForTableCreated(
const String & /* database_name_ */, const String & database_zk_path_, const String & table_name_, std::chrono::seconds timeout_)
{
auto zookeeper = get_zookeeper();
String path = zookeeper_path + "/" + escapeForFileName(database_zk_path_) + "/" + escapeForFileName(table_name_);
TableStatus status;
std::atomic<bool> watch_set = false;
std::condition_variable watch_triggered_event;
auto watch_callback = [&](const Coordination::WatchResponse &)
{
watch_set = false; /// After it's triggered it's not set until we call getChildrenWatch() again.
watch_triggered_event.notify_all();
};
auto watch_triggered = [&] { return !watch_set; };
bool use_timeout = (timeout_.count() >= 0);
std::chrono::steady_clock::duration time_left = timeout_;
std::mutex dummy_mutex;
while (true)
{
if (use_timeout && (time_left.count() <= 0))
{
status = TableStatus::deserialize(zookeeper->get(path));
break;
}
watch_set = true;
status = TableStatus::deserialize(zookeeper->getWatch(path, nullptr, watch_callback));
if (!status.error_message.empty() || status.ready)
break;
LOG_TRACE(log, "Waiting for host {} to create table {}.{}", status.host_id, status.table_name.first, status.table_name.second);
{
std::unique_lock dummy_lock{dummy_mutex};
if (use_timeout)
{
std::chrono::steady_clock::time_point start_time = std::chrono::steady_clock::now();
if (!watch_triggered_event.wait_for(dummy_lock, time_left, watch_triggered))
break;
time_left -= (std::chrono::steady_clock::now() - start_time);
}
else
watch_triggered_event.wait(dummy_lock, watch_triggered);
}
}
if (watch_set)
{
/// Remove watch by triggering it.
++status.increment;
zookeeper->set(path, status.serialize());
std::unique_lock dummy_lock{dummy_mutex};
watch_triggered_event.wait_for(dummy_lock, timeout_, watch_triggered);
}
if (!status.error_message.empty())
throw Exception(
ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE,
"Host {} failed to create table {}.{}: {}", status.host_id, status.table_name.first, status.table_name.second, status.error_message);
if (status.ready)
{
LOG_TRACE(log, "Host {} created table {}.{}", status.host_id, status.table_name.first, status.table_name.second);
return;
}
throw Exception(
ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE,
"Host {} was unable to create table {}.{} in {}",
status.host_id,
status.table_name.first,
table_name_,
to_string(timeout_));
}
private:
void createRootNodes()
{
auto zookeeper = get_zookeeper();
zookeeper->createAncestors(zookeeper_path);
zookeeper->createIfNotExists(zookeeper_path, "");
}
struct TableStatus
{
String host_id;
DatabaseAndTableName table_name;
bool ready = false;
String error_message;
size_t increment = 0;
String serialize() const
{
WriteBufferFromOwnString out;
writeBinary(host_id, out);
writeBinary(table_name.first, out);
writeBinary(table_name.second, out);
writeBinary(ready, out);
writeBinary(error_message, out);
writeBinary(increment, out);
return out.str();
}
static TableStatus deserialize(const String & str)
{
ReadBufferFromString in{str};
TableStatus res;
readBinary(res.host_id, in);
readBinary(res.table_name.first, in);
readBinary(res.table_name.second, in);
readBinary(res.ready, in);
readBinary(res.error_message, in);
readBinary(res.increment, in);
return res;
}
};
const String zookeeper_path;
const zkutil::GetZooKeeper get_zookeeper;
const Poco::Logger * log;
};
RestoreCoordinationDistributed::RestoreCoordinationDistributed(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_)
: zookeeper_path(zookeeper_path_)
, get_zookeeper(get_zookeeper_)
, replicated_databases_metadata_sync(
std::make_unique<ReplicatedDatabasesMetadataSync>(zookeeper_path_ + "/repl_databases_metadata", get_zookeeper_))
, all_metadata_barrier(zookeeper_path_ + "/all_metadata", get_zookeeper_, "RestoreCoordination", "restoring metadata")
, stage_sync(zookeeper_path_ + "/stage", get_zookeeper_, &Poco::Logger::get("RestoreCoordination"))
{
createRootNodes();
}
@ -254,8 +21,58 @@ void RestoreCoordinationDistributed::createRootNodes()
auto zookeeper = get_zookeeper();
zookeeper->createAncestors(zookeeper_path);
zookeeper->createIfNotExists(zookeeper_path, "");
zookeeper->createIfNotExists(zookeeper_path + "/repl_tables_paths", "");
zookeeper->createIfNotExists(zookeeper_path + "/repl_tables_partitions", "");
zookeeper->createIfNotExists(zookeeper_path + "/repl_databases_tables_acquired", "");
zookeeper->createIfNotExists(zookeeper_path + "/repl_tables_data_acquired", "");
zookeeper->createIfNotExists(zookeeper_path + "/repl_access_storages_acquired", "");
}
void RestoreCoordinationDistributed::syncStage(const String & current_host, int new_stage, const Strings & wait_hosts, std::chrono::seconds timeout)
{
stage_sync.syncStage(current_host, new_stage, wait_hosts, timeout);
}
void RestoreCoordinationDistributed::syncStageError(const String & current_host, const String & error_message)
{
stage_sync.syncStageError(current_host, error_message);
}
bool RestoreCoordinationDistributed::acquireCreatingTableInReplicatedDatabase(const String & database_zk_path, const String & table_name)
{
auto zookeeper = get_zookeeper();
String path = zookeeper_path + "/repl_databases_tables_acquired/" + escapeForFileName(database_zk_path);
zookeeper->createIfNotExists(path, "");
path += "/" + escapeForFileName(table_name);
auto code = zookeeper->tryCreate(path, "", zkutil::CreateMode::Persistent);
if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNODEEXISTS))
throw zkutil::KeeperException(code, path);
return (code == Coordination::Error::ZOK);
}
bool RestoreCoordinationDistributed::acquireInsertingDataIntoReplicatedTable(const String & table_zk_path)
{
auto zookeeper = get_zookeeper();
String path = zookeeper_path + "/repl_tables_data_acquired/" + escapeForFileName(table_zk_path);
auto code = zookeeper->tryCreate(path, "", zkutil::CreateMode::Persistent);
if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNODEEXISTS))
throw zkutil::KeeperException(code, path);
return (code == Coordination::Error::ZOK);
}
bool RestoreCoordinationDistributed::acquireReplicatedAccessStorage(const String & access_storage_zk_path)
{
auto zookeeper = get_zookeeper();
String path = zookeeper_path + "/repl_access_storages_acquired/" + escapeForFileName(access_storage_zk_path);
auto code = zookeeper->tryCreate(path, "", zkutil::CreateMode::Persistent);
if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNODEEXISTS))
throw zkutil::KeeperException(code, path);
return (code == Coordination::Error::ZOK);
}
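/// Illustrative usage sketch (not part of this diff): how a restore worker could use the acquire*
/// methods above so that only one replica restores the data of a given replicated table.
/// The helper name is hypothetical.
void restoreReplicatedTableDataSketch(IRestoreCoordination & coordination, const String & table_zk_path)
{
    if (!coordination.acquireInsertingDataIntoReplicatedTable(table_zk_path))
        return;  /// another replica has already acquired the right to restore this table's data
    /// ... attach parts / insert data for this table on this replica ...
}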
void RestoreCoordinationDistributed::removeAllNodes()
@ -264,104 +81,6 @@ void RestoreCoordinationDistributed::removeAllNodes()
zookeeper->removeRecursive(zookeeper_path);
}
bool RestoreCoordinationDistributed::startCreatingTableInReplicatedDB(
const String & host_id, const String & database_name, const String & database_zk_path, const String & table_name)
{
return replicated_databases_metadata_sync->startCreatingTable(host_id, database_name, database_zk_path, table_name);
}
/// Ends creating a table in a replicated database, successfully or with an error.
/// In the latter case `error_message` should be set.
void RestoreCoordinationDistributed::finishCreatingTableInReplicatedDB(
const String & host_id,
const String & database_name,
const String & database_zk_path,
const String & table_name,
const String & error_message)
{
return replicated_databases_metadata_sync->finishCreatingTable(host_id, database_name, database_zk_path, table_name, error_message);
}
/// Wait for another host to create a table in a replicated database.
void RestoreCoordinationDistributed::waitForTableCreatedInReplicatedDB(
const String & database_name, const String & database_zk_path, const String & table_name, std::chrono::seconds timeout)
{
return replicated_databases_metadata_sync->waitForTableCreated(database_name, database_zk_path, table_name, timeout);
}
void RestoreCoordinationDistributed::finishRestoringMetadata(const String & host_id, const String & error_message)
{
all_metadata_barrier.finish(host_id, error_message);
}
void RestoreCoordinationDistributed::waitForAllHostsRestoredMetadata(const Strings & host_ids, std::chrono::seconds timeout) const
{
all_metadata_barrier.waitForAllHostsToFinish(host_ids, timeout);
}
void RestoreCoordinationDistributed::addReplicatedTableDataPath(
const String & host_id,
const DatabaseAndTableName & table_name,
const String & table_zk_path,
const String & data_path_in_backup)
{
auto zookeeper = get_zookeeper();
String path = zookeeper_path + "/repl_tables_paths/" + escapeForFileName(table_zk_path);
ReplicatedTableDataPath new_info;
new_info.host_id = host_id;
new_info.table_name = table_name;
new_info.data_path_in_backup = data_path_in_backup;
String new_info_str = new_info.serialize();
auto code = zookeeper->tryCreate(path, new_info_str, zkutil::CreateMode::Persistent);
if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNODEEXISTS))
throw zkutil::KeeperException(code, path);
while (code != Coordination::Error::ZOK)
{
Coordination::Stat stat;
ReplicatedTableDataPath cur_info = ReplicatedTableDataPath::deserialize(zookeeper->get(path, &stat));
if ((cur_info.host_id < host_id) || ((cur_info.host_id == host_id) && (cur_info.table_name <= table_name)))
break;
code = zookeeper->trySet(path, new_info_str, stat.version);
if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZBADVERSION))
throw zkutil::KeeperException(code, path);
}
}
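/// Illustrative sketch (not part of this diff): the rule implemented by the retry loop above,
/// shown as a simplified in-memory equivalent. The entry written by the lexicographically smallest
/// (host_id, table_name) pair wins; the struct name is hypothetical and <map>/<tuple> are assumed available.
struct DataPathRegistrySketch
{
    std::map<String, ReplicatedTableDataPath> by_zk_path;

    void add(const String & table_zk_path, const ReplicatedTableDataPath & new_info)
    {
        auto it = by_zk_path.find(table_zk_path);
        if (it == by_zk_path.end()
            || std::tie(new_info.host_id, new_info.table_name) < std::tie(it->second.host_id, it->second.table_name))
            by_zk_path[table_zk_path] = new_info;
        /// Otherwise the existing entry is kept, exactly as the versioned trySet() loop above keeps it.
    }
};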
String RestoreCoordinationDistributed::getReplicatedTableDataPath(const String & table_zk_path_) const
{
auto zookeeper = get_zookeeper();
String path = zookeeper_path + "/repl_tables_paths/" + escapeForFileName(table_zk_path_);
auto info = ReplicatedTableDataPath::deserialize(zookeeper->get(path));
return info.data_path_in_backup;
}
bool RestoreCoordinationDistributed::startInsertingDataToPartitionInReplicatedTable(
const String & host_id_,
const DatabaseAndTableName & table_name_,
const String & table_zk_path_,
const String & partition_name_)
{
auto zookeeper = get_zookeeper();
String path = zookeeper_path + "/repl_tables_partitions/" + escapeForFileName(table_zk_path_);
zookeeper->createIfNotExists(path, "");
path += "/" + escapeForFileName(partition_name_);
String new_info = host_id_ + "|" + table_name_.first + "|" + table_name_.second;
auto code = zookeeper->tryCreate(path, new_info, zkutil::CreateMode::Persistent);
if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNODEEXISTS))
throw zkutil::KeeperException(code, path);
if (code == Coordination::Error::ZOK)
return true;
return zookeeper->get(path) == new_info;
}
void RestoreCoordinationDistributed::drop()
{
removeAllNodes();

View File

@ -2,7 +2,6 @@
#include <Backups/IRestoreCoordination.h>
#include <Backups/BackupCoordinationHelpers.h>
#include <Common/ZooKeeper/Common.h>
namespace DB
@ -15,50 +14,22 @@ public:
RestoreCoordinationDistributed(const String & zookeeper_path, zkutil::GetZooKeeper get_zookeeper);
~RestoreCoordinationDistributed() override;
/// Sets the current stage and waits for other hosts to come to this stage too.
void syncStage(const String & current_host, int new_stage, const Strings & wait_hosts, std::chrono::seconds timeout) override;
/// Sets that the current host encountered an error, so other hosts should know that and stop waiting in syncStage().
void syncStageError(const String & current_host, const String & error_message) override;
/// Starts creating a table in a replicated database. Returns false if there is another host which is already creating this table.
bool startCreatingTableInReplicatedDB(
const String & host_id, const String & database_name, const String & database_zk_path, const String & table_name) override;
/// Sets that we have either created a table in a replicated database or failed to do so.
/// In the latter case `error_message` should be set.
/// Calling this function unblocks other hosts waiting for this table to be created (see waitForCreatingTableInReplicatedDB()).
void finishCreatingTableInReplicatedDB(
const String & host_id,
const String & database_name,
const String & database_zk_path,
const String & table_name,
const String & error_message) override;
/// Wait for another host to create a table in a replicated database.
void waitForTableCreatedInReplicatedDB(
const String & database_name, const String & database_zk_path, const String & table_name, std::chrono::seconds timeout) override;
/// Sets path in backup used by a replicated table.
/// This function can be called multiple times for the same table with different `host_id`, and in that case
/// getReplicatedTableDataPath() will choose `data_path_in_backup` with the lexicographically first `host_id`.
void addReplicatedTableDataPath(
const String & host_id,
const DatabaseAndTableName & table_name,
const String & table_zk_path,
const String & data_path_in_backup) override;
/// Sets that a specified host has finished restoring metadata, successfully or with an error.
/// In the latter case `error_message` should be set.
void finishRestoringMetadata(const String & host_id, const String & error_message) override;
/// Waits for all hosts to finish restoring their metadata (i.e. to finish creating databases and tables). Returns false if time is out.
void waitForAllHostsRestoredMetadata(const Strings & host_ids, std::chrono::seconds timeout) const override;
/// Gets path in backup used by a replicated table.
String getReplicatedTableDataPath(const String & table_zk_path) const override;
bool acquireCreatingTableInReplicatedDatabase(const String & database_zk_path, const String & table_name) override;
/// Sets that this replica is going to restore a partition in a replicated table.
/// The function returns false if this partition is already being restored by another replica.
bool startInsertingDataToPartitionInReplicatedTable(
const String & host_id,
const DatabaseAndTableName & table_name,
const String & table_zk_path,
const String & partition_name) override;
bool acquireInsertingDataIntoReplicatedTable(const String & table_zk_path) override;
/// Sets that this replica is going to restore a ReplicatedAccessStorage.
/// The function returns false if this access storage is already being restored by another replica.
bool acquireReplicatedAccessStorage(const String & access_storage_zk_path) override;
/// Removes remotely stored information.
void drop() override;
@ -71,8 +42,7 @@ private:
const String zookeeper_path;
const zkutil::GetZooKeeper get_zookeeper;
std::unique_ptr<ReplicatedDatabasesMetadataSync> replicated_databases_metadata_sync;
BackupCoordinationDistributedBarrier all_metadata_barrier;
BackupCoordinationStageSync stage_sync;
};
}

View File

@ -1,107 +1,35 @@
#include <Backups/RestoreCoordinationLocal.h>
#include <Backups/formatTableNameOrTemporaryTableName.h>
#include <Common/Exception.h>
#include <Common/logger_useful.h>
#include <base/chrono_io.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
RestoreCoordinationLocal::RestoreCoordinationLocal()
: log(&Poco::Logger::get("RestoreCoordination"))
{}
RestoreCoordinationLocal::RestoreCoordinationLocal() = default;
RestoreCoordinationLocal::~RestoreCoordinationLocal() = default;
bool RestoreCoordinationLocal::startCreatingTableInReplicatedDB(
const String & /* host_id */,
const String & /* database_name */,
const String & /* database_zk_path */,
const String & /* table_name */)
void RestoreCoordinationLocal::syncStage(const String &, int, const Strings &, std::chrono::seconds)
{
}
void RestoreCoordinationLocal::syncStageError(const String &, const String &)
{
}
bool RestoreCoordinationLocal::acquireCreatingTableInReplicatedDatabase(const String & database_zk_path, const String & table_name)
{
std::lock_guard lock{mutex};
return acquired_tables_in_replicated_databases.emplace(std::pair<String, String>{database_zk_path, table_name}).second;
}
bool RestoreCoordinationLocal::acquireInsertingDataIntoReplicatedTable(const String & table_zk_path)
{
std::lock_guard lock{mutex};
return acquired_data_in_replicated_tables.emplace(table_zk_path).second;
}
bool RestoreCoordinationLocal::acquireReplicatedAccessStorage(const String &)
{
return true;
}
void RestoreCoordinationLocal::finishCreatingTableInReplicatedDB(
const String & /* host_id */,
const String & database_name,
const String & /* database_zk_path */,
const String & table_name,
const String & error_message)
{
if (error_message.empty())
LOG_TRACE(log, "Created table {}.{}", database_name, table_name);
else
LOG_TRACE(log, "Failed to created table {}.{}: {}", database_name, table_name, error_message);
}
/// Wait for another host to create a table in a replicated database.
void RestoreCoordinationLocal::waitForTableCreatedInReplicatedDB(
const String & /* database_name */,
const String & /* database_zk_path */,
const String & /* table_name */,
std::chrono::seconds /* timeout */)
{
}
void RestoreCoordinationLocal::finishRestoringMetadata(const String & /* host_id */, const String & error_message)
{
LOG_TRACE(log, "Finished restoring metadata{}", (error_message.empty() ? "" : (" with error " + error_message)));
}
void RestoreCoordinationLocal::waitForAllHostsRestoredMetadata(const Strings & /* host_ids */, std::chrono::seconds /* timeout */) const
{
}
void RestoreCoordinationLocal::addReplicatedTableDataPath(const String & /* host_id */,
const DatabaseAndTableName & table_name,
const String & table_zk_path,
const String & data_path_in_backup)
{
std::lock_guard lock{mutex};
auto it = replicated_tables_data_paths.find(table_zk_path);
if (it == replicated_tables_data_paths.end())
{
ReplicatedTableDataPath new_info;
new_info.table_name = table_name;
new_info.data_path_in_backup = data_path_in_backup;
replicated_tables_data_paths.emplace(table_zk_path, std::move(new_info));
return;
}
else
{
auto & cur_info = it->second;
if (table_name < cur_info.table_name)
{
cur_info.table_name = table_name;
cur_info.data_path_in_backup = data_path_in_backup;
}
}
}
String RestoreCoordinationLocal::getReplicatedTableDataPath(const String & table_zk_path) const
{
std::lock_guard lock{mutex};
auto it = replicated_tables_data_paths.find(table_zk_path);
if (it == replicated_tables_data_paths.end())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Replicated data path is not set for zk_path={}", table_zk_path);
return it->second.data_path_in_backup;
}
bool RestoreCoordinationLocal::startInsertingDataToPartitionInReplicatedTable(
const String & /* host_id */, const DatabaseAndTableName & table_name, const String & table_zk_path, const String & partition_name)
{
std::lock_guard lock{mutex};
auto key = std::pair{table_zk_path, partition_name};
auto it = replicated_tables_partitions.try_emplace(std::move(key), table_name).first;
return it->second == table_name;
}
}

View File

@ -1,10 +1,9 @@
#pragma once
#include <Backups/IRestoreCoordination.h>
#include <condition_variable>
#include <map>
#include <mutex>
#include <unordered_map>
#include <set>
#include <unordered_set>
namespace Poco { class Logger; }
@ -18,64 +17,27 @@ public:
RestoreCoordinationLocal();
~RestoreCoordinationLocal() override;
/// Sets the current stage and waits for other hosts to come to this stage too.
void syncStage(const String & current_host, int stage, const Strings & wait_hosts, std::chrono::seconds timeout) override;
/// Sets that the current host encountered an error, so other hosts should know that and stop waiting in syncStage().
void syncStageError(const String & current_host, const String & error_message) override;
/// Starts creating a table in a replicated database. Returns false if there is another host which is already creating this table.
bool startCreatingTableInReplicatedDB(
const String & host_id, const String & database_name, const String & database_zk_path, const String & table_name) override;
/// Sets that we have either created a table in a replicated database or failed to do so.
/// In the latter case `error_message` should be set.
/// Calling this function unblocks other hosts waiting for this table to be created (see waitForCreatingTableInReplicatedDB()).
void finishCreatingTableInReplicatedDB(
const String & host_id,
const String & database_name,
const String & database_zk_path,
const String & table_name,
const String & error_message) override;
/// Wait for another host to create a table in a replicated database.
void waitForTableCreatedInReplicatedDB(
const String & database_name, const String & database_zk_path, const String & table_name, std::chrono::seconds timeout) override;
/// Sets path in backup used by a replicated table.
/// This function can be called multiple times for the same table with different `host_id`, and in that case
/// getReplicatedTableDataPath() will choose `data_path_in_backup` with the lexicographically first `host_id`.
void addReplicatedTableDataPath(
const String & host_id,
const DatabaseAndTableName & table_name,
const String & table_zk_path,
const String & data_path_in_backup) override;
/// Sets that a specified host has finished restoring metadata, successfully or with an error.
/// In the latter case `error_message` should be set.
void finishRestoringMetadata(const String & host_id, const String & error_message) override;
/// Waits for all hosts to finish restoring their metadata (i.e. to finish creating databases and tables). Returns false if time is out.
void waitForAllHostsRestoredMetadata(const Strings & host_ids, std::chrono::seconds timeout) const override;
/// Gets path in backup used by a replicated table.
String getReplicatedTableDataPath(const String & table_zk_path) const override;
bool acquireCreatingTableInReplicatedDatabase(const String & database_zk_path, const String & table_name) override;
/// Sets that this replica is going to restore a partition in a replicated table.
/// The function returns false if this partition is already being restored by another replica.
bool startInsertingDataToPartitionInReplicatedTable(
const String & host_id,
const DatabaseAndTableName & table_name,
const String & table_zk_path,
const String & partition_name) override;
bool acquireInsertingDataIntoReplicatedTable(const String & table_zk_path) override;
/// Sets that this replica is going to restore a ReplicatedAccessStorage.
/// The function returns false if this access storage is already being restored by another replica.
bool acquireReplicatedAccessStorage(const String & access_storage_zk_path) override;
private:
struct ReplicatedTableDataPath
{
DatabaseAndTableName table_name;
String data_path_in_backup;
};
std::unordered_map<String /* table_zk_path */, ReplicatedTableDataPath> replicated_tables_data_paths;
std::map<std::pair<String /* table_zk_path */, String /* partition_name */>, DatabaseAndTableName> replicated_tables_partitions;
std::set<std::pair<String /* database_zk_path */, String /* table_name */>> acquired_tables_in_replicated_databases;
std::unordered_set<String /* table_zk_path */> acquired_data_in_replicated_tables;
mutable std::mutex mutex;
const Poco::Logger * log;
};
}

View File

@ -29,22 +29,43 @@ namespace
if (field.getType() == Field::Types::String)
{
const String & str = field.get<const String &>();
if (str == "1" || boost::iequals(str, "true"))
if (str == "1" || boost::iequals(str, "true") || boost::iequals(str, "create"))
{
value = RestoreTableCreationMode::kCreate;
else if (str == "0" || boost::iequals(str, "false"))
return;
}
if (str == "0" || boost::iequals(str, "false") || boost::iequals(str, "must exist") || boost::iequals(str, "must-exist"))
{
value = RestoreTableCreationMode::kMustExist;
else if (boost::iequals(str, "if not exists"))
return;
}
if (boost::iequals(str, "if not exists") || boost::iequals(str, "if-not-exists")
|| boost::iequals(str, "create if not exists") || boost::iequals(str, "create-if-not-exists"))
{
value = RestoreTableCreationMode::kCreateIfNotExists;
else throw Exception("Cannot parse creation mode from string '" + str + "'",
ErrorCodes::CANNOT_PARSE_BACKUP_SETTINGS);
return;
}
}
else
if (field.getType() == Field::Types::UInt64)
{
if (applyVisitor(FieldVisitorConvertToNumber<bool>(), field))
UInt64 number = field.get<UInt64>();
if (number == 1)
{
value = RestoreTableCreationMode::kCreate;
else
return;
}
if (number == 0)
{
value = RestoreTableCreationMode::kMustExist;
return;
}
}
throw Exception(ErrorCodes::CANNOT_PARSE_BACKUP_SETTINGS, "Cannot parse creation mode from {}", field);
}
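/// Summary (illustrative, not part of this diff) of the spellings accepted by the parser above:
///   "1", "true", "create"                                        -> kCreate
///   "0", "false", "must exist", "must-exist"                     -> kMustExist
///   "if not exists", "if-not-exists",
///   "create if not exists", "create-if-not-exists"               -> kCreateIfNotExists
///   numeric 1 -> kCreate, numeric 0 -> kMustExist; anything else throws CANNOT_PARSE_BACKUP_SETTINGS.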
explicit operator Field() const
@ -60,6 +81,62 @@ namespace
};
using SettingFieldRestoreDatabaseCreationMode = SettingFieldRestoreTableCreationMode;
struct SettingFieldRestoreAccessCreationMode
{
RestoreAccessCreationMode value;
explicit SettingFieldRestoreAccessCreationMode(RestoreAccessCreationMode value_) : value(value_) {}
explicit SettingFieldRestoreAccessCreationMode(const Field & field)
{
if (field.getType() == Field::Types::String)
{
const String & str = field.get<const String &>();
if (str == "1" || boost::iequals(str, "true") || boost::iequals(str, "create"))
{
value = RestoreAccessCreationMode::kCreate;
return;
}
if (boost::iequals(str, "if not exists") || boost::iequals(str, "if-not-exists")
|| boost::iequals(str, "create if not exists") || boost::iequals(str, "create-if-not-exists"))
{
value = RestoreAccessCreationMode::kCreateIfNotExists;
return;
}
if (boost::iequals(str, "replace") || boost::iequals(str, "create or replace") || boost::iequals(str, "create-or-replace"))
{
value = RestoreAccessCreationMode::kReplace;
return;
}
}
if (field.getType() == Field::Types::UInt64)
{
UInt64 number = field.get<UInt64>();
if (number == 1)
{
value = RestoreAccessCreationMode::kCreate;
return;
}
}
throw Exception(ErrorCodes::CANNOT_PARSE_BACKUP_SETTINGS, "Cannot parse creation mode from {}", field);
}
explicit operator Field() const
{
switch (value)
{
case RestoreAccessCreationMode::kCreate: return Field{true};
case RestoreAccessCreationMode::kCreateIfNotExists: return Field{"if not exists"};
case RestoreAccessCreationMode::kReplace: return Field{"replace"};
}
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected value of enum RestoreAccessCreationMode: {}", static_cast<int>(value));
}
};
}
/// List of restore settings except base_backup_name and cluster_host_ids.
@ -76,6 +153,8 @@ namespace
M(UInt64, shard_num_in_backup) \
M(UInt64, replica_num_in_backup) \
M(Bool, allow_non_empty_tables) \
M(RestoreAccessCreationMode, create_access) \
M(Bool, allow_unresolved_access_dependencies) \
M(Bool, internal) \
M(String, host_id) \
M(String, coordination_zk_path)

View File

@ -8,13 +8,6 @@ namespace DB
{
class ASTBackupQuery;
struct StorageRestoreSettings
{
/// Internal, should not be specified by user.
/// The current host's ID in the format 'escaped_host_name:port'.
String host_id;
};
/// How the RESTORE command will handle table/database existence.
enum class RestoreTableCreationMode
{
@ -30,8 +23,21 @@ enum class RestoreTableCreationMode
using RestoreDatabaseCreationMode = RestoreTableCreationMode;
/// How the RESTORE command will handle the case when a user (or role or profile) which it's going to restore already exists.
enum class RestoreAccessCreationMode
{
/// RESTORE will throw an exception if some user already exists.
kCreate,
/// RESTORE will skip existing users.
kCreateIfNotExists,
/// RESTORE will replace existing users with definitions from backup.
kReplace,
};
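/// Illustrative usage (not part of this diff): the `create_access` setting declared below accepts
/// the values parsed by SettingFieldRestoreAccessCreationMode in RestoreSettings.cpp, e.g.
///   RESTORE ... SETTINGS create_access = 'if not exists'  -- skip access entities that already exist
///   RESTORE ... SETTINGS create_access = 'replace'        -- replace existing entities with the backup's definitions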
/// Settings specified in the "SETTINGS" clause of a RESTORE query.
struct RestoreSettings : public StorageRestoreSettings
struct RestoreSettings
{
/// Base backup: with this setting we can override the location of the base backup while restoring.
/// Any incremental backup keeps the information about its base backup inside, so using this setting is optional.
@ -86,9 +92,20 @@ struct RestoreSettings : public StorageRestoreSettings
/// Setting "allow_non_empty_tables=true" thus can cause data duplication in the table, use with caution.
bool allow_non_empty_tables = false;
/// How the RESTORE command will handle the case when a user (or role or profile) which it's going to restore already exists.
RestoreAccessCreationMode create_access = RestoreAccessCreationMode::kCreateIfNotExists;
/// Skip dependencies of access entities which can't be resolved.
/// For example, if a user has a profile assigned and that profile is not in the backup and doesn't exist locally.
bool allow_unresolved_access_dependencies = false;
/// Internal, should not be specified by user.
bool internal = false;
/// Internal, should not be specified by user.
/// The current host's ID in the format 'escaped_host_name:port'.
String host_id;
/// Internal, should not be specified by user.
/// Cluster's hosts' IDs in the format 'escaped_host_name:port' for all shards and replicas in a cluster specified in BACKUP ON CLUSTER.
std::vector<Strings> cluster_host_ids;

File diff suppressed because it is too large

View File

@ -1,38 +0,0 @@
#pragma once
#include <Parsers/ASTBackupQuery.h>
#include <Common/ThreadPool.h>
namespace DB
{
class IBackup;
using BackupPtr = std::shared_ptr<const IBackup>;
class IRestoreTask;
using RestoreTaskPtr = std::unique_ptr<IRestoreTask>;
using RestoreTasks = std::vector<RestoreTaskPtr>;
struct RestoreSettings;
class IRestoreCoordination;
class AccessRightsElements;
class Context;
using ContextPtr = std::shared_ptr<const Context>;
using ContextMutablePtr = std::shared_ptr<Context>;
/// Prepares restore tasks.
RestoreTasks makeRestoreTasks(ContextMutablePtr context, const BackupPtr & backup, const ASTBackupQuery::Elements & elements, const RestoreSettings & restore_settings, const std::shared_ptr<IRestoreCoordination> & restore_coordination, std::chrono::seconds timeout_for_restoring_metadata);
/// Executes restore tasks.
void restoreMetadata(
RestoreTasks & restore_tasks,
const RestoreSettings & restore_settings,
const std::shared_ptr<IRestoreCoordination> & restore_coordination,
std::chrono::seconds timeout_for_restoring_metadata);
void restoreData(RestoreTasks & restore_tasks, ThreadPool & thread_pool);
/// Returns access required to execute RESTORE query.
AccessRightsElements getRequiredAccessToRestore(const ASTBackupQuery::Elements & elements, const RestoreSettings & restore_settings);
}

View File

@ -0,0 +1,817 @@
#include <Backups/RestorerFromBackup.h>
#include <Backups/IRestoreCoordination.h>
#include <Backups/BackupSettings.h>
#include <Backups/IBackup.h>
#include <Backups/IBackupEntry.h>
#include <Backups/BackupUtils.h>
#include <Access/AccessBackup.h>
#include <Access/AccessRights.h>
#include <Parsers/ParserCreateQuery.h>
#include <Parsers/parseQuery.h>
#include <Parsers/formatAST.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/ASTFunction.h>
#include <Interpreters/DatabaseCatalog.h>
#include <Interpreters/Context.h>
#include <Interpreters/InterpreterCreateQuery.h>
#include <Databases/IDatabase.h>
#include <Databases/DDLDependencyVisitor.h>
#include <Storages/IStorage.h>
#include <Common/escapeForFileName.h>
#include <Common/quoteString.h>
#include <base/insertAtEnd.h>
#include <boost/algorithm/string/join.hpp>
#include <filesystem>
namespace fs = std::filesystem;
namespace DB
{
namespace ErrorCodes
{
extern const int BACKUP_ENTRY_NOT_FOUND;
extern const int CANNOT_RESTORE_TABLE;
extern const int CANNOT_RESTORE_DATABASE;
extern const int LOGICAL_ERROR;
}
namespace
{
constexpr const std::string_view sql_ext = ".sql";
String tryGetTableEngine(const IAST & ast)
{
const ASTCreateQuery * create = ast.as<ASTCreateQuery>();
if (!create)
return {};
if (!create->storage || !create->storage->engine)
return {};
return create->storage->engine->name;
}
bool hasSystemTableEngine(const IAST & ast)
{
return tryGetTableEngine(ast).starts_with("System");
}
bool hasSystemAccessTableEngine(const IAST & ast)
{
String engine_name = tryGetTableEngine(ast);
return (engine_name == "SystemUsers") || (engine_name == "SystemRoles") || (engine_name == "SystemSettingsProfiles")
|| (engine_name == "SystemRowPolicies") || (engine_name == "SystemQuotas");
}
}
bool RestorerFromBackup::TableKey::operator ==(const TableKey & right) const
{
return (name == right.name) && (is_temporary == right.is_temporary);
}
bool RestorerFromBackup::TableKey::operator <(const TableKey & right) const
{
return (name < right.name) || ((name == right.name) && (is_temporary < right.is_temporary));
}
std::string_view RestorerFromBackup::toString(Stage stage)
{
switch (stage)
{
case Stage::kPreparing: return "Preparing";
case Stage::kFindingTablesInBackup: return "Finding tables in backup";
case Stage::kCreatingDatabases: return "Creating databases";
case Stage::kCreatingTables: return "Creating tables";
case Stage::kInsertingDataToTables: return "Inserting data to tables";
case Stage::kError: return "Error";
}
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown restore stage: {}", static_cast<int>(stage));
}
RestorerFromBackup::RestorerFromBackup(
const ASTBackupQuery::Elements & restore_query_elements_,
const RestoreSettings & restore_settings_,
std::shared_ptr<IRestoreCoordination> restore_coordination_,
const BackupPtr & backup_,
const ContextMutablePtr & context_,
std::chrono::seconds timeout_)
: restore_query_elements(restore_query_elements_)
, restore_settings(restore_settings_)
, restore_coordination(restore_coordination_)
, backup(backup_)
, context(context_)
, timeout(timeout_)
, log(&Poco::Logger::get("RestorerFromBackup"))
{
}
RestorerFromBackup::~RestorerFromBackup() = default;
void RestorerFromBackup::restoreMetadata()
{
run(/* only_check_access= */ false);
}
void RestorerFromBackup::checkAccessOnly()
{
run(/* only_check_access= */ true);
}
void RestorerFromBackup::run(bool only_check_access)
{
try
{
/// restoreMetadata() must not be called multiple times.
if (current_stage != Stage::kPreparing)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Already restoring");
/// Calculate the root path in the backup for restoring; it's either empty or has the format "shards/<shard_num>/replicas/<replica_num>/".
findRootPathsInBackup();
/// Do renaming in the create queries according to the renaming config.
renaming_map = makeRenamingMapFromBackupQuery(restore_query_elements);
/// Find all the databases and tables which we will read from the backup.
setStage(Stage::kFindingTablesInBackup);
collectDatabaseAndTableInfos();
/// Check access rights.
checkAccessForCollectedInfos();
if (only_check_access)
return;
/// Create databases using the create queries read from the backup.
setStage(Stage::kCreatingDatabases);
createDatabases();
/// Create tables using the create queries read from the backup.
setStage(Stage::kCreatingTables);
createTables();
/// All that's left is to insert data into the tables.
/// No more data restoring tasks are allowed after this point.
setStage(Stage::kInsertingDataToTables);
}
catch (...)
{
try
{
/// Other hosts should know that we've encountered an error.
setStage(Stage::kError, getCurrentExceptionMessage(false));
}
catch (...)
{
}
throw;
}
}
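/// For reference: a successful run() above moves through the stages in this order (a descriptive summary, not extra
/// logic): kPreparing -> kFindingTablesInBackup -> kCreatingDatabases -> kCreatingTables -> kInsertingDataToTables;
/// any exception switches the restorer to kError and is rethrown to the caller.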
RestorerFromBackup::DataRestoreTasks RestorerFromBackup::getDataRestoreTasks()
{
if (current_stage != Stage::kInsertingDataToTables)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Metadata wasn't restored");
if (data_restore_tasks.empty() && !access_restore_task)
return {};
LOG_TRACE(log, "Will insert data to tables");
/// Storages and table locks must exist while we're executing data restoring tasks.
auto storages = std::make_shared<std::vector<StoragePtr>>();
auto table_locks = std::make_shared<std::vector<TableLockHolder>>();
storages->reserve(table_infos.size());
table_locks->reserve(table_infos.size());
for (const auto & table_info : table_infos | boost::adaptors::map_values)
{
storages->push_back(table_info.storage);
table_locks->push_back(table_info.table_lock);
}
DataRestoreTasks res_tasks;
for (const auto & task : data_restore_tasks)
res_tasks.push_back([task, storages, table_locks] { task(); });
if (access_restore_task)
res_tasks.push_back([task = access_restore_task, access_control = &context->getAccessControl()] { task->restore(*access_control); });
return res_tasks;
}
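/// The tasks returned by getDataRestoreTasks() above are plain std::function<void()> objects; each of them captures
/// the shared vectors of storages and table locks, so those stay valid even if the tasks are executed after this
/// restorer is destroyed. A minimal sketch of how a caller might drain them (`restorer` is an assumed name, not part
/// of this file):
///
///     for (auto & task : restorer.getDataRestoreTasks())
///         task(); /// or schedule each task on a thread pool and wait for all of them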
void RestorerFromBackup::setStage(Stage new_stage, const String & error_message)
{
if (new_stage == Stage::kError)
LOG_ERROR(log, "{} failed with error: {}", toString(current_stage), error_message);
else
LOG_TRACE(log, "{}", toString(new_stage));
current_stage = new_stage;
if (!restore_coordination)
return;
if (new_stage == Stage::kError)
{
restore_coordination->syncStageError(restore_settings.host_id, error_message);
}
else
{
auto all_hosts
= BackupSettings::Util::filterHostIDs(restore_settings.cluster_host_ids, restore_settings.shard_num, restore_settings.replica_num);
restore_coordination->syncStage(restore_settings.host_id, static_cast<int>(new_stage), all_hosts, timeout);
}
}
void RestorerFromBackup::findRootPathsInBackup()
{
size_t shard_num = 1;
size_t replica_num = 1;
if (!restore_settings.host_id.empty())
{
std::tie(shard_num, replica_num)
= BackupSettings::Util::findShardNumAndReplicaNum(restore_settings.cluster_host_ids, restore_settings.host_id);
}
root_paths_in_backup.clear();
/// Start with "/" as the root path and then add the shard- and replica-related parts to it.
fs::path root_path = "/";
root_paths_in_backup.push_back(root_path);
/// Add shard-related part to the root path.
Strings shards_in_backup = backup->listFiles(root_path / "shards");
if (shards_in_backup.empty())
{
if (restore_settings.shard_num_in_backup > 1)
throw Exception(ErrorCodes::BACKUP_ENTRY_NOT_FOUND, "No shard #{} in backup", restore_settings.shard_num_in_backup);
}
else
{
String shard_name;
if (restore_settings.shard_num_in_backup)
shard_name = std::to_string(restore_settings.shard_num_in_backup);
else if (shards_in_backup.size() == 1)
shard_name = shards_in_backup.front();
else
shard_name = std::to_string(shard_num);
if (std::find(shards_in_backup.begin(), shards_in_backup.end(), shard_name) == shards_in_backup.end())
throw Exception(ErrorCodes::BACKUP_ENTRY_NOT_FOUND, "No shard #{} in backup", shard_name);
root_path = root_path / "shards" / shard_name;
root_paths_in_backup.push_back(root_path);
}
/// Add replica-related part to the root path.
Strings replicas_in_backup = backup->listFiles(root_path / "replicas");
if (replicas_in_backup.empty())
{
if (restore_settings.replica_num_in_backup > 1)
throw Exception(ErrorCodes::BACKUP_ENTRY_NOT_FOUND, "No replica #{} in backup", restore_settings.replica_num_in_backup);
}
else
{
String replica_name;
if (restore_settings.replica_num_in_backup)
{
replica_name = std::to_string(restore_settings.replica_num_in_backup);
if (std::find(replicas_in_backup.begin(), replicas_in_backup.end(), replica_name) == replicas_in_backup.end())
throw Exception(ErrorCodes::BACKUP_ENTRY_NOT_FOUND, "No replica #{} in backup", replica_name);
}
else
{
replica_name = std::to_string(replica_num);
if (std::find(replicas_in_backup.begin(), replicas_in_backup.end(), replica_name) == replicas_in_backup.end())
replica_name = replicas_in_backup.front();
}
root_path = root_path / "replicas" / replica_name;
root_paths_in_backup.push_back(root_path);
}
/// Reverse the list of root paths, because we need it in the following order:
/// "/shards/<shard_num>/replicas/<replica_num>/" (first we search tables here)
/// "/shards/<shard_num>/" (then here)
/// "/" (and finally here)
std::reverse(root_paths_in_backup.begin(), root_paths_in_backup.end());
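/// Illustrative example (assuming this host resolved to shard 2 and replica 3 and the backup contains those entries):
/// after the reverse, root_paths_in_backup is {"/shards/2/replicas/3", "/shards/2", "/"}, so lookups try the most
/// specific path first and fall back to the backup root.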
LOG_TRACE(
log,
"Will use paths in backup: {}",
boost::algorithm::join(
root_paths_in_backup
| boost::adaptors::transformed([](const fs::path & path) -> String { return doubleQuoteString(String{path}); }),
", "));
}
void RestorerFromBackup::collectDatabaseAndTableInfos()
{
database_infos.clear();
table_infos.clear();
for (const auto & element : restore_query_elements)
{
switch (element.type)
{
case ASTBackupQuery::ElementType::TABLE:
{
collectTableInfo({element.database_name, element.table_name}, false, element.partitions);
break;
}
case ASTBackupQuery::ElementType::TEMPORARY_TABLE:
{
collectTableInfo({element.database_name, element.table_name}, true, element.partitions);
break;
}
case ASTBackupQuery::ElementType::DATABASE:
{
collectDatabaseInfo(element.database_name, element.except_tables, /* throw_if_no_database_metadata_in_backup= */ true);
break;
}
case ASTBackupQuery::ElementType::ALL:
{
collectAllDatabasesInfo(element.except_databases, element.except_tables);
break;
}
}
}
LOG_INFO(log, "Will restore {} databases and {} tables", database_infos.size(), table_infos.size());
}
void RestorerFromBackup::collectTableInfo(const QualifiedTableName & table_name_in_backup, bool is_temporary_table, const std::optional<ASTs> & partitions)
{
String database_name_in_backup = is_temporary_table ? DatabaseCatalog::TEMPORARY_DATABASE : table_name_in_backup.database;
std::optional<fs::path> metadata_path;
std::optional<fs::path> root_path_in_use;
for (const auto & root_path_in_backup : root_paths_in_backup)
{
fs::path try_metadata_path;
if (is_temporary_table)
{
try_metadata_path
= root_path_in_backup / "temporary_tables" / "metadata" / (escapeForFileName(table_name_in_backup.table) + ".sql");
}
else
{
try_metadata_path = root_path_in_backup / "metadata" / escapeForFileName(table_name_in_backup.database)
/ (escapeForFileName(table_name_in_backup.table) + ".sql");
}
if (backup->fileExists(try_metadata_path))
{
metadata_path = try_metadata_path;
root_path_in_use = root_path_in_backup;
break;
}
}
if (!metadata_path)
throw Exception(ErrorCodes::BACKUP_ENTRY_NOT_FOUND, "Table {} not found in backup", table_name_in_backup.getFullName());
TableKey table_key;
fs::path data_path_in_backup;
if (is_temporary_table)
{
data_path_in_backup = *root_path_in_use / "temporary_tables" / "data" / escapeForFileName(table_name_in_backup.table);
table_key.name.table = renaming_map.getNewTemporaryTableName(table_name_in_backup.table);
table_key.is_temporary = true;
}
else
{
data_path_in_backup
= *root_path_in_use / "data" / escapeForFileName(table_name_in_backup.database) / escapeForFileName(table_name_in_backup.table);
table_key.name = renaming_map.getNewTableName(table_name_in_backup);
}
auto read_buffer = backup->readFile(*metadata_path)->getReadBuffer();
String create_query_str;
readStringUntilEOF(create_query_str, *read_buffer);
read_buffer.reset();
ParserCreateQuery create_parser;
ASTPtr create_table_query = parseQuery(create_parser, create_query_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
renameDatabaseAndTableNameInCreateQuery(context->getGlobalContext(), renaming_map, create_table_query);
if (auto it = table_infos.find(table_key); it != table_infos.end())
{
const TableInfo & table_info = it->second;
if (table_info.create_table_query && (serializeAST(*table_info.create_table_query) != serializeAST(*create_table_query)))
{
throw Exception(
ErrorCodes::CANNOT_RESTORE_TABLE,
"Extracted two different create queries for the same {}table {}: {} and {}",
(is_temporary_table ? "temporary " : ""),
table_key.name.getFullName(),
serializeAST(*table_info.create_table_query),
serializeAST(*create_table_query));
}
}
TableInfo & res_table_info = table_infos[table_key];
res_table_info.create_table_query = create_table_query;
res_table_info.data_path_in_backup = data_path_in_backup;
res_table_info.dependencies = getDependenciesSetFromCreateQuery(context->getGlobalContext(), table_key.name, create_table_query);
if (partitions)
{
if (!res_table_info.partitions)
res_table_info.partitions.emplace();
insertAtEnd(*res_table_info.partitions, *partitions);
}
if (hasSystemAccessTableEngine(*create_table_query))
{
if (!access_restore_task)
access_restore_task = std::make_shared<AccessRestoreTask>(backup, restore_settings, restore_coordination);
access_restore_task->addDataPath(data_path_in_backup);
}
}
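/// Illustrative backup layout used by collectTableInfo() above (the names db.events and tmp_table are hypothetical):
/// a regular table is read from "<root>/metadata/db/events.sql" with its data under "<root>/data/db/events", while a
/// temporary table is read from "<root>/temporary_tables/metadata/tmp_table.sql" with its data under
/// "<root>/temporary_tables/data/tmp_table"; database and table names are passed through escapeForFileName().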
void RestorerFromBackup::collectDatabaseInfo(const String & database_name_in_backup, const std::set<DatabaseAndTableName> & except_table_names, bool throw_if_no_database_metadata_in_backup)
{
std::optional<fs::path> metadata_path;
std::unordered_set<String> table_names_in_backup;
for (const auto & root_path_in_backup : root_paths_in_backup)
{
fs::path try_metadata_path = root_path_in_backup / "metadata" / (escapeForFileName(database_name_in_backup) + ".sql");
if (!metadata_path && backup->fileExists(try_metadata_path))
metadata_path = try_metadata_path;
Strings file_names = backup->listFiles(root_path_in_backup / "metadata" / escapeForFileName(database_name_in_backup));
for (const String & file_name : file_names)
{
if (!file_name.ends_with(sql_ext))
continue;
String file_name_without_ext = file_name.substr(0, file_name.length() - sql_ext.length());
table_names_in_backup.insert(unescapeForFileName(file_name_without_ext));
}
}
if (!metadata_path && throw_if_no_database_metadata_in_backup)
throw Exception(ErrorCodes::BACKUP_ENTRY_NOT_FOUND, "Database {} not found in backup", backQuoteIfNeed(database_name_in_backup));
if (metadata_path)
{
auto read_buffer = backup->readFile(*metadata_path)->getReadBuffer();
String create_query_str;
readStringUntilEOF(create_query_str, *read_buffer);
read_buffer.reset();
ParserCreateQuery create_parser;
ASTPtr create_database_query = parseQuery(create_parser, create_query_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
renameDatabaseAndTableNameInCreateQuery(context->getGlobalContext(), renaming_map, create_database_query);
String database_name = renaming_map.getNewDatabaseName(database_name_in_backup);
DatabaseInfo & database_info = database_infos[database_name];
if (database_info.create_database_query && (serializeAST(*database_info.create_database_query) != serializeAST(*create_database_query)))
{
throw Exception(
ErrorCodes::CANNOT_RESTORE_DATABASE,
"Extracted two different create queries for the same database {}: {} and {}",
backQuoteIfNeed(database_name),
serializeAST(*database_info.create_database_query),
serializeAST(*create_database_query));
}
database_info.create_database_query = create_database_query;
}
for (const String & table_name_in_backup : table_names_in_backup)
{
if (except_table_names.contains({database_name_in_backup, table_name_in_backup}))
continue;
collectTableInfo({database_name_in_backup, table_name_in_backup}, /* is_temporary_table= */ false, /* partitions= */ {});
}
}
void RestorerFromBackup::collectAllDatabasesInfo(const std::set<String> & except_database_names, const std::set<DatabaseAndTableName> & except_table_names)
{
std::unordered_set<String> database_names_in_backup;
std::unordered_set<String> temporary_table_names_in_backup;
for (const auto & root_path_in_backup : root_paths_in_backup)
{
Strings file_names = backup->listFiles(root_path_in_backup / "metadata");
for (String & file_name : file_names)
{
if (file_name.ends_with(sql_ext))
file_name.resize(file_name.length() - sql_ext.length());
database_names_in_backup.emplace(unescapeForFileName(file_name));
}
file_names = backup->listFiles(root_path_in_backup / "temporary_tables" / "metadata");
for (String & file_name : file_names)
{
if (!file_name.ends_with(sql_ext))
continue;
file_name.resize(file_name.length() - sql_ext.length());
temporary_table_names_in_backup.emplace(unescapeForFileName(file_name));
}
}
for (const String & database_name_in_backup : database_names_in_backup)
{
if (except_database_names.contains(database_name_in_backup))
continue;
collectDatabaseInfo(database_name_in_backup, except_table_names, /* throw_if_no_database_metadata_in_backup= */ false);
}
for (const String & temporary_table_name_in_backup : temporary_table_names_in_backup)
collectTableInfo({"", temporary_table_name_in_backup}, /* is_temporary_table= */ true, /* partitions= */ {});
}
void RestorerFromBackup::checkAccessForCollectedInfos() const
{
AccessRightsElements required_access;
for (const auto & database_name : database_infos | boost::adaptors::map_keys)
{
if (DatabaseCatalog::isPredefinedDatabaseName(database_name))
continue;
AccessFlags flags;
if (restore_settings.create_database != RestoreDatabaseCreationMode::kMustExist)
flags |= AccessType::CREATE_DATABASE;
if (!flags)
flags = AccessType::SHOW_DATABASES;
required_access.emplace_back(flags, database_name);
}
for (const auto & [table_name, table_info] : table_infos)
{
if (hasSystemTableEngine(*table_info.create_table_query))
continue;
if (table_name.is_temporary)
{
if (restore_settings.create_table != RestoreTableCreationMode::kMustExist)
required_access.emplace_back(AccessType::CREATE_TEMPORARY_TABLE);
continue;
}
AccessFlags flags;
const ASTCreateQuery & create = table_info.create_table_query->as<const ASTCreateQuery &>();
if (restore_settings.create_table != RestoreTableCreationMode::kMustExist)
{
if (create.is_dictionary)
flags |= AccessType::CREATE_DICTIONARY;
else if (create.is_ordinary_view || create.is_materialized_view || create.is_live_view)
flags |= AccessType::CREATE_VIEW;
else
flags |= AccessType::CREATE_TABLE;
}
if (!restore_settings.structure_only && !create.is_dictionary && !create.is_ordinary_view
&& backup->hasFiles(table_info.data_path_in_backup))
{
flags |= AccessType::INSERT;
}
if (!flags)
{
if (create.is_dictionary)
flags = AccessType::SHOW_DICTIONARIES;
else
flags = AccessType::SHOW_TABLES;
}
required_access.emplace_back(flags, table_name.name.database, table_name.name.table);
}
if (access_restore_task)
insertAtEnd(required_access, access_restore_task->getRequiredAccess());
/// We convert to AccessRights and back to check access rights in a predictable way
/// (some elements could be duplicated or not sorted).
required_access = AccessRights{required_access}.getElements();
context->checkAccess(required_access);
}
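/// Illustrative example of the access collected above (hypothetical table db.events, table creation allowed, data
/// present in the backup and structure_only = false): the element list contains CREATE DATABASE on db and
/// CREATE TABLE + INSERT on db.events; if the table must already exist and no data is going to be inserted, it
/// degrades to SHOW TABLES on db.events.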
void RestorerFromBackup::createDatabases()
{
for (const auto & [database_name, database_info] : database_infos)
{
bool need_create_database = (restore_settings.create_database != RestoreDatabaseCreationMode::kMustExist);
if (need_create_database && DatabaseCatalog::isPredefinedDatabaseName(database_name))
need_create_database = false; /// Predefined databases always exist.
if (need_create_database)
{
/// Execute CREATE DATABASE query.
auto create_database_query = database_info.create_database_query;
if (restore_settings.create_table == RestoreTableCreationMode::kCreateIfNotExists)
{
create_database_query = create_database_query->clone();
create_database_query->as<ASTCreateQuery &>().if_not_exists = true;
}
LOG_TRACE(log, "Creating database {}: {}", backQuoteIfNeed(database_name), serializeAST(*create_database_query));
executeCreateQuery(create_database_query);
}
DatabasePtr database = DatabaseCatalog::instance().getDatabase(database_name);
if (!restore_settings.allow_different_database_def)
{
/// Check that the database's definition is the same as expected.
ASTPtr create_database_query = database->getCreateDatabaseQueryForBackup();
ASTPtr expected_create_query = database_info.create_database_query;
if (serializeAST(*create_database_query) != serializeAST(*expected_create_query))
{
throw Exception(
ErrorCodes::CANNOT_RESTORE_DATABASE,
"The database {} has a different definition: {} "
"comparing to its definition in the backup: {}",
backQuoteIfNeed(database_name),
serializeAST(*create_database_query),
serializeAST(*expected_create_query));
}
}
}
}
void RestorerFromBackup::createTables()
{
while (true)
{
/// We need to create tables considering their dependencies.
auto tables_to_create = findTablesWithoutDependencies();
if (tables_to_create.empty())
break; /// We've already created all the tables.
for (const auto & table_key : tables_to_create)
{
auto & table_info = table_infos.at(table_key);
DatabasePtr database;
if (table_key.is_temporary)
database = DatabaseCatalog::instance().getDatabaseForTemporaryTables();
else
database = DatabaseCatalog::instance().getDatabase(table_key.name.database);
bool need_create_table = (restore_settings.create_table != RestoreTableCreationMode::kMustExist);
if (need_create_table && hasSystemTableEngine(*table_info.create_table_query))
need_create_table = false; /// Tables with System* table engine already exist or can't be created by SQL anyway.
if (need_create_table)
{
/// Execute CREATE TABLE query (we call IDatabase::createTableRestoredFromBackup() to allow the database to do some
/// database-specific things).
auto create_table_query = table_info.create_table_query;
if (restore_settings.create_table == RestoreTableCreationMode::kCreateIfNotExists)
{
create_table_query = create_table_query->clone();
create_table_query->as<ASTCreateQuery &>().if_not_exists = true;
}
LOG_TRACE(
log,
"Creating {}table {}: {}",
(table_key.is_temporary ? "temporary " : ""),
table_key.name.getFullName(),
serializeAST(*create_table_query));
database->createTableRestoredFromBackup(create_table_query, *this);
}
table_info.created = true;
auto resolved_id = table_key.is_temporary
? context->resolveStorageID(StorageID{"", table_key.name.table}, Context::ResolveExternal)
: context->resolveStorageID(StorageID{table_key.name.database, table_key.name.table}, Context::ResolveGlobal);
auto storage = database->getTable(resolved_id.table_name, context);
table_info.storage = storage;
table_info.table_lock = storage->lockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout);
if (!restore_settings.allow_different_table_def)
{
ASTPtr create_table_query = storage->getCreateQueryForBackup(context, nullptr);
ASTPtr expected_create_query = table_info.create_table_query;
if (serializeAST(*create_table_query) != serializeAST(*expected_create_query))
{
throw Exception(
ErrorCodes::CANNOT_RESTORE_TABLE,
"The {}table {} has a different definition: {} "
"comparing to its definition in the backup: {}",
(table_key.is_temporary ? "temporary " : ""),
table_key.name.getFullName(),
serializeAST(*create_table_query),
serializeAST(*expected_create_query));
}
}
if (!restore_settings.structure_only)
{
const auto & data_path_in_backup = table_info.data_path_in_backup;
const auto & partitions = table_info.partitions;
storage->restoreDataFromBackup(*this, data_path_in_backup, partitions);
}
}
}
}
/// Returns the list of tables without dependencies, or those whose dependencies have already been created.
std::vector<RestorerFromBackup::TableKey> RestorerFromBackup::findTablesWithoutDependencies() const
{
std::vector<TableKey> tables_without_dependencies;
bool all_tables_created = true;
for (const auto & [key, table_info] : table_infos)
{
if (table_info.created)
continue;
/// Found a table which is not created yet.
all_tables_created = false;
/// Check if all dependencies have been created before.
bool all_dependencies_met = true;
for (const auto & dependency : table_info.dependencies)
{
auto it = table_infos.find(TableKey{dependency, false});
if ((it != table_infos.end()) && !it->second.created)
{
all_dependencies_met = false;
break;
}
}
if (all_dependencies_met)
tables_without_dependencies.push_back(key);
}
if (!tables_without_dependencies.empty())
return tables_without_dependencies;
if (all_tables_created)
return {};
/// Cyclic dependency? We'll try to create those tables anyway, but it will probably fail.
std::vector<TableKey> tables_with_cyclic_dependencies;
for (const auto & [key, table_info] : table_infos)
{
if (!table_info.created)
tables_with_cyclic_dependencies.push_back(key);
}
/// Only show a warning here; a proper exception will be thrown later when creating those tables.
LOG_WARNING(
log,
"Some tables have cyclic dependency from each other: {}",
boost::algorithm::join(
tables_with_cyclic_dependencies
| boost::adaptors::transformed([](const TableKey & key) -> String { return key.name.getFullName(); }),
", "));
return tables_with_cyclic_dependencies;
}
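/// Illustrative example for findTablesWithoutDependencies() above (hypothetical tables): if a materialized view
/// db.mv depends on its target table db.dst, the first call returns only db.dst, and db.mv becomes eligible once
/// db.dst is marked as created; if db.a and db.b depend on each other, both are returned together and the warning
/// above is logged.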
void RestorerFromBackup::addDataRestoreTask(DataRestoreTask && new_task)
{
if (current_stage == Stage::kInsertingDataToTables)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Adding data-restoring tasks is not allowed");
data_restore_tasks.push_back(std::move(new_task));
}
void RestorerFromBackup::addDataRestoreTasks(DataRestoreTasks && new_tasks)
{
if (current_stage == Stage::kInsertingDataToTables)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Adding data-restoring tasks is not allowed");
insertAtEnd(data_restore_tasks, std::move(new_tasks));
}
void RestorerFromBackup::checkPathInBackupToRestoreAccess(const String & path)
{
if (!access_restore_task || !access_restore_task->hasDataPath(path))
throw Exception(ErrorCodes::LOGICAL_ERROR, "Path to restore access was not added");
}
void RestorerFromBackup::executeCreateQuery(const ASTPtr & create_query) const
{
InterpreterCreateQuery interpreter{create_query, context};
interpreter.setInternal(true);
interpreter.execute();
}
void RestorerFromBackup::throwPartitionsNotSupported(const StorageID & storage_id, const String & table_engine)
{
throw Exception(
ErrorCodes::CANNOT_RESTORE_TABLE,
"Table engine {} doesn't support partitions, cannot table {}",
table_engine,
storage_id.getFullTableName());
}
void RestorerFromBackup::throwTableIsNotEmpty(const StorageID & storage_id)
{
throw Exception(
ErrorCodes::CANNOT_RESTORE_TABLE,
"Cannot restore the table {} because it already contains some data. You can set structure_only=true or "
"allow_non_empty_tables=true to overcome that in the way you want",
storage_id.getFullTableName());
}
}

View File

@ -0,0 +1,146 @@
#pragma once
#include <Backups/RestoreSettings.h>
#include <Databases/DDLRenamingVisitor.h>
#include <Parsers/ASTBackupQuery.h>
#include <Storages/TableLockHolder.h>
#include <Storages/IStorage_fwd.h>
#include <Interpreters/Context_fwd.h>
#include <filesystem>
namespace DB
{
class IBackup;
using BackupPtr = std::shared_ptr<const IBackup>;
class IRestoreCoordination;
struct StorageID;
class AccessRestoreTask;
/// Restores the definition of databases and tables and prepares tasks to restore the data of the tables.
class RestorerFromBackup : private boost::noncopyable
{
public:
RestorerFromBackup(
const ASTBackupQuery::Elements & restore_query_elements_,
const RestoreSettings & restore_settings_,
std::shared_ptr<IRestoreCoordination> restore_coordination_,
const BackupPtr & backup_,
const ContextMutablePtr & context_,
std::chrono::seconds timeout_);
~RestorerFromBackup();
/// Restores the definition of databases and tables and prepares tasks to restore the data of the tables.
/// restoreMetadata() checks access rights internally, so checkAccessOnly() doesn't need to be called first.
void restoreMetadata();
/// Only checks access rights without restoring anything.
void checkAccessOnly();
using DataRestoreTask = std::function<void()>;
using DataRestoreTasks = std::vector<DataRestoreTask>;
DataRestoreTasks getDataRestoreTasks();
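/// A minimal usage sketch (the `query`, `settings`, `coordination`, `backup` and `context` variables are assumed to
/// exist at the call site; they are not members of this class):
///
///     RestorerFromBackup restorer{query.elements, settings, coordination, backup, context, std::chrono::seconds{600}};
///     restorer.restoreMetadata(); /// creates databases and tables from the backup
///     for (auto & task : restorer.getDataRestoreTasks())
///         task(); /// then insert the data into the created tables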
BackupPtr getBackup() const { return backup; }
const RestoreSettings & getRestoreSettings() const { return restore_settings; }
bool isNonEmptyTableAllowed() const { return getRestoreSettings().allow_non_empty_tables; }
std::shared_ptr<IRestoreCoordination> getRestoreCoordination() const { return restore_coordination; }
std::chrono::seconds getTimeout() const { return timeout; }
ContextMutablePtr getContext() const { return context; }
void executeCreateQuery(const ASTPtr & create_query) const;
/// Adds a data restore task which will later be returned by getDataRestoreTasks().
/// This function can be called by implementations of IStorage::restoreDataFromBackup() in inherited storage classes.
void addDataRestoreTask(DataRestoreTask && new_task);
void addDataRestoreTasks(DataRestoreTasks && new_tasks);
/// Checks that a data path in the backup has been registered for restoring access control (throws otherwise).
void checkPathInBackupToRestoreAccess(const String & path);
/// Reading a backup includes a few stages:
enum class Stage
{
/// Initial stage.
kPreparing,
/// Finding databases and tables in the backup which we're going to restore.
kFindingTablesInBackup,
/// Creating databases or finding them and checking their definitions.
kCreatingDatabases,
/// Creating tables or finding them and checking their definition.
kCreatingTables,
/// Inserting restored data into tables.
kInsertingDataToTables,
/// An error happened during one of the stages above; the backup was not restored properly.
kError = -1,
};
static std::string_view toString(Stage stage);
/// Throws an exception that a specified table engine doesn't support partitions.
[[noreturn]] static void throwPartitionsNotSupported(const StorageID & storage_id, const String & table_engine);
/// Throws an exception that a specified table is already non-empty.
[[noreturn]] static void throwTableIsNotEmpty(const StorageID & storage_id);
private:
const ASTBackupQuery::Elements restore_query_elements;
const RestoreSettings restore_settings;
std::shared_ptr<IRestoreCoordination> restore_coordination;
BackupPtr backup;
ContextMutablePtr context;
std::chrono::seconds timeout;
Poco::Logger * log;
Stage current_stage = Stage::kPreparing;
std::vector<std::filesystem::path> root_paths_in_backup;
DDLRenamingMap renaming_map;
void run(bool only_check_access);
void setStage(Stage new_stage, const String & error_message = {});
void findRootPathsInBackup();
void collectDatabaseAndTableInfos();
void collectTableInfo(const QualifiedTableName & table_name_in_backup, bool is_temporary_table, const std::optional<ASTs> & partitions);
void collectDatabaseInfo(const String & database_name_in_backup, const std::set<DatabaseAndTableName> & except_table_names, bool throw_if_no_database_metadata_in_backup);
void collectAllDatabasesInfo(const std::set<String> & except_database_names, const std::set<DatabaseAndTableName> & except_table_names);
void checkAccessForCollectedInfos() const;
void createDatabases();
void createTables();
struct DatabaseInfo
{
ASTPtr create_database_query;
};
struct TableInfo
{
ASTPtr create_table_query;
std::optional<ASTs> partitions;
std::filesystem::path data_path_in_backup;
std::unordered_set<QualifiedTableName> dependencies;
bool created = false;
StoragePtr storage;
TableLockHolder table_lock;
};
struct TableKey
{
QualifiedTableName name;
bool is_temporary = false;
bool operator ==(const TableKey & right) const;
bool operator <(const TableKey & right) const;
};
std::vector<TableKey> findTablesWithoutDependencies() const;
std::unordered_map<String, DatabaseInfo> database_infos;
std::map<TableKey, TableInfo> table_infos;
std::vector<DataRestoreTask> data_restore_tasks;
std::shared_ptr<AccessRestoreTask> access_restore_task;
};
}

View File

@ -1,17 +0,0 @@
#include <Backups/formatTableNameOrTemporaryTableName.h>
#include <Common/quoteString.h>
#include <Interpreters/DatabaseCatalog.h>
namespace DB
{
String formatTableNameOrTemporaryTableName(const DatabaseAndTableName & table_name)
{
if (table_name.first == DatabaseCatalog::TEMPORARY_DATABASE)
return "temporary table " + backQuoteIfNeed(table_name.second);
else
return "table " + backQuoteIfNeed(table_name.first) + "." + backQuoteIfNeed(table_name.second);
}
}

View File

@ -1,13 +0,0 @@
#pragma once
#include <base/types.h>
namespace DB
{
using DatabaseAndTableName = std::pair<String, String>;
/// Outputs either "table db_name.table_name" or "temporary table table_name".
String formatTableNameOrTemporaryTableName(const DatabaseAndTableName & table_name);
}

View File

@ -1,37 +0,0 @@
#include <Backups/replaceTableUUIDWithMacroInReplicatedTableDef.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTLiteral.h>
namespace DB
{
void replaceTableUUIDWithMacroInReplicatedTableDef(ASTCreateQuery & create_query, const UUID & table_uuid)
{
if (create_query.getTable().empty() || !create_query.storage || !create_query.storage->engine || (table_uuid == UUIDHelpers::Nil))
return;
auto & engine = *(create_query.storage->engine);
if (!engine.name.starts_with("Replicated") || !engine.arguments)
return;
auto * args = typeid_cast<ASTExpressionList *>(engine.arguments.get());
size_t zookeeper_path_arg_pos = engine.name.starts_with("ReplicatedGraphite") ? 1 : 0;
if (!args || (args->children.size() <= zookeeper_path_arg_pos))
return;
auto * zookeeper_path_arg = typeid_cast<ASTLiteral *>(args->children[zookeeper_path_arg_pos].get());
if (!zookeeper_path_arg || (zookeeper_path_arg->value.getType() != Field::Types::String))
return;
String & zookeeper_path = zookeeper_path_arg->value.get<String>();
String table_uuid_str = toString(table_uuid);
if (size_t uuid_pos = zookeeper_path.find(table_uuid_str); uuid_pos != String::npos)
zookeeper_path.replace(uuid_pos, table_uuid_str.size(), "{uuid}");
}
}

Some files were not shown because too many files have changed in this diff