mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-19 16:20:50 +00:00
Merge branch 'master' into read-cgroup-memory-usage-async-metrics
This commit is contained in:
commit
1d1dc9c7ad
4
.github/workflows/backport_branches.yml
vendored
4
.github/workflows/backport_branches.yml
vendored
@ -36,10 +36,6 @@ jobs:
|
||||
cd "$GITHUB_WORKSPACE/tests/ci"
|
||||
echo "Testing the main ci directory"
|
||||
python3 -m unittest discover -s . -p 'test_*.py'
|
||||
for dir in *_lambda/; do
|
||||
echo "Testing $dir"
|
||||
python3 -m unittest discover -s "$dir" -p 'test_*.py'
|
||||
done
|
||||
- name: PrepareRunConfig
|
||||
id: runconfig
|
||||
run: |
|
||||
|
4
.github/workflows/master.yml
vendored
4
.github/workflows/master.yml
vendored
@ -33,10 +33,6 @@ jobs:
|
||||
# cd "$GITHUB_WORKSPACE/tests/ci"
|
||||
# echo "Testing the main ci directory"
|
||||
# python3 -m unittest discover -s . -p 'test_*.py'
|
||||
# for dir in *_lambda/; do
|
||||
# echo "Testing $dir"
|
||||
# python3 -m unittest discover -s "$dir" -p 'test_*.py'
|
||||
# done
|
||||
- name: PrepareRunConfig
|
||||
id: runconfig
|
||||
run: |
|
||||
|
4
.github/workflows/merge_queue.yml
vendored
4
.github/workflows/merge_queue.yml
vendored
@ -30,10 +30,6 @@ jobs:
|
||||
cd "$GITHUB_WORKSPACE/tests/ci"
|
||||
echo "Testing the main ci directory"
|
||||
python3 -m unittest discover -s . -p 'test_*.py'
|
||||
for dir in *_lambda/; do
|
||||
echo "Testing $dir"
|
||||
python3 -m unittest discover -s "$dir" -p 'test_*.py'
|
||||
done
|
||||
- name: PrepareRunConfig
|
||||
id: runconfig
|
||||
run: |
|
||||
|
4
.github/workflows/pull_request.yml
vendored
4
.github/workflows/pull_request.yml
vendored
@ -48,10 +48,6 @@ jobs:
|
||||
cd "$GITHUB_WORKSPACE/tests/ci"
|
||||
echo "Testing the main ci directory"
|
||||
python3 -m unittest discover -s . -p 'test_*.py'
|
||||
for dir in *_lambda/; do
|
||||
echo "Testing $dir"
|
||||
python3 -m unittest discover -s "$dir" -p 'test_*.py'
|
||||
done
|
||||
- name: PrepareRunConfig
|
||||
id: runconfig
|
||||
run: |
|
||||
|
4
.github/workflows/release_branches.yml
vendored
4
.github/workflows/release_branches.yml
vendored
@ -33,10 +33,6 @@ jobs:
|
||||
cd "$GITHUB_WORKSPACE/tests/ci"
|
||||
echo "Testing the main ci directory"
|
||||
python3 -m unittest discover -s . -p 'test_*.py'
|
||||
for dir in *_lambda/; do
|
||||
echo "Testing $dir"
|
||||
python3 -m unittest discover -s "$dir" -p 'test_*.py'
|
||||
done
|
||||
- name: PrepareRunConfig
|
||||
id: runconfig
|
||||
run: |
|
||||
|
@ -1298,7 +1298,6 @@ elseif(ARCH_PPC64LE)
|
||||
${OPENSSL_SOURCE_DIR}/crypto/camellia/camellia.c
|
||||
${OPENSSL_SOURCE_DIR}/crypto/camellia/cmll_cbc.c
|
||||
${OPENSSL_SOURCE_DIR}/crypto/chacha/chacha_enc.c
|
||||
${OPENSSL_SOURCE_DIR}/crypto/mem_clr.c
|
||||
${OPENSSL_SOURCE_DIR}/crypto/rc4/rc4_enc.c
|
||||
${OPENSSL_SOURCE_DIR}/crypto/rc4/rc4_skey.c
|
||||
${OPENSSL_SOURCE_DIR}/crypto/sha/keccak1600.c
|
||||
|
@ -4,6 +4,9 @@
|
||||
source /setup_export_logs.sh
|
||||
set -e -x
|
||||
|
||||
MAX_RUN_TIME=${MAX_RUN_TIME:-3600}
|
||||
MAX_RUN_TIME=$((MAX_RUN_TIME == 0 ? 3600 : MAX_RUN_TIME))
|
||||
|
||||
# Choose random timezone for this test run
|
||||
TZ="$(rg -v '#' /usr/share/zoneinfo/zone.tab | awk '{print $3}' | shuf | head -n1)"
|
||||
echo "Choosen random timezone $TZ"
|
||||
@ -242,7 +245,22 @@ function run_tests()
|
||||
}
|
||||
|
||||
export -f run_tests
|
||||
timeout "$MAX_RUN_TIME" bash -c run_tests ||:
|
||||
|
||||
function timeout_with_logging() {
|
||||
local exit_code=0
|
||||
|
||||
timeout -s TERM --preserve-status "${@}" || exit_code="${?}"
|
||||
|
||||
if [[ "${exit_code}" -eq "124" ]]
|
||||
then
|
||||
echo "The command 'timeout ${*}' has been killed by timeout"
|
||||
fi
|
||||
|
||||
return $exit_code
|
||||
}
|
||||
|
||||
TIMEOUT=$((MAX_RUN_TIME - 700))
|
||||
timeout_with_logging "$TIMEOUT" bash -c run_tests ||:
|
||||
|
||||
echo "Files in current directory"
|
||||
ls -la ./
|
||||
|
@ -6,18 +6,12 @@ source /setup_export_logs.sh
|
||||
# fail on errors, verbose and export all env variables
|
||||
set -e -x -a
|
||||
|
||||
MAX_RUN_TIME=${MAX_RUN_TIME:-7200}
|
||||
MAX_RUN_TIME=$((MAX_RUN_TIME == 0 ? 7200 : MAX_RUN_TIME))
|
||||
MAX_RUN_TIME=${MAX_RUN_TIME:-9000}
|
||||
MAX_RUN_TIME=$((MAX_RUN_TIME == 0 ? 9000 : MAX_RUN_TIME))
|
||||
|
||||
USE_DATABASE_REPLICATED=${USE_DATABASE_REPLICATED:=0}
|
||||
USE_SHARED_CATALOG=${USE_SHARED_CATALOG:=0}
|
||||
|
||||
RUN_SEQUENTIAL_TESTS_IN_PARALLEL=0
|
||||
|
||||
if [[ "$USE_DATABASE_REPLICATED" -eq 1 ]] || [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then
|
||||
RUN_SEQUENTIAL_TESTS_IN_PARALLEL=0
|
||||
fi
|
||||
|
||||
# Choose random timezone for this test run.
|
||||
#
|
||||
# NOTE: that clickhouse-test will randomize session_timezone by itself as well
|
||||
@ -101,53 +95,6 @@ if [ "$NUM_TRIES" -gt "1" ]; then
|
||||
mkdir -p /var/run/clickhouse-server
|
||||
fi
|
||||
|
||||
# Run a CH instance to execute sequential tests on it in parallel with all other tests.
|
||||
if [[ "$RUN_SEQUENTIAL_TESTS_IN_PARALLEL" -eq 1 ]]; then
|
||||
mkdir -p /var/run/clickhouse-server3 /etc/clickhouse-server3 /var/lib/clickhouse3
|
||||
cp -r -L /etc/clickhouse-server/* /etc/clickhouse-server3/
|
||||
|
||||
sudo chown clickhouse:clickhouse /var/run/clickhouse-server3 /var/lib/clickhouse3 /etc/clickhouse-server3/
|
||||
sudo chown -R clickhouse:clickhouse /etc/clickhouse-server3/*
|
||||
|
||||
function replace(){
|
||||
sudo find /etc/clickhouse-server3/ -type f -name '*.xml' -exec sed -i "$1" {} \;
|
||||
}
|
||||
|
||||
replace "s|<port>9000</port>|<port>19000</port>|g"
|
||||
replace "s|<port>9440</port>|<port>19440</port>|g"
|
||||
replace "s|<port>9988</port>|<port>19988</port>|g"
|
||||
replace "s|<port>9234</port>|<port>19234</port>|g"
|
||||
replace "s|<port>9181</port>|<port>19181</port>|g"
|
||||
replace "s|<https_port>8443</https_port>|<https_port>18443</https_port>|g"
|
||||
replace "s|<tcp_port>9000</tcp_port>|<tcp_port>19000</tcp_port>|g"
|
||||
replace "s|<tcp_port>9181</tcp_port>|<tcp_port>19181</tcp_port>|g"
|
||||
replace "s|<tcp_port_secure>9440</tcp_port_secure>|<tcp_port_secure>19440</tcp_port_secure>|g"
|
||||
replace "s|<tcp_with_proxy_port>9010</tcp_with_proxy_port>|<tcp_with_proxy_port>19010</tcp_with_proxy_port>|g"
|
||||
replace "s|<mysql_port>9004</mysql_port>|<mysql_port>19004</mysql_port>|g"
|
||||
replace "s|<postgresql_port>9005</postgresql_port>|<postgresql_port>19005</postgresql_port>|g"
|
||||
replace "s|<interserver_http_port>9009</interserver_http_port>|<interserver_http_port>19009</interserver_http_port>|g"
|
||||
replace "s|8123|18123|g"
|
||||
replace "s|/var/lib/clickhouse/|/var/lib/clickhouse3/|g"
|
||||
replace "s|/etc/clickhouse-server/|/etc/clickhouse-server3/|g"
|
||||
# distributed cache
|
||||
replace "s|<tcp_port>10001</tcp_port>|<tcp_port>10003</tcp_port>|g"
|
||||
replace "s|<tcp_port>10002</tcp_port>|<tcp_port>10004</tcp_port>|g"
|
||||
|
||||
sudo -E -u clickhouse /usr/bin/clickhouse server --daemon --config /etc/clickhouse-server3/config.xml \
|
||||
--pid-file /var/run/clickhouse-server3/clickhouse-server.pid \
|
||||
-- --path /var/lib/clickhouse3/ --logger.stderr /var/log/clickhouse-server/stderr3.log \
|
||||
--logger.log /var/log/clickhouse-server/clickhouse-server3.log --logger.errorlog /var/log/clickhouse-server/clickhouse-server3.err.log \
|
||||
--tcp_port 19000 --tcp_port_secure 19440 --http_port 18123 --https_port 18443 --interserver_http_port 19009 --tcp_with_proxy_port 19010 \
|
||||
--prometheus.port 19988 --keeper_server.raft_configuration.server.port 19234 --keeper_server.tcp_port 19181 \
|
||||
--mysql_port 19004 --postgresql_port 19005
|
||||
|
||||
for _ in {1..100}
|
||||
do
|
||||
clickhouse-client --port 19000 --query "SELECT 1" && break
|
||||
sleep 1
|
||||
done
|
||||
fi
|
||||
|
||||
# simplest way to forward env variables to server
|
||||
sudo -E -u clickhouse /usr/bin/clickhouse-server --config /etc/clickhouse-server/config.xml --daemon --pid-file /var/run/clickhouse-server/clickhouse-server.pid
|
||||
|
||||
@ -183,9 +130,6 @@ if [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
|
||||
--keeper_server.tcp_port 29181 --keeper_server.server_id 3 \
|
||||
--prometheus.port 29988 \
|
||||
--macros.shard s2 # It doesn't work :(
|
||||
|
||||
MAX_RUN_TIME=$((MAX_RUN_TIME < 9000 ? MAX_RUN_TIME : 9000)) # min(MAX_RUN_TIME, 2.5 hours)
|
||||
MAX_RUN_TIME=$((MAX_RUN_TIME != 0 ? MAX_RUN_TIME : 9000)) # set to 2.5 hours if 0 (unlimited)
|
||||
fi
|
||||
|
||||
if [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then
|
||||
@ -210,9 +154,6 @@ if [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then
|
||||
--keeper_server.tcp_port 19181 --keeper_server.server_id 2 \
|
||||
--prometheus.port 19988 \
|
||||
--macros.replica r2 # It doesn't work :(
|
||||
|
||||
MAX_RUN_TIME=$((MAX_RUN_TIME < 9000 ? MAX_RUN_TIME : 9000)) # min(MAX_RUN_TIME, 2.5 hours)
|
||||
MAX_RUN_TIME=$((MAX_RUN_TIME != 0 ? MAX_RUN_TIME : 9000)) # set to 2.5 hours if 0 (unlimited)
|
||||
fi
|
||||
|
||||
# Wait for the server to start, but not for too long.
|
||||
@ -223,7 +164,6 @@ do
|
||||
done
|
||||
|
||||
setup_logs_replication
|
||||
|
||||
attach_gdb_to_clickhouse || true # FIXME: to not break old builds, clean on 2023-09-01
|
||||
|
||||
function fn_exists() {
|
||||
@ -284,11 +224,7 @@ function run_tests()
|
||||
else
|
||||
# All other configurations are OK.
|
||||
ADDITIONAL_OPTIONS+=('--jobs')
|
||||
ADDITIONAL_OPTIONS+=('5')
|
||||
fi
|
||||
|
||||
if [[ "$RUN_SEQUENTIAL_TESTS_IN_PARALLEL" -eq 1 ]]; then
|
||||
ADDITIONAL_OPTIONS+=('--run-sequential-tests-in-parallel')
|
||||
ADDITIONAL_OPTIONS+=('8')
|
||||
fi
|
||||
|
||||
if [[ -n "$RUN_BY_HASH_NUM" ]] && [[ -n "$RUN_BY_HASH_TOTAL" ]]; then
|
||||
@ -373,9 +309,6 @@ done
|
||||
# Because it's the simplest way to read it when server has crashed.
|
||||
sudo clickhouse stop ||:
|
||||
|
||||
if [[ "$RUN_SEQUENTIAL_TESTS_IN_PARALLEL" -eq 1 ]]; then
|
||||
sudo clickhouse stop --pid-path /var/run/clickhouse-server3 ||:
|
||||
fi
|
||||
|
||||
if [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
|
||||
sudo clickhouse stop --pid-path /var/run/clickhouse-server1 ||:
|
||||
@ -393,12 +326,6 @@ rg -Fa "<Fatal>" /var/log/clickhouse-server/clickhouse-server.log ||:
|
||||
rg -A50 -Fa "============" /var/log/clickhouse-server/stderr.log ||:
|
||||
zstd --threads=0 < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log.zst &
|
||||
|
||||
if [[ "$RUN_SEQUENTIAL_TESTS_IN_PARALLEL" -eq 1 ]]; then
|
||||
rg -Fa "<Fatal>" /var/log/clickhouse-server3/clickhouse-server.log ||:
|
||||
rg -A50 -Fa "============" /var/log/clickhouse-server3/stderr.log ||:
|
||||
zstd --threads=0 < /var/log/clickhouse-server3/clickhouse-server.log > /test_output/clickhouse-server3.log.zst &
|
||||
fi
|
||||
|
||||
data_path_config="--path=/var/lib/clickhouse/"
|
||||
if [[ -n "$USE_S3_STORAGE_FOR_MERGE_TREE" ]] && [[ "$USE_S3_STORAGE_FOR_MERGE_TREE" -eq 1 ]]; then
|
||||
# We need s3 storage configuration (but it's more likely that clickhouse-local will fail for some reason)
|
||||
@ -419,10 +346,6 @@ if [ $failed_to_save_logs -ne 0 ]; then
|
||||
do
|
||||
clickhouse-local "$data_path_config" --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst ||:
|
||||
|
||||
if [[ "$RUN_SEQUENTIAL_TESTS_IN_PARALLEL" -eq 1 ]]; then
|
||||
clickhouse-local --path /var/lib/clickhouse3/ --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.3.tsv.zst ||:
|
||||
fi
|
||||
|
||||
if [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
|
||||
clickhouse-local --path /var/lib/clickhouse1/ --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst ||:
|
||||
clickhouse-local --path /var/lib/clickhouse2/ --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.2.tsv.zst ||:
|
||||
@ -464,12 +387,6 @@ rm -rf /var/lib/clickhouse/data/system/*/
|
||||
tar -chf /test_output/store.tar /var/lib/clickhouse/store ||:
|
||||
tar -chf /test_output/metadata.tar /var/lib/clickhouse/metadata/*.sql ||:
|
||||
|
||||
if [[ "$RUN_SEQUENTIAL_TESTS_IN_PARALLEL" -eq 1 ]]; then
|
||||
rm -rf /var/lib/clickhouse3/data/system/*/
|
||||
tar -chf /test_output/store.tar /var/lib/clickhouse3/store ||:
|
||||
tar -chf /test_output/metadata.tar /var/lib/clickhouse3/metadata/*.sql ||:
|
||||
fi
|
||||
|
||||
|
||||
if [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
|
||||
rg -Fa "<Fatal>" /var/log/clickhouse-server/clickhouse-server1.log ||:
|
||||
|
@ -67,6 +67,7 @@ The supported formats are:
|
||||
| [Prometheus](#prometheus) | ✗ | ✔ |
|
||||
| [Protobuf](#protobuf) | ✔ | ✔ |
|
||||
| [ProtobufSingle](#protobufsingle) | ✔ | ✔ |
|
||||
| [ProtobufList](#protobuflist) | ✔ | ✔ |
|
||||
| [Avro](#data-format-avro) | ✔ | ✔ |
|
||||
| [AvroConfluent](#data-format-avro-confluent) | ✔ | ✗ |
|
||||
| [Parquet](#data-format-parquet) | ✔ | ✔ |
|
||||
@ -1952,6 +1953,35 @@ SYSTEM DROP FORMAT SCHEMA CACHE FOR Protobuf
|
||||
|
||||
Same as [Protobuf](#protobuf) but for storing/parsing single Protobuf message without length delimiters.
|
||||
|
||||
## ProtobufList {#protobuflist}
|
||||
|
||||
Similar to Protobuf but rows are represented as a sequence of sub-messages contained in a message with fixed name "Envelope".
|
||||
|
||||
Usage example:
|
||||
|
||||
``` sql
|
||||
SELECT * FROM test.table FORMAT ProtobufList SETTINGS format_schema = 'schemafile:MessageType'
|
||||
```
|
||||
|
||||
``` bash
|
||||
cat protobuflist_messages.bin | clickhouse-client --query "INSERT INTO test.table FORMAT ProtobufList SETTINGS format_schema='schemafile:MessageType'"
|
||||
```
|
||||
|
||||
where the file `schemafile.proto` looks like this:
|
||||
|
||||
``` capnp
|
||||
syntax = "proto3";
|
||||
message Envelope {
|
||||
message MessageType {
|
||||
string name = 1;
|
||||
string surname = 2;
|
||||
uint32 birthDate = 3;
|
||||
repeated string phoneNumbers = 4;
|
||||
};
|
||||
MessageType row = 1;
|
||||
};
|
||||
```
|
||||
|
||||
## Avro {#data-format-avro}
|
||||
|
||||
[Apache Avro](https://avro.apache.org/) is a row-oriented data serialization framework developed within Apache’s Hadoop project.
|
||||
|
@ -124,7 +124,7 @@ which is equal to
|
||||
|
||||
#### Default values for from_env and from_zk attributes
|
||||
|
||||
It's possible to set the default value and substitute it only if the environment variable or zookeeper node is set using `replace="1"`.
|
||||
It's possible to set the default value and substitute it only if the environment variable or zookeeper node is set using `replace="1"` (must be declared before from_env).
|
||||
|
||||
With previous example, but `MAX_QUERY_SIZE` is unset:
|
||||
|
||||
@ -132,7 +132,7 @@ With previous example, but `MAX_QUERY_SIZE` is unset:
|
||||
<clickhouse>
|
||||
<profiles>
|
||||
<default>
|
||||
<max_query_size from_env="MAX_QUERY_SIZE" replace="1">150000</max_query_size>
|
||||
<max_query_size replace="1" from_env="MAX_QUERY_SIZE">150000</max_query_size>
|
||||
</default>
|
||||
</profiles>
|
||||
</clickhouse>
|
||||
|
@ -9,7 +9,6 @@ Columns:
|
||||
|
||||
- `name` ([String](../../sql-reference/data-types/string.md)) – The name of the function.
|
||||
- `is_aggregate` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Whether the function is an aggregate function.
|
||||
- `is_deterministic` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md))) - Whether the function is deterministic.
|
||||
- `case_insensitive`, ([UInt8](../../sql-reference/data-types/int-uint.md)) - Whether the function name can be used case-insensitively.
|
||||
- `alias_to`, ([String](../../sql-reference/data-types/string.md)) - The original function name, if the function name is an alias.
|
||||
- `create_query`, ([String](../../sql-reference/data-types/enum.md)) - Unused.
|
||||
|
@ -1,6 +1,6 @@
|
||||
---
|
||||
slug: /en/sql-reference/table-functions/azureBlobStorageCluster
|
||||
sidebar_position: 55
|
||||
sidebar_position: 15
|
||||
sidebar_label: azureBlobStorageCluster
|
||||
title: "azureBlobStorageCluster Table Function"
|
||||
---
|
||||
|
@ -45,16 +45,17 @@ int mainEntryClickHouseKeeperConverter(int argc, char ** argv)
|
||||
keeper_context->setDigestEnabled(true);
|
||||
keeper_context->setSnapshotDisk(std::make_shared<DiskLocal>("Keeper-snapshots", options["output-dir"].as<std::string>()));
|
||||
|
||||
DB::KeeperStorage storage(/* tick_time_ms */ 500, /* superdigest */ "", keeper_context, /* initialize_system_nodes */ false);
|
||||
/// TODO(hanfei): support rocksdb here
|
||||
DB::KeeperMemoryStorage storage(/* tick_time_ms */ 500, /* superdigest */ "", keeper_context, /* initialize_system_nodes */ false);
|
||||
|
||||
DB::deserializeKeeperStorageFromSnapshotsDir(storage, options["zookeeper-snapshots-dir"].as<std::string>(), logger);
|
||||
storage.initializeSystemNodes();
|
||||
|
||||
DB::deserializeLogsAndApplyToStorage(storage, options["zookeeper-logs-dir"].as<std::string>(), logger);
|
||||
DB::SnapshotMetadataPtr snapshot_meta = std::make_shared<DB::SnapshotMetadata>(storage.getZXID(), 1, std::make_shared<nuraft::cluster_config>());
|
||||
DB::KeeperStorageSnapshot snapshot(&storage, snapshot_meta);
|
||||
DB::KeeperStorageSnapshot<DB::KeeperMemoryStorage> snapshot(&storage, snapshot_meta);
|
||||
|
||||
DB::KeeperSnapshotManager manager(1, keeper_context);
|
||||
DB::KeeperSnapshotManager<DB::KeeperMemoryStorage> manager(1, keeper_context);
|
||||
auto snp = manager.serializeSnapshotToBuffer(snapshot);
|
||||
auto file_info = manager.serializeSnapshotBufferToDisk(*snp, storage.getZXID());
|
||||
std::cout << "Snapshot serialized to path:" << fs::path(file_info->disk->getPath()) / file_info->path << std::endl;
|
||||
|
@ -53,6 +53,10 @@
|
||||
# include <Server/CertificateReloader.h>
|
||||
#endif
|
||||
|
||||
#if USE_GWP_ASAN
|
||||
# include <Common/GWPAsan.h>
|
||||
#endif
|
||||
|
||||
#include <Server/ProtocolServerAdapter.h>
|
||||
#include <Server/KeeperTCPHandlerFactory.h>
|
||||
|
||||
@ -642,6 +646,10 @@ try
|
||||
tryLogCurrentException(log, "Disabling cgroup memory observer because of an error during initialization");
|
||||
}
|
||||
|
||||
#if USE_GWP_ASAN
|
||||
GWPAsan::initFinished();
|
||||
#endif
|
||||
|
||||
LOG_INFO(log, "Ready for connections.");
|
||||
|
||||
waitForTerminationRequest();
|
||||
|
@ -2211,6 +2211,7 @@ try
|
||||
CannotAllocateThreadFaultInjector::setFaultProbability(server_settings.cannot_allocate_thread_fault_injection_probability);
|
||||
|
||||
#if USE_GWP_ASAN
|
||||
GWPAsan::initFinished();
|
||||
GWPAsan::setForceSampleProbability(server_settings.gwp_asan_force_sample_probability);
|
||||
#endif
|
||||
|
||||
@ -2729,8 +2730,7 @@ void Server::createInterserverServers(
|
||||
|
||||
void Server::stopServers(
|
||||
std::vector<ProtocolServerAdapter> & servers,
|
||||
const ServerType & server_type
|
||||
) const
|
||||
const ServerType & server_type) const
|
||||
{
|
||||
LoggerRawPtr log = &logger();
|
||||
|
||||
|
@ -129,8 +129,7 @@ private:
|
||||
|
||||
void stopServers(
|
||||
std::vector<ProtocolServerAdapter> & servers,
|
||||
const ServerType & server_type
|
||||
) const;
|
||||
const ServerType & server_type) const;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -118,10 +118,10 @@ AggregateFunctionPtr createAggregateFunctionAnalysisOfVariance(const std::string
|
||||
void registerAggregateFunctionAnalysisOfVariance(AggregateFunctionFactory & factory)
|
||||
{
|
||||
AggregateFunctionProperties properties = { .is_order_dependent = false };
|
||||
factory.registerFunction("analysisOfVariance", {createAggregateFunctionAnalysisOfVariance, properties}, AggregateFunctionFactory::CaseInsensitive);
|
||||
factory.registerFunction("analysisOfVariance", {createAggregateFunctionAnalysisOfVariance, properties}, AggregateFunctionFactory::Case::Insensitive);
|
||||
|
||||
/// This is widely used term
|
||||
factory.registerAlias("anova", "analysisOfVariance", AggregateFunctionFactory::CaseInsensitive);
|
||||
factory.registerAlias("anova", "analysisOfVariance", AggregateFunctionFactory::Case::Insensitive);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -361,9 +361,9 @@ void registerAggregateFunctionsAny(AggregateFunctionFactory & factory)
|
||||
AggregateFunctionProperties default_properties = {.returns_default_when_only_null = false, .is_order_dependent = true};
|
||||
|
||||
factory.registerFunction("any", {createAggregateFunctionAny, default_properties});
|
||||
factory.registerAlias("any_value", "any", AggregateFunctionFactory::CaseInsensitive);
|
||||
factory.registerAlias("first_value", "any", AggregateFunctionFactory::CaseInsensitive);
|
||||
factory.registerAlias("any_value", "any", AggregateFunctionFactory::Case::Insensitive);
|
||||
factory.registerAlias("first_value", "any", AggregateFunctionFactory::Case::Insensitive);
|
||||
factory.registerFunction("anyLast", {createAggregateFunctionAnyLast, default_properties});
|
||||
factory.registerAlias("last_value", "anyLast", AggregateFunctionFactory::CaseInsensitive);
|
||||
factory.registerAlias("last_value", "anyLast", AggregateFunctionFactory::Case::Insensitive);
|
||||
}
|
||||
}
|
||||
|
@ -221,11 +221,11 @@ void registerAggregateFunctionsAnyRespectNulls(AggregateFunctionFactory & factor
|
||||
= {.returns_default_when_only_null = false, .is_order_dependent = true, .is_window_function = true};
|
||||
|
||||
factory.registerFunction("any_respect_nulls", {createAggregateFunctionAnyRespectNulls, default_properties_for_respect_nulls});
|
||||
factory.registerAlias("any_value_respect_nulls", "any_respect_nulls", AggregateFunctionFactory::CaseInsensitive);
|
||||
factory.registerAlias("first_value_respect_nulls", "any_respect_nulls", AggregateFunctionFactory::CaseInsensitive);
|
||||
factory.registerAlias("any_value_respect_nulls", "any_respect_nulls", AggregateFunctionFactory::Case::Insensitive);
|
||||
factory.registerAlias("first_value_respect_nulls", "any_respect_nulls", AggregateFunctionFactory::Case::Insensitive);
|
||||
|
||||
factory.registerFunction("anyLast_respect_nulls", {createAggregateFunctionAnyLastRespectNulls, default_properties_for_respect_nulls});
|
||||
factory.registerAlias("last_value_respect_nulls", "anyLast_respect_nulls", AggregateFunctionFactory::CaseInsensitive);
|
||||
factory.registerAlias("last_value_respect_nulls", "anyLast_respect_nulls", AggregateFunctionFactory::Case::Insensitive);
|
||||
|
||||
/// Must happen after registering any and anyLast
|
||||
factory.registerNullsActionTransformation("any", "any_respect_nulls");
|
||||
|
@ -46,6 +46,6 @@ AggregateFunctionPtr createAggregateFunctionAvg(const std::string & name, const
|
||||
|
||||
void registerAggregateFunctionAvg(AggregateFunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction("avg", createAggregateFunctionAvg, AggregateFunctionFactory::CaseInsensitive);
|
||||
factory.registerFunction("avg", createAggregateFunctionAvg, AggregateFunctionFactory::Case::Insensitive);
|
||||
}
|
||||
}
|
||||
|
@ -234,9 +234,9 @@ void registerAggregateFunctionsBitwise(AggregateFunctionFactory & factory)
|
||||
factory.registerFunction("groupBitXor", createAggregateFunctionBitwise<AggregateFunctionGroupBitXorData>);
|
||||
|
||||
/// Aliases for compatibility with MySQL.
|
||||
factory.registerAlias("BIT_OR", "groupBitOr", AggregateFunctionFactory::CaseInsensitive);
|
||||
factory.registerAlias("BIT_AND", "groupBitAnd", AggregateFunctionFactory::CaseInsensitive);
|
||||
factory.registerAlias("BIT_XOR", "groupBitXor", AggregateFunctionFactory::CaseInsensitive);
|
||||
factory.registerAlias("BIT_OR", "groupBitOr", AggregateFunctionFactory::Case::Insensitive);
|
||||
factory.registerAlias("BIT_AND", "groupBitAnd", AggregateFunctionFactory::Case::Insensitive);
|
||||
factory.registerAlias("BIT_XOR", "groupBitXor", AggregateFunctionFactory::Case::Insensitive);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -9,7 +9,7 @@ template <typename T1, typename T2> using AggregateFunctionCorr = AggregateFunct
|
||||
|
||||
void registerAggregateFunctionsStatisticsCorr(AggregateFunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction("corr", createAggregateFunctionStatisticsBinary<AggregateFunctionCorr, StatisticsFunctionKind::corr>, AggregateFunctionFactory::CaseInsensitive);
|
||||
factory.registerFunction("corr", createAggregateFunctionStatisticsBinary<AggregateFunctionCorr, StatisticsFunctionKind::corr>, AggregateFunctionFactory::Case::Insensitive);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -37,7 +37,7 @@ AggregateFunctionPtr createAggregateFunctionCount(const std::string & name, cons
|
||||
void registerAggregateFunctionCount(AggregateFunctionFactory & factory)
|
||||
{
|
||||
AggregateFunctionProperties properties = { .returns_default_when_only_null = true, .is_order_dependent = false };
|
||||
factory.registerFunction("count", {createAggregateFunctionCount, properties}, AggregateFunctionFactory::CaseInsensitive);
|
||||
factory.registerFunction("count", {createAggregateFunctionCount, properties}, AggregateFunctionFactory::Case::Insensitive);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -13,8 +13,8 @@ void registerAggregateFunctionsStatisticsCovar(AggregateFunctionFactory & factor
|
||||
factory.registerFunction("covarPop", createAggregateFunctionStatisticsBinary<AggregateFunctionCovar, StatisticsFunctionKind::covarPop>);
|
||||
|
||||
/// Synonyms for compatibility.
|
||||
factory.registerAlias("COVAR_SAMP", "covarSamp", AggregateFunctionFactory::CaseInsensitive);
|
||||
factory.registerAlias("COVAR_POP", "covarPop", AggregateFunctionFactory::CaseInsensitive);
|
||||
factory.registerAlias("COVAR_SAMP", "covarSamp", AggregateFunctionFactory::Case::Insensitive);
|
||||
factory.registerAlias("COVAR_POP", "covarPop", AggregateFunctionFactory::Case::Insensitive);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -29,7 +29,7 @@ const String & getAggregateFunctionCanonicalNameIfAny(const String & name)
|
||||
return AggregateFunctionFactory::instance().getCanonicalNameIfAny(name);
|
||||
}
|
||||
|
||||
void AggregateFunctionFactory::registerFunction(const String & name, Value creator_with_properties, CaseSensitiveness case_sensitiveness)
|
||||
void AggregateFunctionFactory::registerFunction(const String & name, Value creator_with_properties, Case case_sensitiveness)
|
||||
{
|
||||
if (creator_with_properties.creator == nullptr)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "AggregateFunctionFactory: "
|
||||
@ -39,7 +39,7 @@ void AggregateFunctionFactory::registerFunction(const String & name, Value creat
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "AggregateFunctionFactory: the aggregate function name '{}' is not unique",
|
||||
name);
|
||||
|
||||
if (case_sensitiveness == CaseInsensitive)
|
||||
if (case_sensitiveness == Case::Insensitive)
|
||||
{
|
||||
auto key = Poco::toLower(name);
|
||||
if (!case_insensitive_aggregate_functions.emplace(key, creator_with_properties).second)
|
||||
|
@ -60,7 +60,7 @@ public:
|
||||
void registerFunction(
|
||||
const String & name,
|
||||
Value creator,
|
||||
CaseSensitiveness case_sensitiveness = CaseSensitive);
|
||||
Case case_sensitiveness = Case::Sensitive);
|
||||
|
||||
/// Register how to transform from one aggregate function to other based on NullsAction
|
||||
/// Registers them both ways:
|
||||
|
@ -840,8 +840,8 @@ void registerAggregateFunctionGroupArray(AggregateFunctionFactory & factory)
|
||||
AggregateFunctionProperties properties = { .returns_default_when_only_null = false, .is_order_dependent = true };
|
||||
|
||||
factory.registerFunction("groupArray", { createAggregateFunctionGroupArray<false>, properties });
|
||||
factory.registerAlias("array_agg", "groupArray", AggregateFunctionFactory::CaseInsensitive);
|
||||
factory.registerAliasUnchecked("array_concat_agg", "groupArrayArray", AggregateFunctionFactory::CaseInsensitive);
|
||||
factory.registerAlias("array_agg", "groupArray", AggregateFunctionFactory::Case::Insensitive);
|
||||
factory.registerAliasUnchecked("array_concat_agg", "groupArrayArray", AggregateFunctionFactory::Case::Insensitive);
|
||||
factory.registerFunction("groupArraySample", { createAggregateFunctionGroupArraySample, properties });
|
||||
factory.registerFunction("groupArrayLast", { createAggregateFunctionGroupArray<true>, properties });
|
||||
}
|
||||
|
@ -150,8 +150,18 @@ public:
|
||||
|
||||
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
|
||||
{
|
||||
readVarUInt(this->data(place).version, buf);
|
||||
this->data(place).value.read(buf);
|
||||
auto & set = this->data(place).value;
|
||||
auto & version = this->data(place).version;
|
||||
size_t size;
|
||||
readVarUInt(version, buf);
|
||||
readVarUInt(size, buf);
|
||||
set.reserve(size);
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
int key;
|
||||
readIntBinary(key, buf);
|
||||
set.insert(key);
|
||||
}
|
||||
}
|
||||
|
||||
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
|
||||
@ -292,7 +302,7 @@ public:
|
||||
}
|
||||
return new_map;
|
||||
};
|
||||
auto new_map = rhs_value.size() < set.size() ? create_new_map(rhs_value, set) : create_new_map(set, rhs_value);
|
||||
auto new_map = create_new_map(set, rhs_value);
|
||||
set = std::move(new_map);
|
||||
}
|
||||
}
|
||||
@ -316,11 +326,9 @@ public:
|
||||
readVarUInt(version, buf);
|
||||
readVarUInt(size, buf);
|
||||
set.reserve(size);
|
||||
UInt64 elem_version;
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
auto key = readStringBinaryInto(*arena, buf);
|
||||
readVarUInt(elem_version, buf);
|
||||
set.insert(key);
|
||||
}
|
||||
}
|
||||
|
@ -277,7 +277,7 @@ void registerAggregateFunctionGroupConcat(AggregateFunctionFactory & factory)
|
||||
AggregateFunctionProperties properties = { .returns_default_when_only_null = false, .is_order_dependent = true };
|
||||
|
||||
factory.registerFunction("groupConcat", { createAggregateFunctionGroupConcat, properties });
|
||||
factory.registerAlias("group_concat", "groupConcat", AggregateFunctionFactory::CaseInsensitive);
|
||||
factory.registerAlias("group_concat", "groupConcat", AggregateFunctionFactory::Case::Insensitive);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -350,7 +350,7 @@ AggregateFunctionPtr createAggregateFunctionKolmogorovSmirnovTest(
|
||||
|
||||
void registerAggregateFunctionKolmogorovSmirnovTest(AggregateFunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction("kolmogorovSmirnovTest", createAggregateFunctionKolmogorovSmirnovTest, AggregateFunctionFactory::CaseInsensitive);
|
||||
factory.registerFunction("kolmogorovSmirnovTest", createAggregateFunctionKolmogorovSmirnovTest, AggregateFunctionFactory::Case::Insensitive);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -15,11 +15,11 @@ void registerAggregateFunctionsStatisticsSecondMoment(AggregateFunctionFactory &
|
||||
factory.registerFunction("stddevPop", createAggregateFunctionStatisticsUnary<AggregateFunctionSecondMoment, StatisticsFunctionKind::stddevPop>);
|
||||
|
||||
/// Synonyms for compatibility.
|
||||
factory.registerAlias("VAR_SAMP", "varSamp", AggregateFunctionFactory::CaseInsensitive);
|
||||
factory.registerAlias("VAR_POP", "varPop", AggregateFunctionFactory::CaseInsensitive);
|
||||
factory.registerAlias("STDDEV_SAMP", "stddevSamp", AggregateFunctionFactory::CaseInsensitive);
|
||||
factory.registerAlias("STDDEV_POP", "stddevPop", AggregateFunctionFactory::CaseInsensitive);
|
||||
factory.registerAlias("STD", "stddevPop", AggregateFunctionFactory::CaseInsensitive);
|
||||
factory.registerAlias("VAR_SAMP", "varSamp", AggregateFunctionFactory::Case::Insensitive);
|
||||
factory.registerAlias("VAR_POP", "varPop", AggregateFunctionFactory::Case::Insensitive);
|
||||
factory.registerAlias("STDDEV_SAMP", "stddevSamp", AggregateFunctionFactory::Case::Insensitive);
|
||||
factory.registerAlias("STDDEV_POP", "stddevPop", AggregateFunctionFactory::Case::Insensitive);
|
||||
factory.registerAlias("STD", "stddevPop", AggregateFunctionFactory::Case::Insensitive);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -72,7 +72,7 @@ AggregateFunctionPtr createAggregateFunctionSum(const std::string & name, const
|
||||
|
||||
void registerAggregateFunctionSum(AggregateFunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction("sum", createAggregateFunctionSum<AggregateFunctionSumSimple>, AggregateFunctionFactory::CaseInsensitive);
|
||||
factory.registerFunction("sum", createAggregateFunctionSum<AggregateFunctionSumSimple>, AggregateFunctionFactory::Case::Insensitive);
|
||||
factory.registerFunction("sumWithOverflow", createAggregateFunctionSum<AggregateFunctionSumWithOverflow>);
|
||||
factory.registerFunction("sumKahan", createAggregateFunctionSum<AggregateFunctionSumKahan>);
|
||||
}
|
||||
|
@ -535,9 +535,9 @@ void registerAggregateFunctionTopK(AggregateFunctionFactory & factory)
|
||||
|
||||
factory.registerFunction("topK", { createAggregateFunctionTopK<false, false>, properties });
|
||||
factory.registerFunction("topKWeighted", { createAggregateFunctionTopK<true, false>, properties });
|
||||
factory.registerFunction("approx_top_k", { createAggregateFunctionTopK<false, true>, properties }, AggregateFunctionFactory::CaseInsensitive);
|
||||
factory.registerFunction("approx_top_sum", { createAggregateFunctionTopK<true, true>, properties }, AggregateFunctionFactory::CaseInsensitive);
|
||||
factory.registerAlias("approx_top_count", "approx_top_k", AggregateFunctionFactory::CaseInsensitive);
|
||||
factory.registerFunction("approx_top_k", { createAggregateFunctionTopK<false, true>, properties }, AggregateFunctionFactory::Case::Insensitive);
|
||||
factory.registerFunction("approx_top_sum", { createAggregateFunctionTopK<true, true>, properties }, AggregateFunctionFactory::Case::Insensitive);
|
||||
factory.registerAlias("approx_top_count", "approx_top_k", AggregateFunctionFactory::Case::Insensitive);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -195,8 +195,8 @@ AggregateFunctionPtr createAggregateFunctionMinMax(
|
||||
|
||||
void registerAggregateFunctionsMinMax(AggregateFunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction("min", createAggregateFunctionMinMax<true>, AggregateFunctionFactory::CaseInsensitive);
|
||||
factory.registerFunction("max", createAggregateFunctionMinMax<false>, AggregateFunctionFactory::CaseInsensitive);
|
||||
factory.registerFunction("min", createAggregateFunctionMinMax<true>, AggregateFunctionFactory::Case::Insensitive);
|
||||
factory.registerFunction("max", createAggregateFunctionMinMax<false>, AggregateFunctionFactory::Case::Insensitive);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -2919,6 +2919,17 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
|
||||
|
||||
resolveExpressionNode(in_second_argument, scope, false /*allow_lambda_expression*/, true /*allow_table_expression*/);
|
||||
}
|
||||
|
||||
/// Edge case when the first argument of IN is scalar subquery.
|
||||
auto & in_first_argument = function_in_arguments_nodes[0];
|
||||
auto first_argument_type = in_first_argument->getNodeType();
|
||||
if (first_argument_type == QueryTreeNodeType::QUERY || first_argument_type == QueryTreeNodeType::UNION)
|
||||
{
|
||||
IdentifierResolveScope subquery_scope(in_first_argument, &scope /*parent_scope*/);
|
||||
subquery_scope.subquery_depth = scope.subquery_depth + 1;
|
||||
|
||||
evaluateScalarSubqueryIfNeeded(in_first_argument, subquery_scope);
|
||||
}
|
||||
}
|
||||
|
||||
/// Initialize function argument columns
|
||||
|
@ -419,9 +419,6 @@ dbms_target_link_libraries (
|
||||
boost::circular_buffer
|
||||
boost::heap)
|
||||
|
||||
target_include_directories(clickhouse_common_io PUBLIC "${CMAKE_CURRENT_BINARY_DIR}/Core/include") # uses some includes from core
|
||||
dbms_target_include_directories(PUBLIC "${CMAKE_CURRENT_BINARY_DIR}/Core/include")
|
||||
|
||||
target_link_libraries(clickhouse_common_io PUBLIC
|
||||
ch_contrib::miniselect
|
||||
ch_contrib::pdqsort)
|
||||
|
@ -21,6 +21,7 @@
|
||||
#include <Common/StringUtils.h>
|
||||
#include <Common/filesystemHelpers.h>
|
||||
#include <Common/NetException.h>
|
||||
#include <Common/SignalHandlers.h>
|
||||
#include <Common/tryGetFileNameByFileDescriptor.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
@ -80,6 +81,10 @@
|
||||
#include <Common/config_version.h>
|
||||
#include "config.h"
|
||||
|
||||
#if USE_GWP_ASAN
|
||||
# include <Common/GWPAsan.h>
|
||||
#endif
|
||||
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
using namespace std::literals;
|
||||
@ -302,7 +307,13 @@ public:
|
||||
};
|
||||
|
||||
|
||||
ClientBase::~ClientBase() = default;
|
||||
ClientBase::~ClientBase()
|
||||
{
|
||||
writeSignalIDtoSignalPipe(SignalListener::StopThread);
|
||||
signal_listener_thread.join();
|
||||
HandledSignals::instance().reset();
|
||||
}
|
||||
|
||||
ClientBase::ClientBase(
|
||||
int in_fd_,
|
||||
int out_fd_,
|
||||
@ -3072,6 +3083,8 @@ void ClientBase::init(int argc, char ** argv)
|
||||
("max_memory_usage_in_client", po::value<std::string>(), "Set memory limit in client/local server")
|
||||
|
||||
("fuzzer-args", po::value<std::string>(), "Command line arguments for the LLVM's libFuzzer driver. Only relevant if the application is compiled with libFuzzer.")
|
||||
|
||||
("client_logs_file", po::value<std::string>(), "Path to a file for writing client logs. Currently we only have fatal logs (when the client crashes)")
|
||||
;
|
||||
|
||||
addOptions(options_description);
|
||||
@ -3236,6 +3249,30 @@ void ClientBase::init(int argc, char ** argv)
|
||||
total_memory_tracker.setDescription("(total)");
|
||||
total_memory_tracker.setMetric(CurrentMetrics::MemoryTracking);
|
||||
}
|
||||
|
||||
/// Print stacktrace in case of crash
|
||||
HandledSignals::instance().setupTerminateHandler();
|
||||
HandledSignals::instance().setupCommonDeadlySignalHandlers();
|
||||
/// We don't setup signal handlers for SIGINT, SIGQUIT, SIGTERM because we don't
|
||||
/// have an option for client to shutdown gracefully.
|
||||
|
||||
fatal_channel_ptr = new Poco::SplitterChannel;
|
||||
fatal_console_channel_ptr = new Poco::ConsoleChannel;
|
||||
fatal_channel_ptr->addChannel(fatal_console_channel_ptr);
|
||||
if (options.count("client_logs_file"))
|
||||
{
|
||||
fatal_file_channel_ptr = new Poco::SimpleFileChannel(options["client_logs_file"].as<std::string>());
|
||||
fatal_channel_ptr->addChannel(fatal_file_channel_ptr);
|
||||
}
|
||||
|
||||
fatal_log = createLogger("ClientBase", fatal_channel_ptr.get(), Poco::Message::PRIO_FATAL);
|
||||
signal_listener = std::make_unique<SignalListener>(nullptr, fatal_log);
|
||||
signal_listener_thread.start(*signal_listener);
|
||||
|
||||
#if USE_GWP_ASAN
|
||||
GWPAsan::initFinished();
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -12,6 +12,9 @@
|
||||
#include <Core/ExternalTable.h>
|
||||
#include <Core/Settings.h>
|
||||
#include <Poco/Util/Application.h>
|
||||
#include <Poco/ConsoleChannel.h>
|
||||
#include <Poco/SimpleFileChannel.h>
|
||||
#include <Poco/SplitterChannel.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Client/Suggest.h>
|
||||
#include <boost/program_options.hpp>
|
||||
@ -212,6 +215,13 @@ protected:
|
||||
SharedContextHolder shared_context;
|
||||
ContextMutablePtr global_context;
|
||||
|
||||
LoggerPtr fatal_log;
|
||||
Poco::AutoPtr<Poco::SplitterChannel> fatal_channel_ptr;
|
||||
Poco::AutoPtr<Poco::Channel> fatal_console_channel_ptr;
|
||||
Poco::AutoPtr<Poco::Channel> fatal_file_channel_ptr;
|
||||
Poco::Thread signal_listener_thread;
|
||||
std::unique_ptr<Poco::Runnable> signal_listener;
|
||||
|
||||
bool is_interactive = false; /// Use either interactive line editing interface or batch mode.
|
||||
bool is_multiquery = false;
|
||||
bool delayed_interactive = false;
|
||||
|
@ -366,13 +366,10 @@ void ColumnAggregateFunction::updateHashWithValue(size_t n, SipHash & hash) cons
|
||||
hash.update(wbuf.str().c_str(), wbuf.str().size());
|
||||
}
|
||||
|
||||
void ColumnAggregateFunction::updateWeakHash32(WeakHash32 & hash) const
|
||||
WeakHash32 ColumnAggregateFunction::getWeakHash32() const
|
||||
{
|
||||
auto s = data.size();
|
||||
if (hash.getData().size() != data.size())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
|
||||
"column size is {}, hash size is {}", std::to_string(s), hash.getData().size());
|
||||
|
||||
WeakHash32 hash(s);
|
||||
auto & hash_data = hash.getData();
|
||||
|
||||
std::vector<UInt8> v;
|
||||
@ -383,6 +380,8 @@ void ColumnAggregateFunction::updateWeakHash32(WeakHash32 & hash) const
|
||||
wbuf.finalize();
|
||||
hash_data[i] = ::updateWeakHash32(v.data(), v.size(), hash_data[i]);
|
||||
}
|
||||
|
||||
return hash;
|
||||
}
|
||||
|
||||
void ColumnAggregateFunction::updateHashFast(SipHash & hash) const
|
||||
|
@ -177,7 +177,7 @@ public:
|
||||
|
||||
void updateHashWithValue(size_t n, SipHash & hash) const override;
|
||||
|
||||
void updateWeakHash32(WeakHash32 & hash) const override;
|
||||
WeakHash32 getWeakHash32() const override;
|
||||
|
||||
void updateHashFast(SipHash & hash) const override;
|
||||
|
||||
|
@ -271,15 +271,12 @@ void ColumnArray::updateHashWithValue(size_t n, SipHash & hash) const
|
||||
getData().updateHashWithValue(offset + i, hash);
|
||||
}
|
||||
|
||||
void ColumnArray::updateWeakHash32(WeakHash32 & hash) const
|
||||
WeakHash32 ColumnArray::getWeakHash32() const
|
||||
{
|
||||
auto s = offsets->size();
|
||||
if (hash.getData().size() != s)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
|
||||
"column size is {}, hash size is {}", s, hash.getData().size());
|
||||
WeakHash32 hash(s);
|
||||
|
||||
WeakHash32 internal_hash(data->size());
|
||||
data->updateWeakHash32(internal_hash);
|
||||
WeakHash32 internal_hash = data->getWeakHash32();
|
||||
|
||||
Offset prev_offset = 0;
|
||||
const auto & offsets_data = getOffsets();
|
||||
@ -300,6 +297,8 @@ void ColumnArray::updateWeakHash32(WeakHash32 & hash) const
|
||||
|
||||
prev_offset = offsets_data[i];
|
||||
}
|
||||
|
||||
return hash;
|
||||
}
|
||||
|
||||
void ColumnArray::updateHashFast(SipHash & hash) const
|
||||
|
@ -82,7 +82,7 @@ public:
|
||||
const char * deserializeAndInsertFromArena(const char * pos) override;
|
||||
const char * skipSerializedInArena(const char * pos) const override;
|
||||
void updateHashWithValue(size_t n, SipHash & hash) const override;
|
||||
void updateWeakHash32(WeakHash32 & hash) const override;
|
||||
WeakHash32 getWeakHash32() const override;
|
||||
void updateHashFast(SipHash & hash) const override;
|
||||
#if !defined(ABORT_ON_LOGICAL_ERROR)
|
||||
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
|
||||
|
@ -3,6 +3,7 @@
|
||||
#include <optional>
|
||||
#include <Core/Field.h>
|
||||
#include <Columns/IColumn.h>
|
||||
#include <Common/WeakHash.h>
|
||||
#include <IO/BufferWithOwnMemory.h>
|
||||
|
||||
|
||||
@ -98,7 +99,7 @@ public:
|
||||
const char * deserializeAndInsertFromArena(const char *) override { throwMustBeDecompressed(); }
|
||||
const char * skipSerializedInArena(const char *) const override { throwMustBeDecompressed(); }
|
||||
void updateHashWithValue(size_t, SipHash &) const override { throwMustBeDecompressed(); }
|
||||
void updateWeakHash32(WeakHash32 &) const override { throwMustBeDecompressed(); }
|
||||
WeakHash32 getWeakHash32() const override { throwMustBeDecompressed(); }
|
||||
void updateHashFast(SipHash &) const override { throwMustBeDecompressed(); }
|
||||
ColumnPtr filter(const Filter &, ssize_t) const override { throwMustBeDecompressed(); }
|
||||
void expand(const Filter &, bool) override { throwMustBeDecompressed(); }
|
||||
|
@ -137,18 +137,10 @@ void ColumnConst::updatePermutation(PermutationSortDirection /*direction*/, Perm
|
||||
{
|
||||
}
|
||||
|
||||
void ColumnConst::updateWeakHash32(WeakHash32 & hash) const
|
||||
WeakHash32 ColumnConst::getWeakHash32() const
|
||||
{
|
||||
if (hash.getData().size() != s)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
|
||||
"column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size()));
|
||||
|
||||
WeakHash32 element_hash(1);
|
||||
data->updateWeakHash32(element_hash);
|
||||
size_t data_hash = element_hash.getData()[0];
|
||||
|
||||
for (auto & value : hash.getData())
|
||||
value = static_cast<UInt32>(intHashCRC32(data_hash, value));
|
||||
WeakHash32 element_hash = data->getWeakHash32();
|
||||
return WeakHash32(s, element_hash.getData()[0]);
|
||||
}
|
||||
|
||||
void ColumnConst::compareColumn(
|
||||
|
@ -204,7 +204,7 @@ public:
|
||||
data->updateHashWithValue(0, hash);
|
||||
}
|
||||
|
||||
void updateWeakHash32(WeakHash32 & hash) const override;
|
||||
WeakHash32 getWeakHash32() const override;
|
||||
|
||||
void updateHashFast(SipHash & hash) const override
|
||||
{
|
||||
|
@ -28,7 +28,6 @@ namespace ErrorCodes
|
||||
extern const int PARAMETER_OUT_OF_BOUND;
|
||||
extern const int SIZES_OF_COLUMNS_DOESNT_MATCH;
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
template <is_decimal T>
|
||||
@ -76,13 +75,10 @@ void ColumnDecimal<T>::updateHashWithValue(size_t n, SipHash & hash) const
|
||||
}
|
||||
|
||||
template <is_decimal T>
|
||||
void ColumnDecimal<T>::updateWeakHash32(WeakHash32 & hash) const
|
||||
WeakHash32 ColumnDecimal<T>::getWeakHash32() const
|
||||
{
|
||||
auto s = data.size();
|
||||
|
||||
if (hash.getData().size() != s)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
|
||||
"column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size()));
|
||||
WeakHash32 hash(s);
|
||||
|
||||
const T * begin = data.data();
|
||||
const T * end = begin + s;
|
||||
@ -94,6 +90,8 @@ void ColumnDecimal<T>::updateWeakHash32(WeakHash32 & hash) const
|
||||
++begin;
|
||||
++hash_data;
|
||||
}
|
||||
|
||||
return hash;
|
||||
}
|
||||
|
||||
template <is_decimal T>
|
||||
|
@ -102,7 +102,7 @@ public:
|
||||
const char * deserializeAndInsertFromArena(const char * pos) override;
|
||||
const char * skipSerializedInArena(const char * pos) const override;
|
||||
void updateHashWithValue(size_t n, SipHash & hash) const override;
|
||||
void updateWeakHash32(WeakHash32 & hash) const override;
|
||||
WeakHash32 getWeakHash32() const override;
|
||||
void updateHashFast(SipHash & hash) const override;
|
||||
#if !defined(ABORT_ON_LOGICAL_ERROR)
|
||||
int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override;
|
||||
|
@ -4,6 +4,7 @@
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <Columns/ColumnVariant.h>
|
||||
#include <DataTypes/IDataType.h>
|
||||
#include <Common/WeakHash.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -174,9 +175,9 @@ public:
|
||||
|
||||
void updateHashWithValue(size_t n, SipHash & hash) const override;
|
||||
|
||||
void updateWeakHash32(WeakHash32 & hash) const override
|
||||
WeakHash32 getWeakHash32() const override
|
||||
{
|
||||
variant_column->updateWeakHash32(hash);
|
||||
return variant_column->getWeakHash32();
|
||||
}
|
||||
|
||||
void updateHashFast(SipHash & hash) const override
|
||||
|
@ -137,14 +137,10 @@ void ColumnFixedString::updateHashWithValue(size_t index, SipHash & hash) const
|
||||
hash.update(reinterpret_cast<const char *>(&chars[n * index]), n);
|
||||
}
|
||||
|
||||
void ColumnFixedString::updateWeakHash32(WeakHash32 & hash) const
|
||||
WeakHash32 ColumnFixedString::getWeakHash32() const
|
||||
{
|
||||
auto s = size();
|
||||
|
||||
if (hash.getData().size() != s)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
|
||||
"column size is {}, "
|
||||
"hash size is {}", std::to_string(s), std::to_string(hash.getData().size()));
|
||||
WeakHash32 hash(s);
|
||||
|
||||
const UInt8 * pos = chars.data();
|
||||
UInt32 * hash_data = hash.getData().data();
|
||||
@ -156,6 +152,8 @@ void ColumnFixedString::updateWeakHash32(WeakHash32 & hash) const
|
||||
pos += n;
|
||||
++hash_data;
|
||||
}
|
||||
|
||||
return hash;
|
||||
}
|
||||
|
||||
void ColumnFixedString::updateHashFast(SipHash & hash) const
|
||||
|
@ -133,7 +133,7 @@ public:
|
||||
|
||||
void updateHashWithValue(size_t index, SipHash & hash) const override;
|
||||
|
||||
void updateWeakHash32(WeakHash32 & hash) const override;
|
||||
WeakHash32 getWeakHash32() const override;
|
||||
|
||||
void updateHashFast(SipHash & hash) const override;
|
||||
|
||||
|
@ -4,6 +4,7 @@
|
||||
#include <Core/NamesAndTypes.h>
|
||||
#include <Core/ColumnsWithTypeAndName.h>
|
||||
#include <Columns/IColumn.h>
|
||||
#include <Common/WeakHash.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -130,9 +131,9 @@ public:
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "updateHashWithValue is not implemented for {}", getName());
|
||||
}
|
||||
|
||||
void updateWeakHash32(WeakHash32 &) const override
|
||||
WeakHash32 getWeakHash32() const override
|
||||
{
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "updateWeakHash32 is not implemented for {}", getName());
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "getWeakHash32 is not implemented for {}", getName());
|
||||
}
|
||||
|
||||
void updateHashFast(SipHash &) const override
|
||||
|
@ -7,8 +7,7 @@
|
||||
#include <Common/HashTable/HashMap.h>
|
||||
#include <Common/WeakHash.h>
|
||||
#include <Common/assert_cast.h>
|
||||
#include "Storages/IndicesDescription.h"
|
||||
#include "base/types.h"
|
||||
#include <base/types.h>
|
||||
#include <base/sort.h>
|
||||
#include <base/scope_guard.h>
|
||||
|
||||
@ -320,19 +319,10 @@ const char * ColumnLowCardinality::skipSerializedInArena(const char * pos) const
|
||||
return getDictionary().skipSerializedInArena(pos);
|
||||
}
|
||||
|
||||
void ColumnLowCardinality::updateWeakHash32(WeakHash32 & hash) const
|
||||
WeakHash32 ColumnLowCardinality::getWeakHash32() const
|
||||
{
|
||||
auto s = size();
|
||||
|
||||
if (hash.getData().size() != s)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
|
||||
"column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size()));
|
||||
|
||||
const auto & dict = getDictionary().getNestedColumn();
|
||||
WeakHash32 dict_hash(dict->size());
|
||||
dict->updateWeakHash32(dict_hash);
|
||||
|
||||
idx.updateWeakHash(hash, dict_hash);
|
||||
WeakHash32 dict_hash = getDictionary().getNestedColumn()->getWeakHash32();
|
||||
return idx.getWeakHash(dict_hash);
|
||||
}
|
||||
|
||||
void ColumnLowCardinality::updateHashFast(SipHash & hash) const
|
||||
@ -832,10 +822,11 @@ bool ColumnLowCardinality::Index::containsDefault() const
|
||||
return contains;
|
||||
}
|
||||
|
||||
void ColumnLowCardinality::Index::updateWeakHash(WeakHash32 & hash, WeakHash32 & dict_hash) const
|
||||
WeakHash32 ColumnLowCardinality::Index::getWeakHash(const WeakHash32 & dict_hash) const
|
||||
{
|
||||
WeakHash32 hash(positions->size());
|
||||
auto & hash_data = hash.getData();
|
||||
auto & dict_hash_data = dict_hash.getData();
|
||||
const auto & dict_hash_data = dict_hash.getData();
|
||||
|
||||
auto update_weak_hash = [&](auto x)
|
||||
{
|
||||
@ -844,10 +835,11 @@ void ColumnLowCardinality::Index::updateWeakHash(WeakHash32 & hash, WeakHash32 &
|
||||
auto size = data.size();
|
||||
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
hash_data[i] = static_cast<UInt32>(intHashCRC32(dict_hash_data[data[i]], hash_data[i]));
|
||||
hash_data[i] = dict_hash_data[data[i]];
|
||||
};
|
||||
|
||||
callForType(std::move(update_weak_hash), size_of_type);
|
||||
return hash;
|
||||
}
|
||||
|
||||
void ColumnLowCardinality::Index::collectSerializedValueSizes(
|
||||
|
@ -111,7 +111,7 @@ public:
|
||||
getDictionary().updateHashWithValue(getIndexes().getUInt(n), hash);
|
||||
}
|
||||
|
||||
void updateWeakHash32(WeakHash32 & hash) const override;
|
||||
WeakHash32 getWeakHash32() const override;
|
||||
|
||||
void updateHashFast(SipHash &) const override;
|
||||
|
||||
@ -325,7 +325,7 @@ public:
|
||||
|
||||
bool containsDefault() const;
|
||||
|
||||
void updateWeakHash(WeakHash32 & hash, WeakHash32 & dict_hash) const;
|
||||
WeakHash32 getWeakHash(const WeakHash32 & dict_hash) const;
|
||||
|
||||
void collectSerializedValueSizes(PaddedPODArray<UInt64> & sizes, const PaddedPODArray<UInt64> & dict_sizes) const;
|
||||
|
||||
|
@ -143,9 +143,9 @@ void ColumnMap::updateHashWithValue(size_t n, SipHash & hash) const
|
||||
nested->updateHashWithValue(n, hash);
|
||||
}
|
||||
|
||||
void ColumnMap::updateWeakHash32(WeakHash32 & hash) const
|
||||
WeakHash32 ColumnMap::getWeakHash32() const
|
||||
{
|
||||
nested->updateWeakHash32(hash);
|
||||
return nested->getWeakHash32();
|
||||
}
|
||||
|
||||
void ColumnMap::updateHashFast(SipHash & hash) const
|
||||
|
@ -64,7 +64,7 @@ public:
|
||||
const char * deserializeAndInsertFromArena(const char * pos) override;
|
||||
const char * skipSerializedInArena(const char * pos) const override;
|
||||
void updateHashWithValue(size_t n, SipHash & hash) const override;
|
||||
void updateWeakHash32(WeakHash32 & hash) const override;
|
||||
WeakHash32 getWeakHash32() const override;
|
||||
void updateHashFast(SipHash & hash) const override;
|
||||
|
||||
#if !defined(ABORT_ON_LOGICAL_ERROR)
|
||||
|
@ -56,25 +56,21 @@ void ColumnNullable::updateHashWithValue(size_t n, SipHash & hash) const
|
||||
getNestedColumn().updateHashWithValue(n, hash);
|
||||
}
|
||||
|
||||
void ColumnNullable::updateWeakHash32(WeakHash32 & hash) const
|
||||
WeakHash32 ColumnNullable::getWeakHash32() const
|
||||
{
|
||||
auto s = size();
|
||||
|
||||
if (hash.getData().size() != s)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
|
||||
"column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size()));
|
||||
|
||||
WeakHash32 old_hash = hash;
|
||||
nested_column->updateWeakHash32(hash);
|
||||
WeakHash32 hash = nested_column->getWeakHash32();
|
||||
|
||||
const auto & null_map_data = getNullMapData();
|
||||
auto & hash_data = hash.getData();
|
||||
auto & old_hash_data = old_hash.getData();
|
||||
|
||||
/// Use old data for nulls.
|
||||
/// Use default for nulls.
|
||||
for (size_t row = 0; row < s; ++row)
|
||||
if (null_map_data[row])
|
||||
hash_data[row] = old_hash_data[row];
|
||||
hash_data[row] = WeakHash32::kDefaultInitialValue;
|
||||
|
||||
return hash;
|
||||
}
|
||||
|
||||
void ColumnNullable::updateHashFast(SipHash & hash) const
|
||||
|
@ -133,7 +133,7 @@ public:
|
||||
void protect() override;
|
||||
ColumnPtr replicate(const Offsets & replicate_offsets) const override;
|
||||
void updateHashWithValue(size_t n, SipHash & hash) const override;
|
||||
void updateWeakHash32(WeakHash32 & hash) const override;
|
||||
WeakHash32 getWeakHash32() const override;
|
||||
void updateHashFast(SipHash & hash) const override;
|
||||
void getExtremes(Field & min, Field & max) const override;
|
||||
// Special function for nullable minmax index
|
||||
|
@ -5,6 +5,7 @@
|
||||
#include <Core/Names.h>
|
||||
#include <DataTypes/Serializations/SubcolumnsTree.h>
|
||||
#include <Common/PODArray.h>
|
||||
#include <Common/WeakHash.h>
|
||||
|
||||
#include <DataTypes/IDataType.h>
|
||||
|
||||
@ -252,7 +253,7 @@ public:
|
||||
const char * deserializeAndInsertFromArena(const char *) override { throwMustBeConcrete(); }
|
||||
const char * skipSerializedInArena(const char *) const override { throwMustBeConcrete(); }
|
||||
void updateHashWithValue(size_t, SipHash &) const override { throwMustBeConcrete(); }
|
||||
void updateWeakHash32(WeakHash32 &) const override { throwMustBeConcrete(); }
|
||||
WeakHash32 getWeakHash32() const override { throwMustBeConcrete(); }
|
||||
void updateHashFast(SipHash & hash) const override;
|
||||
void expand(const Filter &, bool) override { throwMustBeConcrete(); }
|
||||
bool hasEqualValues() const override { throwMustBeConcrete(); }
|
||||
|
@ -678,20 +678,22 @@ void ColumnSparse::updateHashWithValue(size_t n, SipHash & hash) const
|
||||
values->updateHashWithValue(getValueIndex(n), hash);
|
||||
}
|
||||
|
||||
void ColumnSparse::updateWeakHash32(WeakHash32 & hash) const
|
||||
WeakHash32 ColumnSparse::getWeakHash32() const
|
||||
{
|
||||
if (hash.getData().size() != _size)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
|
||||
"column size is {}, hash size is {}", _size, hash.getData().size());
|
||||
WeakHash32 values_hash = values->getWeakHash32();
|
||||
WeakHash32 hash(size());
|
||||
|
||||
auto & hash_data = hash.getData();
|
||||
auto & values_hash_data = values_hash.getData();
|
||||
|
||||
auto offset_it = begin();
|
||||
auto & hash_data = hash.getData();
|
||||
for (size_t i = 0; i < _size; ++i, ++offset_it)
|
||||
{
|
||||
size_t value_index = offset_it.getValueIndex();
|
||||
auto data_ref = values->getDataAt(value_index);
|
||||
hash_data[i] = ::updateWeakHash32(reinterpret_cast<const UInt8 *>(data_ref.data), data_ref.size, hash_data[i]);
|
||||
hash_data[i] = values_hash_data[value_index];
|
||||
}
|
||||
|
||||
return hash;
|
||||
}
|
||||
|
||||
void ColumnSparse::updateHashFast(SipHash & hash) const
|
||||
|
@ -139,7 +139,7 @@ public:
|
||||
void protect() override;
|
||||
ColumnPtr replicate(const Offsets & replicate_offsets) const override;
|
||||
void updateHashWithValue(size_t n, SipHash & hash) const override;
|
||||
void updateWeakHash32(WeakHash32 & hash) const override;
|
||||
WeakHash32 getWeakHash32() const override;
|
||||
void updateHashFast(SipHash & hash) const override;
|
||||
void getExtremes(Field & min, Field & max) const override;
|
||||
|
||||
|
@ -108,13 +108,10 @@ MutableColumnPtr ColumnString::cloneResized(size_t to_size) const
|
||||
return res;
|
||||
}
|
||||
|
||||
void ColumnString::updateWeakHash32(WeakHash32 & hash) const
|
||||
WeakHash32 ColumnString::getWeakHash32() const
|
||||
{
|
||||
auto s = offsets.size();
|
||||
|
||||
if (hash.getData().size() != s)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
|
||||
"column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size()));
|
||||
WeakHash32 hash(s);
|
||||
|
||||
const UInt8 * pos = chars.data();
|
||||
UInt32 * hash_data = hash.getData().data();
|
||||
@ -130,6 +127,8 @@ void ColumnString::updateWeakHash32(WeakHash32 & hash) const
|
||||
prev_offset = offset;
|
||||
++hash_data;
|
||||
}
|
||||
|
||||
return hash;
|
||||
}
|
||||
|
||||
|
||||
|
@ -212,7 +212,7 @@ public:
|
||||
hash.update(reinterpret_cast<const char *>(&chars[offset]), string_size);
|
||||
}
|
||||
|
||||
void updateWeakHash32(WeakHash32 & hash) const override;
|
||||
WeakHash32 getWeakHash32() const override;
|
||||
|
||||
void updateHashFast(SipHash & hash) const override
|
||||
{
|
||||
|
@ -201,6 +201,7 @@ bool ColumnTuple::tryInsert(const Field & x)
|
||||
return false;
|
||||
}
|
||||
}
|
||||
++column_length;
|
||||
|
||||
return true;
|
||||
}
|
||||
@ -236,6 +237,7 @@ void ColumnTuple::doInsertManyFrom(const IColumn & src, size_t position, size_t
|
||||
|
||||
for (size_t i = 0; i < tuple_size; ++i)
|
||||
columns[i]->insertManyFrom(*src_tuple.columns[i], position, length);
|
||||
column_length += length;
|
||||
}
|
||||
|
||||
void ColumnTuple::insertDefault()
|
||||
@ -308,16 +310,15 @@ void ColumnTuple::updateHashWithValue(size_t n, SipHash & hash) const
|
||||
column->updateHashWithValue(n, hash);
|
||||
}
|
||||
|
||||
void ColumnTuple::updateWeakHash32(WeakHash32 & hash) const
|
||||
WeakHash32 ColumnTuple::getWeakHash32() const
|
||||
{
|
||||
auto s = size();
|
||||
|
||||
if (hash.getData().size() != s)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
|
||||
"column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size()));
|
||||
WeakHash32 hash(s);
|
||||
|
||||
for (const auto & column : columns)
|
||||
column->updateWeakHash32(hash);
|
||||
hash.update(column->getWeakHash32());
|
||||
|
||||
return hash;
|
||||
}
|
||||
|
||||
void ColumnTuple::updateHashFast(SipHash & hash) const
|
||||
|
@ -81,7 +81,7 @@ public:
|
||||
const char * deserializeAndInsertFromArena(const char * pos) override;
|
||||
const char * skipSerializedInArena(const char * pos) const override;
|
||||
void updateHashWithValue(size_t n, SipHash & hash) const override;
|
||||
void updateWeakHash32(WeakHash32 & hash) const override;
|
||||
WeakHash32 getWeakHash32() const override;
|
||||
void updateHashFast(SipHash & hash) const override;
|
||||
#if !defined(ABORT_ON_LOGICAL_ERROR)
|
||||
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
|
||||
|
@ -789,36 +789,26 @@ void ColumnVariant::updateHashWithValue(size_t n, SipHash & hash) const
|
||||
variants[localDiscriminatorByGlobal(global_discr)]->updateHashWithValue(offsetAt(n), hash);
|
||||
}
|
||||
|
||||
void ColumnVariant::updateWeakHash32(WeakHash32 & hash) const
|
||||
WeakHash32 ColumnVariant::getWeakHash32() const
|
||||
{
|
||||
auto s = size();
|
||||
|
||||
if (hash.getData().size() != s)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
|
||||
"column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size()));
|
||||
|
||||
/// If we have only NULLs, keep hash unchanged.
|
||||
if (hasOnlyNulls())
|
||||
return;
|
||||
return WeakHash32(s);
|
||||
|
||||
/// Optimization for case when there is only 1 non-empty variant and no NULLs.
|
||||
/// In this case we can just calculate weak hash for this variant.
|
||||
if (auto non_empty_local_discr = getLocalDiscriminatorOfOneNoneEmptyVariantNoNulls())
|
||||
{
|
||||
variants[*non_empty_local_discr]->updateWeakHash32(hash);
|
||||
return;
|
||||
}
|
||||
return variants[*non_empty_local_discr]->getWeakHash32();
|
||||
|
||||
/// Calculate weak hash for all variants.
|
||||
std::vector<WeakHash32> nested_hashes;
|
||||
for (const auto & variant : variants)
|
||||
{
|
||||
WeakHash32 nested_hash(variant->size());
|
||||
variant->updateWeakHash32(nested_hash);
|
||||
nested_hashes.emplace_back(std::move(nested_hash));
|
||||
}
|
||||
nested_hashes.emplace_back(variant->getWeakHash32());
|
||||
|
||||
/// For each row hash is a hash of corresponding row from corresponding variant.
|
||||
WeakHash32 hash(s);
|
||||
auto & hash_data = hash.getData();
|
||||
const auto & local_discriminators_data = getLocalDiscriminators();
|
||||
const auto & offsets_data = getOffsets();
|
||||
@ -827,11 +817,10 @@ void ColumnVariant::updateWeakHash32(WeakHash32 & hash) const
|
||||
Discriminator discr = local_discriminators_data[i];
|
||||
/// Update hash only for non-NULL values
|
||||
if (discr != NULL_DISCRIMINATOR)
|
||||
{
|
||||
auto nested_hash = nested_hashes[local_discriminators_data[i]].getData()[offsets_data[i]];
|
||||
hash_data[i] = static_cast<UInt32>(hashCRC32(nested_hash, hash_data[i]));
|
||||
}
|
||||
hash_data[i] = nested_hashes[discr].getData()[offsets_data[i]];
|
||||
}
|
||||
|
||||
return hash;
|
||||
}
|
||||
|
||||
void ColumnVariant::updateHashFast(SipHash & hash) const
|
||||
|
@ -213,7 +213,7 @@ public:
|
||||
const char * deserializeVariantAndInsertFromArena(Discriminator global_discr, const char * pos);
|
||||
const char * skipSerializedInArena(const char * pos) const override;
|
||||
void updateHashWithValue(size_t n, SipHash & hash) const override;
|
||||
void updateWeakHash32(WeakHash32 & hash) const override;
|
||||
WeakHash32 getWeakHash32() const override;
|
||||
void updateHashFast(SipHash & hash) const override;
|
||||
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
|
||||
void expand(const Filter & mask, bool inverted) override;
|
||||
|
@ -73,13 +73,10 @@ void ColumnVector<T>::updateHashWithValue(size_t n, SipHash & hash) const
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void ColumnVector<T>::updateWeakHash32(WeakHash32 & hash) const
|
||||
WeakHash32 ColumnVector<T>::getWeakHash32() const
|
||||
{
|
||||
auto s = data.size();
|
||||
|
||||
if (hash.getData().size() != s)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: "
|
||||
"column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size()));
|
||||
WeakHash32 hash(s);
|
||||
|
||||
const T * begin = data.data();
|
||||
const T * end = begin + s;
|
||||
@ -91,6 +88,8 @@ void ColumnVector<T>::updateWeakHash32(WeakHash32 & hash) const
|
||||
++begin;
|
||||
++hash_data;
|
||||
}
|
||||
|
||||
return hash;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
|
@ -114,7 +114,7 @@ public:
|
||||
|
||||
void updateHashWithValue(size_t n, SipHash & hash) const override;
|
||||
|
||||
void updateWeakHash32(WeakHash32 & hash) const override;
|
||||
WeakHash32 getWeakHash32() const override;
|
||||
|
||||
void updateHashFast(SipHash & hash) const override;
|
||||
|
||||
|
@ -300,10 +300,10 @@ public:
|
||||
/// passed bytes to hash must identify sequence of values unambiguously.
|
||||
virtual void updateHashWithValue(size_t n, SipHash & hash) const = 0;
|
||||
|
||||
/// Update hash function value. Hash is calculated for each element.
|
||||
/// Get hash function value. Hash is calculated for each element.
|
||||
/// It's a fast weak hash function. Mainly need to scatter data between threads.
|
||||
/// WeakHash32 must have the same size as column.
|
||||
virtual void updateWeakHash32(WeakHash32 & hash) const = 0;
|
||||
virtual WeakHash32 getWeakHash32() const = 0;
|
||||
|
||||
/// Update state of hash with all column.
|
||||
virtual void updateHashFast(SipHash & hash) const = 0;
|
||||
|
@ -1,6 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <Columns/IColumn.h>
|
||||
#include <Common/WeakHash.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -63,8 +64,9 @@ public:
|
||||
{
|
||||
}
|
||||
|
||||
void updateWeakHash32(WeakHash32 & /*hash*/) const override
|
||||
WeakHash32 getWeakHash32() const override
|
||||
{
|
||||
return WeakHash32(s);
|
||||
}
|
||||
|
||||
void updateHashFast(SipHash & /*hash*/) const override
|
||||
|
@ -1,6 +1,7 @@
|
||||
#pragma once
|
||||
#include <optional>
|
||||
#include <Columns/IColumn.h>
|
||||
#include <Common/WeakHash.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -166,9 +167,9 @@ public:
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method scatter is not supported for ColumnUnique.");
|
||||
}
|
||||
|
||||
void updateWeakHash32(WeakHash32 &) const override
|
||||
WeakHash32 getWeakHash32() const override
|
||||
{
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method updateWeakHash32 is not supported for ColumnUnique.");
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getWeakHash32 is not supported for ColumnUnique.");
|
||||
}
|
||||
|
||||
void updateHashFast(SipHash &) const override
|
||||
|
@ -60,8 +60,7 @@ TEST(WeakHash32, ColumnVectorU8)
|
||||
data.push_back(i);
|
||||
}
|
||||
|
||||
WeakHash32 hash(col->size());
|
||||
col->updateWeakHash32(hash);
|
||||
WeakHash32 hash = col->getWeakHash32();
|
||||
|
||||
checkColumn(hash.getData(), col->getData());
|
||||
}
|
||||
@ -77,8 +76,7 @@ TEST(WeakHash32, ColumnVectorI8)
|
||||
data.push_back(i);
|
||||
}
|
||||
|
||||
WeakHash32 hash(col->size());
|
||||
col->updateWeakHash32(hash);
|
||||
WeakHash32 hash = col->getWeakHash32();
|
||||
|
||||
checkColumn(hash.getData(), col->getData());
|
||||
}
|
||||
@ -94,8 +92,7 @@ TEST(WeakHash32, ColumnVectorU16)
|
||||
data.push_back(i);
|
||||
}
|
||||
|
||||
WeakHash32 hash(col->size());
|
||||
col->updateWeakHash32(hash);
|
||||
WeakHash32 hash = col->getWeakHash32();
|
||||
|
||||
checkColumn(hash.getData(), col->getData());
|
||||
}
|
||||
@ -111,8 +108,7 @@ TEST(WeakHash32, ColumnVectorI16)
|
||||
data.push_back(i);
|
||||
}
|
||||
|
||||
WeakHash32 hash(col->size());
|
||||
col->updateWeakHash32(hash);
|
||||
WeakHash32 hash = col->getWeakHash32();
|
||||
|
||||
checkColumn(hash.getData(), col->getData());
|
||||
}
|
||||
@ -128,8 +124,7 @@ TEST(WeakHash32, ColumnVectorU32)
|
||||
data.push_back(i << 16u);
|
||||
}
|
||||
|
||||
WeakHash32 hash(col->size());
|
||||
col->updateWeakHash32(hash);
|
||||
WeakHash32 hash = col->getWeakHash32();
|
||||
|
||||
checkColumn(hash.getData(), col->getData());
|
||||
}
|
||||
@ -145,8 +140,7 @@ TEST(WeakHash32, ColumnVectorI32)
|
||||
data.push_back(i << 16);
|
||||
}
|
||||
|
||||
WeakHash32 hash(col->size());
|
||||
col->updateWeakHash32(hash);
|
||||
WeakHash32 hash = col->getWeakHash32();
|
||||
|
||||
checkColumn(hash.getData(), col->getData());
|
||||
}
|
||||
@ -162,8 +156,7 @@ TEST(WeakHash32, ColumnVectorU64)
|
||||
data.push_back(i << 32u);
|
||||
}
|
||||
|
||||
WeakHash32 hash(col->size());
|
||||
col->updateWeakHash32(hash);
|
||||
WeakHash32 hash = col->getWeakHash32();
|
||||
|
||||
checkColumn(hash.getData(), col->getData());
|
||||
}
|
||||
@ -179,8 +172,7 @@ TEST(WeakHash32, ColumnVectorI64)
|
||||
data.push_back(i << 32);
|
||||
}
|
||||
|
||||
WeakHash32 hash(col->size());
|
||||
col->updateWeakHash32(hash);
|
||||
WeakHash32 hash = col->getWeakHash32();
|
||||
|
||||
checkColumn(hash.getData(), col->getData());
|
||||
}
|
||||
@ -204,8 +196,7 @@ TEST(WeakHash32, ColumnVectorU128)
|
||||
}
|
||||
}
|
||||
|
||||
WeakHash32 hash(col->size());
|
||||
col->updateWeakHash32(hash);
|
||||
WeakHash32 hash = col->getWeakHash32();
|
||||
|
||||
checkColumn(hash.getData(), eq_data);
|
||||
}
|
||||
@ -221,8 +212,7 @@ TEST(WeakHash32, ColumnVectorI128)
|
||||
data.push_back(i << 32);
|
||||
}
|
||||
|
||||
WeakHash32 hash(col->size());
|
||||
col->updateWeakHash32(hash);
|
||||
WeakHash32 hash = col->getWeakHash32();
|
||||
|
||||
checkColumn(hash.getData(), col->getData());
|
||||
}
|
||||
@ -238,8 +228,7 @@ TEST(WeakHash32, ColumnDecimal32)
|
||||
data.push_back(i << 16);
|
||||
}
|
||||
|
||||
WeakHash32 hash(col->size());
|
||||
col->updateWeakHash32(hash);
|
||||
WeakHash32 hash = col->getWeakHash32();
|
||||
|
||||
checkColumn(hash.getData(), col->getData());
|
||||
}
|
||||
@ -255,8 +244,7 @@ TEST(WeakHash32, ColumnDecimal64)
|
||||
data.push_back(i << 32);
|
||||
}
|
||||
|
||||
WeakHash32 hash(col->size());
|
||||
col->updateWeakHash32(hash);
|
||||
WeakHash32 hash = col->getWeakHash32();
|
||||
|
||||
checkColumn(hash.getData(), col->getData());
|
||||
}
|
||||
@ -272,8 +260,7 @@ TEST(WeakHash32, ColumnDecimal128)
|
||||
data.push_back(i << 32);
|
||||
}
|
||||
|
||||
WeakHash32 hash(col->size());
|
||||
col->updateWeakHash32(hash);
|
||||
WeakHash32 hash = col->getWeakHash32();
|
||||
|
||||
checkColumn(hash.getData(), col->getData());
|
||||
}
|
||||
@ -294,8 +281,7 @@ TEST(WeakHash32, ColumnString1)
|
||||
}
|
||||
}
|
||||
|
||||
WeakHash32 hash(col->size());
|
||||
col->updateWeakHash32(hash);
|
||||
WeakHash32 hash = col->getWeakHash32();
|
||||
|
||||
checkColumn(hash.getData(), data);
|
||||
}
|
||||
@ -331,8 +317,7 @@ TEST(WeakHash32, ColumnString2)
|
||||
}
|
||||
}
|
||||
|
||||
WeakHash32 hash(col->size());
|
||||
col->updateWeakHash32(hash);
|
||||
WeakHash32 hash = col->getWeakHash32();
|
||||
|
||||
checkColumn(hash.getData(), data);
|
||||
}
|
||||
@ -369,8 +354,7 @@ TEST(WeakHash32, ColumnString3)
|
||||
}
|
||||
}
|
||||
|
||||
WeakHash32 hash(col->size());
|
||||
col->updateWeakHash32(hash);
|
||||
WeakHash32 hash = col->getWeakHash32();
|
||||
|
||||
checkColumn(hash.getData(), data);
|
||||
}
|
||||
@ -397,8 +381,7 @@ TEST(WeakHash32, ColumnFixedString)
|
||||
}
|
||||
}
|
||||
|
||||
WeakHash32 hash(col->size());
|
||||
col->updateWeakHash32(hash);
|
||||
WeakHash32 hash = col->getWeakHash32();
|
||||
|
||||
checkColumn(hash.getData(), data);
|
||||
}
|
||||
@ -444,8 +427,7 @@ TEST(WeakHash32, ColumnArray)
|
||||
|
||||
auto col_arr = ColumnArray::create(std::move(val), std::move(off));
|
||||
|
||||
WeakHash32 hash(col_arr->size());
|
||||
col_arr->updateWeakHash32(hash);
|
||||
WeakHash32 hash = col_arr->getWeakHash32();
|
||||
|
||||
checkColumn(hash.getData(), eq_data);
|
||||
}
|
||||
@ -479,8 +461,7 @@ TEST(WeakHash32, ColumnArray2)
|
||||
|
||||
auto col_arr = ColumnArray::create(std::move(val), std::move(off));
|
||||
|
||||
WeakHash32 hash(col_arr->size());
|
||||
col_arr->updateWeakHash32(hash);
|
||||
WeakHash32 hash = col_arr->getWeakHash32();
|
||||
|
||||
checkColumn(hash.getData(), eq_data);
|
||||
}
|
||||
@ -536,8 +517,7 @@ TEST(WeakHash32, ColumnArrayArray)
|
||||
auto col_arr = ColumnArray::create(std::move(val), std::move(off));
|
||||
auto col_arr_arr = ColumnArray::create(std::move(col_arr), std::move(off2));
|
||||
|
||||
WeakHash32 hash(col_arr_arr->size());
|
||||
col_arr_arr->updateWeakHash32(hash);
|
||||
WeakHash32 hash = col_arr_arr->getWeakHash32();
|
||||
|
||||
checkColumn(hash.getData(), eq_data);
|
||||
}
|
||||
@ -555,8 +535,7 @@ TEST(WeakHash32, ColumnConst)
|
||||
|
||||
auto col_const = ColumnConst::create(std::move(inner_col), 256);
|
||||
|
||||
WeakHash32 hash(col_const->size());
|
||||
col_const->updateWeakHash32(hash);
|
||||
WeakHash32 hash = col_const->getWeakHash32();
|
||||
|
||||
checkColumn(hash.getData(), data);
|
||||
}
|
||||
@ -576,8 +555,7 @@ TEST(WeakHash32, ColumnLowcardinality)
|
||||
}
|
||||
}
|
||||
|
||||
WeakHash32 hash(col->size());
|
||||
col->updateWeakHash32(hash);
|
||||
WeakHash32 hash = col->getWeakHash32();
|
||||
|
||||
checkColumn(hash.getData(), data);
|
||||
}
|
||||
@ -602,8 +580,7 @@ TEST(WeakHash32, ColumnNullable)
|
||||
|
||||
auto col_null = ColumnNullable::create(std::move(col), std::move(mask));
|
||||
|
||||
WeakHash32 hash(col_null->size());
|
||||
col_null->updateWeakHash32(hash);
|
||||
WeakHash32 hash = col_null->getWeakHash32();
|
||||
|
||||
checkColumn(hash.getData(), eq);
|
||||
}
|
||||
@ -633,8 +610,7 @@ TEST(WeakHash32, ColumnTupleUInt64UInt64)
|
||||
columns.emplace_back(std::move(col2));
|
||||
auto col_tuple = ColumnTuple::create(std::move(columns));
|
||||
|
||||
WeakHash32 hash(col_tuple->size());
|
||||
col_tuple->updateWeakHash32(hash);
|
||||
WeakHash32 hash = col_tuple->getWeakHash32();
|
||||
|
||||
checkColumn(hash.getData(), eq);
|
||||
}
|
||||
@ -671,8 +647,7 @@ TEST(WeakHash32, ColumnTupleUInt64String)
|
||||
columns.emplace_back(std::move(col2));
|
||||
auto col_tuple = ColumnTuple::create(std::move(columns));
|
||||
|
||||
WeakHash32 hash(col_tuple->size());
|
||||
col_tuple->updateWeakHash32(hash);
|
||||
WeakHash32 hash = col_tuple->getWeakHash32();
|
||||
|
||||
checkColumn(hash.getData(), eq);
|
||||
}
|
||||
@ -709,8 +684,7 @@ TEST(WeakHash32, ColumnTupleUInt64FixedString)
|
||||
columns.emplace_back(std::move(col2));
|
||||
auto col_tuple = ColumnTuple::create(std::move(columns));
|
||||
|
||||
WeakHash32 hash(col_tuple->size());
|
||||
col_tuple->updateWeakHash32(hash);
|
||||
WeakHash32 hash = col_tuple->getWeakHash32();
|
||||
|
||||
checkColumn(hash.getData(), eq);
|
||||
}
|
||||
@ -756,8 +730,7 @@ TEST(WeakHash32, ColumnTupleUInt64Array)
|
||||
columns.emplace_back(ColumnArray::create(std::move(val), std::move(off)));
|
||||
auto col_tuple = ColumnTuple::create(std::move(columns));
|
||||
|
||||
WeakHash32 hash(col_tuple->size());
|
||||
col_tuple->updateWeakHash32(hash);
|
||||
WeakHash32 hash = col_tuple->getWeakHash32();
|
||||
|
||||
checkColumn(hash.getData(), eq_data);
|
||||
}
|
||||
|
@ -68,7 +68,7 @@ void * allocNoTrack(size_t size, size_t alignment)
|
||||
{
|
||||
void * buf;
|
||||
#if USE_GWP_ASAN
|
||||
if (unlikely(GWPAsan::GuardedAlloc.shouldSample()))
|
||||
if (unlikely(GWPAsan::shouldSample()))
|
||||
{
|
||||
if (void * ptr = GWPAsan::GuardedAlloc.allocate(size, alignment))
|
||||
{
|
||||
@ -185,7 +185,7 @@ void * Allocator<clear_memory_, populate>::realloc(void * buf, size_t old_size,
|
||||
}
|
||||
|
||||
#if USE_GWP_ASAN
|
||||
if (unlikely(GWPAsan::GuardedAlloc.shouldSample()))
|
||||
if (unlikely(GWPAsan::shouldSample()))
|
||||
{
|
||||
auto trace_alloc = CurrentMemoryTracker::alloc(new_size);
|
||||
if (void * ptr = GWPAsan::GuardedAlloc.allocate(new_size, alignment))
|
||||
|
@ -84,12 +84,18 @@ public:
|
||||
return result;
|
||||
}
|
||||
|
||||
void append(Self && other)
|
||||
// append items for other inscnace only if there is no such item in current instance
|
||||
void appendIfUniq(Self && other)
|
||||
{
|
||||
auto middle_idx = records.size();
|
||||
std::move(other.records.begin(), other.records.end(), std::back_inserter(records));
|
||||
// merge is stable
|
||||
std::inplace_merge(records.begin(), records.begin() + middle_idx, records.end());
|
||||
chassert(isUniqTypes());
|
||||
// remove duplicates
|
||||
records.erase(std::unique(records.begin(), records.end()), records.end());
|
||||
|
||||
assert(std::is_sorted(records.begin(), records.end()));
|
||||
assert(isUniqTypes());
|
||||
}
|
||||
|
||||
template <class T>
|
||||
@ -142,7 +148,6 @@ private:
|
||||
bool isUniqTypes() const
|
||||
{
|
||||
auto uniq_it = std::adjacent_find(records.begin(), records.end());
|
||||
|
||||
return uniq_it == records.end();
|
||||
}
|
||||
|
||||
@ -161,8 +166,6 @@ private:
|
||||
|
||||
|
||||
records.emplace(it, type_idx, item);
|
||||
|
||||
chassert(isUniqTypes());
|
||||
}
|
||||
|
||||
Records::const_iterator getImpl(std::type_index type_idx) const
|
||||
|
@ -244,6 +244,15 @@ private:
|
||||
const char * className() const noexcept override { return "DB::ErrnoException"; }
|
||||
};
|
||||
|
||||
/// An exception to use in unit tests to test interfaces.
|
||||
/// It is distinguished from others, so it does not have to be logged.
|
||||
class TestException : public Exception
|
||||
{
|
||||
public:
|
||||
using Exception::Exception;
|
||||
};
|
||||
|
||||
|
||||
using Exceptions = std::vector<std::exception_ptr>;
|
||||
|
||||
/** Try to write an exception to the log (and forget about it).
|
||||
|
@ -217,6 +217,13 @@ void printReport([[maybe_unused]] uintptr_t fault_address)
|
||||
reinterpret_cast<void **>(trace.data()), 0, trace_length, [&](const auto line) { LOG_FATAL(logger, fmt::runtime(line)); });
|
||||
}
|
||||
|
||||
std::atomic<bool> init_finished = false;
|
||||
|
||||
void initFinished()
|
||||
{
|
||||
init_finished.store(true, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
std::atomic<double> force_sample_probability = 0.0;
|
||||
|
||||
void setForceSampleProbability(double value)
|
||||
|
@ -19,12 +19,30 @@ bool isGWPAsanError(uintptr_t fault_address);
|
||||
|
||||
void printReport(uintptr_t fault_address);
|
||||
|
||||
extern std::atomic<bool> init_finished;
|
||||
|
||||
void initFinished();
|
||||
|
||||
extern std::atomic<double> force_sample_probability;
|
||||
|
||||
void setForceSampleProbability(double value);
|
||||
|
||||
/**
|
||||
* We'd like to postpone sampling allocations under the startup is finished. There are mainly
|
||||
* two reasons for that:
|
||||
*
|
||||
* - To avoid complex issues with initialization order
|
||||
* - Don't waste MaxSimultaneousAllocations on global objects as it's not useful
|
||||
*/
|
||||
inline bool shouldSample()
|
||||
{
|
||||
return init_finished.load(std::memory_order_relaxed) && GuardedAlloc.shouldSample();
|
||||
}
|
||||
|
||||
inline bool shouldForceSample()
|
||||
{
|
||||
if (!init_finished.load(std::memory_order_relaxed))
|
||||
return false;
|
||||
std::bernoulli_distribution dist(force_sample_probability.load(std::memory_order_relaxed));
|
||||
return dist(thread_local_rng);
|
||||
}
|
||||
|
@ -39,16 +39,16 @@ protected:
|
||||
|
||||
public:
|
||||
/// For compatibility with SQL, it's possible to specify that certain function name is case insensitive.
|
||||
enum CaseSensitiveness
|
||||
enum Case
|
||||
{
|
||||
CaseSensitive,
|
||||
CaseInsensitive
|
||||
Sensitive,
|
||||
Insensitive
|
||||
};
|
||||
|
||||
/** Register additional name for value
|
||||
* real_name have to be already registered.
|
||||
*/
|
||||
void registerAlias(const String & alias_name, const String & real_name, CaseSensitiveness case_sensitiveness = CaseSensitive)
|
||||
void registerAlias(const String & alias_name, const String & real_name, Case case_sensitiveness = Sensitive)
|
||||
{
|
||||
const auto & creator_map = getMap();
|
||||
const auto & case_insensitive_creator_map = getCaseInsensitiveMap();
|
||||
@ -66,12 +66,12 @@ public:
|
||||
}
|
||||
|
||||
/// We need sure the real_name exactly exists when call the function directly.
|
||||
void registerAliasUnchecked(const String & alias_name, const String & real_name, CaseSensitiveness case_sensitiveness = CaseSensitive)
|
||||
void registerAliasUnchecked(const String & alias_name, const String & real_name, Case case_sensitiveness = Sensitive)
|
||||
{
|
||||
String alias_name_lowercase = Poco::toLower(alias_name);
|
||||
const String factory_name = getFactoryName();
|
||||
|
||||
if (case_sensitiveness == CaseInsensitive)
|
||||
if (case_sensitiveness == Insensitive)
|
||||
{
|
||||
if (!case_insensitive_aliases.emplace(alias_name_lowercase, real_name).second)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "{}: case insensitive alias name '{}' is not unique", factory_name, alias_name);
|
||||
|
@ -116,32 +116,32 @@ public:
|
||||
return elements;
|
||||
}
|
||||
|
||||
bool exists(const std::string & path) const override
|
||||
bool exists(const std::string & file_name) const override
|
||||
{
|
||||
return fs::exists(getPath(path));
|
||||
return fs::exists(getPath(file_name));
|
||||
}
|
||||
|
||||
std::string read(const std::string & path) const override
|
||||
std::string read(const std::string & file_name) const override
|
||||
{
|
||||
ReadBufferFromFile in(getPath(path));
|
||||
ReadBufferFromFile in(getPath(file_name));
|
||||
std::string data;
|
||||
readStringUntilEOF(data, in);
|
||||
return data;
|
||||
}
|
||||
|
||||
void write(const std::string & path, const std::string & data, bool replace) override
|
||||
void write(const std::string & file_name, const std::string & data, bool replace) override
|
||||
{
|
||||
if (!replace && fs::exists(path))
|
||||
if (!replace && fs::exists(file_name))
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::NAMED_COLLECTION_ALREADY_EXISTS,
|
||||
"Metadata file {} for named collection already exists",
|
||||
path);
|
||||
file_name);
|
||||
}
|
||||
|
||||
fs::create_directories(root_path);
|
||||
|
||||
auto tmp_path = getPath(path + ".tmp");
|
||||
auto tmp_path = getPath(file_name + ".tmp");
|
||||
WriteBufferFromFile out(tmp_path, data.size(), O_WRONLY | O_CREAT | O_EXCL);
|
||||
writeString(data, out);
|
||||
|
||||
@ -150,28 +150,32 @@ public:
|
||||
out.sync();
|
||||
out.close();
|
||||
|
||||
fs::rename(tmp_path, getPath(path));
|
||||
fs::rename(tmp_path, getPath(file_name));
|
||||
}
|
||||
|
||||
void remove(const std::string & path) override
|
||||
void remove(const std::string & file_name) override
|
||||
{
|
||||
if (!removeIfExists(getPath(path)))
|
||||
if (!removeIfExists(file_name))
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::NAMED_COLLECTION_DOESNT_EXIST,
|
||||
"Cannot remove `{}`, because it doesn't exist", path);
|
||||
"Cannot remove `{}`, because it doesn't exist", file_name);
|
||||
}
|
||||
}
|
||||
|
||||
bool removeIfExists(const std::string & path) override
|
||||
bool removeIfExists(const std::string & file_name) override
|
||||
{
|
||||
return fs::remove(getPath(path));
|
||||
return fs::remove(getPath(file_name));
|
||||
}
|
||||
|
||||
private:
|
||||
std::string getPath(const std::string & path) const
|
||||
std::string getPath(const std::string & file_name) const
|
||||
{
|
||||
return fs::path(root_path) / path;
|
||||
const auto file_name_as_path = fs::path(file_name);
|
||||
if (file_name_as_path.is_absolute())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Filename {} cannot be an absolute path", file_name);
|
||||
|
||||
return fs::path(root_path) / file_name_as_path;
|
||||
}
|
||||
|
||||
/// Delete .tmp files. They could be left undeleted in case of
|
||||
@ -264,49 +268,49 @@ public:
|
||||
return children;
|
||||
}
|
||||
|
||||
bool exists(const std::string & path) const override
|
||||
bool exists(const std::string & file_name) const override
|
||||
{
|
||||
return getClient()->exists(getPath(path));
|
||||
return getClient()->exists(getPath(file_name));
|
||||
}
|
||||
|
||||
std::string read(const std::string & path) const override
|
||||
std::string read(const std::string & file_name) const override
|
||||
{
|
||||
return getClient()->get(getPath(path));
|
||||
return getClient()->get(getPath(file_name));
|
||||
}
|
||||
|
||||
void write(const std::string & path, const std::string & data, bool replace) override
|
||||
void write(const std::string & file_name, const std::string & data, bool replace) override
|
||||
{
|
||||
if (replace)
|
||||
{
|
||||
getClient()->createOrUpdate(getPath(path), data, zkutil::CreateMode::Persistent);
|
||||
getClient()->createOrUpdate(getPath(file_name), data, zkutil::CreateMode::Persistent);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto code = getClient()->tryCreate(getPath(path), data, zkutil::CreateMode::Persistent);
|
||||
auto code = getClient()->tryCreate(getPath(file_name), data, zkutil::CreateMode::Persistent);
|
||||
|
||||
if (code == Coordination::Error::ZNODEEXISTS)
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::NAMED_COLLECTION_ALREADY_EXISTS,
|
||||
"Metadata file {} for named collection already exists",
|
||||
path);
|
||||
file_name);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void remove(const std::string & path) override
|
||||
void remove(const std::string & file_name) override
|
||||
{
|
||||
getClient()->remove(getPath(path));
|
||||
getClient()->remove(getPath(file_name));
|
||||
}
|
||||
|
||||
bool removeIfExists(const std::string & path) override
|
||||
bool removeIfExists(const std::string & file_name) override
|
||||
{
|
||||
auto code = getClient()->tryRemove(getPath(path));
|
||||
auto code = getClient()->tryRemove(getPath(file_name));
|
||||
if (code == Coordination::Error::ZOK)
|
||||
return true;
|
||||
if (code == Coordination::Error::ZNONODE)
|
||||
return false;
|
||||
throw Coordination::Exception::fromPath(code, getPath(path));
|
||||
throw Coordination::Exception::fromPath(code, getPath(file_name));
|
||||
}
|
||||
|
||||
private:
|
||||
@ -320,9 +324,13 @@ private:
|
||||
return zookeeper_client;
|
||||
}
|
||||
|
||||
std::string getPath(const std::string & path) const
|
||||
std::string getPath(const std::string & file_name) const
|
||||
{
|
||||
return fs::path(root_path) / path;
|
||||
const auto file_name_as_path = fs::path(file_name);
|
||||
if (file_name_as_path.is_absolute())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Filename {} cannot be an absolute path", file_name);
|
||||
|
||||
return fs::path(root_path) / file_name_as_path;
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -442,8 +442,6 @@ The server successfully detected this situation and will download merged part fr
|
||||
M(ReadBufferFromS3InitMicroseconds, "Time spent initializing connection to S3.") \
|
||||
M(ReadBufferFromS3Bytes, "Bytes read from S3.") \
|
||||
M(ReadBufferFromS3RequestsErrors, "Number of exceptions while reading from S3.") \
|
||||
M(ReadBufferFromS3ResetSessions, "Number of HTTP sessions that were reset in ReadBufferFromS3.") \
|
||||
M(ReadBufferFromS3PreservedSessions, "Number of HTTP sessions that were preserved in ReadBufferFromS3.") \
|
||||
\
|
||||
M(WriteBufferFromS3Microseconds, "Time spent on writing to S3.") \
|
||||
M(WriteBufferFromS3Bytes, "Bytes written to S3.") \
|
||||
|
635
src/Common/SignalHandlers.cpp
Normal file
635
src/Common/SignalHandlers.cpp
Normal file
@ -0,0 +1,635 @@
|
||||
#include <Common/SignalHandlers.h>
|
||||
#include <Common/config_version.h>
|
||||
#include <Common/getHashOfLoadedBinary.h>
|
||||
#include <Common/CurrentThread.h>
|
||||
#include <Daemon/BaseDaemon.h>
|
||||
#include <Daemon/SentryWriter.h>
|
||||
#include <base/sleep.h>
|
||||
#include <base/getThreadId.h>
|
||||
#include <IO/WriteBufferFromFileDescriptor.h>
|
||||
#include <IO/ReadBufferFromFileDescriptor.h>
|
||||
#include <IO/WriteBufferFromFileDescriptorDiscardOnFailure.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Core/Settings.h>
|
||||
|
||||
#pragma clang diagnostic ignored "-Wreserved-identifier"
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int CANNOT_SET_SIGNAL_HANDLER;
|
||||
extern const int CANNOT_SEND_SIGNAL;
|
||||
}
|
||||
}
|
||||
|
||||
using namespace DB;
|
||||
|
||||
|
||||
void call_default_signal_handler(int sig)
|
||||
{
|
||||
if (SIG_ERR == signal(sig, SIG_DFL))
|
||||
throw ErrnoException(ErrorCodes::CANNOT_SET_SIGNAL_HANDLER, "Cannot set signal handler");
|
||||
|
||||
if (0 != raise(sig))
|
||||
throw ErrnoException(ErrorCodes::CANNOT_SEND_SIGNAL, "Cannot send signal");
|
||||
}
|
||||
|
||||
|
||||
void writeSignalIDtoSignalPipe(int sig)
|
||||
{
|
||||
auto saved_errno = errno; /// We must restore previous value of errno in signal handler.
|
||||
|
||||
char buf[signal_pipe_buf_size];
|
||||
auto & signal_pipe = HandledSignals::instance().signal_pipe;
|
||||
WriteBufferFromFileDescriptor out(signal_pipe.fds_rw[1], signal_pipe_buf_size, buf);
|
||||
writeBinary(sig, out);
|
||||
out.next();
|
||||
|
||||
errno = saved_errno;
|
||||
}
|
||||
|
||||
void closeLogsSignalHandler(int sig, siginfo_t *, void *)
|
||||
{
|
||||
DENY_ALLOCATIONS_IN_SCOPE;
|
||||
writeSignalIDtoSignalPipe(sig);
|
||||
}
|
||||
|
||||
void terminateRequestedSignalHandler(int sig, siginfo_t *, void *)
|
||||
{
|
||||
DENY_ALLOCATIONS_IN_SCOPE;
|
||||
writeSignalIDtoSignalPipe(sig);
|
||||
}
|
||||
|
||||
|
||||
void signalHandler(int sig, siginfo_t * info, void * context)
|
||||
{
|
||||
if (asynchronous_stack_unwinding && sig == SIGSEGV)
|
||||
siglongjmp(asynchronous_stack_unwinding_signal_jump_buffer, 1);
|
||||
|
||||
DENY_ALLOCATIONS_IN_SCOPE;
|
||||
auto saved_errno = errno; /// We must restore previous value of errno in signal handler.
|
||||
|
||||
char buf[signal_pipe_buf_size];
|
||||
auto & signal_pipe = HandledSignals::instance().signal_pipe;
|
||||
WriteBufferFromFileDescriptorDiscardOnFailure out(signal_pipe.fds_rw[1], signal_pipe_buf_size, buf);
|
||||
|
||||
const ucontext_t * signal_context = reinterpret_cast<ucontext_t *>(context);
|
||||
const StackTrace stack_trace(*signal_context);
|
||||
|
||||
#if USE_GWP_ASAN
|
||||
if (const auto fault_address = reinterpret_cast<uintptr_t>(info->si_addr);
|
||||
GWPAsan::isGWPAsanError(fault_address))
|
||||
GWPAsan::printReport(fault_address);
|
||||
#endif
|
||||
|
||||
writeBinary(sig, out);
|
||||
writePODBinary(*info, out);
|
||||
writePODBinary(signal_context, out);
|
||||
writePODBinary(stack_trace, out);
|
||||
writeVectorBinary(Exception::enable_job_stack_trace ? Exception::thread_frame_pointers : std::vector<StackTrace::FramePointers>{}, out);
|
||||
writeBinary(static_cast<UInt32>(getThreadId()), out);
|
||||
writePODBinary(current_thread, out);
|
||||
|
||||
out.next();
|
||||
|
||||
if (sig != SIGTSTP) /// This signal is used for debugging.
|
||||
{
|
||||
/// The time that is usually enough for separate thread to print info into log.
|
||||
/// Under MSan full stack unwinding with DWARF info about inline functions takes 101 seconds in one case.
|
||||
for (size_t i = 0; i < 300; ++i)
|
||||
{
|
||||
/// We will synchronize with the thread printing the messages with an atomic variable to finish earlier.
|
||||
if (HandledSignals::instance().fatal_error_printed.test())
|
||||
break;
|
||||
|
||||
/// This coarse method of synchronization is perfectly ok for fatal signals.
|
||||
sleepForSeconds(1);
|
||||
}
|
||||
|
||||
/// Wait for all logs flush operations
|
||||
sleepForSeconds(3);
|
||||
call_default_signal_handler(sig);
|
||||
}
|
||||
|
||||
errno = saved_errno;
|
||||
}
|
||||
|
||||
|
||||
[[noreturn]] void terminate_handler()
|
||||
{
|
||||
static thread_local bool terminating = false;
|
||||
if (terminating)
|
||||
abort();
|
||||
|
||||
terminating = true;
|
||||
|
||||
std::string log_message;
|
||||
|
||||
if (std::current_exception())
|
||||
log_message = "Terminate called for uncaught exception:\n" + getCurrentExceptionMessage(true);
|
||||
else
|
||||
log_message = "Terminate called without an active exception";
|
||||
|
||||
/// POSIX.1 says that write(2)s of less than PIPE_BUF bytes must be atomic - man 7 pipe
|
||||
/// And the buffer should not be too small because our exception messages can be large.
|
||||
static constexpr size_t buf_size = PIPE_BUF;
|
||||
|
||||
if (log_message.size() > buf_size - 16)
|
||||
log_message.resize(buf_size - 16);
|
||||
|
||||
char buf[buf_size];
|
||||
auto & signal_pipe = HandledSignals::instance().signal_pipe;
|
||||
WriteBufferFromFileDescriptor out(signal_pipe.fds_rw[1], buf_size, buf);
|
||||
|
||||
writeBinary(static_cast<int>(SignalListener::StdTerminate), out);
|
||||
writeBinary(static_cast<UInt32>(getThreadId()), out);
|
||||
writeBinary(log_message, out);
|
||||
out.next();
|
||||
|
||||
abort();
|
||||
}
|
||||
|
||||
#if defined(SANITIZER)
|
||||
template <typename T>
|
||||
struct ValueHolder
|
||||
{
|
||||
ValueHolder(T value_) : value(value_)
|
||||
{}
|
||||
|
||||
T value;
|
||||
};
|
||||
|
||||
extern "C" void __sanitizer_set_death_callback(void (*)());
|
||||
|
||||
/// Sanitizers may not expect some function calls from death callback.
|
||||
/// Let's try to disable instrumentation to avoid possible issues.
|
||||
/// However, this callback may call other functions that are still instrumented.
|
||||
/// We can try [[clang::always_inline]] attribute for statements in future (available in clang-15)
|
||||
/// See https://github.com/google/sanitizers/issues/1543 and https://github.com/google/sanitizers/issues/1549.
|
||||
static DISABLE_SANITIZER_INSTRUMENTATION void sanitizerDeathCallback()
|
||||
{
|
||||
DENY_ALLOCATIONS_IN_SCOPE;
|
||||
/// Also need to send data via pipe. Otherwise it may lead to deadlocks or failures in printing diagnostic info.
|
||||
|
||||
char buf[signal_pipe_buf_size];
|
||||
auto & signal_pipe = HandledSignals::instance().signal_pipe;
|
||||
WriteBufferFromFileDescriptorDiscardOnFailure out(signal_pipe.fds_rw[1], signal_pipe_buf_size, buf);
|
||||
|
||||
const StackTrace stack_trace;
|
||||
|
||||
writeBinary(SignalListener::SanitizerTrap, out);
|
||||
writePODBinary(stack_trace, out);
|
||||
/// We create a dummy struct with a constructor so DISABLE_SANITIZER_INSTRUMENTATION is not applied to it
|
||||
/// otherwise, Memory sanitizer can't know that values initiialized inside this function are actually initialized
|
||||
/// because instrumentations are disabled leading to false positives later on
|
||||
ValueHolder<UInt32> thread_id{static_cast<UInt32>(getThreadId())};
|
||||
writeBinary(thread_id.value, out);
|
||||
writePODBinary(current_thread, out);
|
||||
|
||||
out.next();
|
||||
|
||||
/// The time that is usually enough for separate thread to print info into log.
|
||||
sleepForSeconds(20);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
void HandledSignals::addSignalHandler(const std::vector<int> & signals, signal_function handler, bool register_signal)
|
||||
{
|
||||
struct sigaction sa;
|
||||
memset(&sa, 0, sizeof(sa));
|
||||
sa.sa_sigaction = handler;
|
||||
sa.sa_flags = SA_SIGINFO;
|
||||
|
||||
#if defined(OS_DARWIN)
|
||||
sigemptyset(&sa.sa_mask);
|
||||
for (auto signal : signals)
|
||||
sigaddset(&sa.sa_mask, signal);
|
||||
#else
|
||||
if (sigemptyset(&sa.sa_mask))
|
||||
throw Poco::Exception("Cannot set signal handler.");
|
||||
|
||||
for (auto signal : signals)
|
||||
if (sigaddset(&sa.sa_mask, signal))
|
||||
throw Poco::Exception("Cannot set signal handler.");
|
||||
#endif
|
||||
|
||||
for (auto signal : signals)
|
||||
if (sigaction(signal, &sa, nullptr))
|
||||
throw Poco::Exception("Cannot set signal handler.");
|
||||
|
||||
if (register_signal)
|
||||
std::copy(signals.begin(), signals.end(), std::back_inserter(handled_signals));
|
||||
}
|
||||
|
||||
void blockSignals(const std::vector<int> & signals)
|
||||
{
|
||||
sigset_t sig_set;
|
||||
|
||||
#if defined(OS_DARWIN)
|
||||
sigemptyset(&sig_set);
|
||||
for (auto signal : signals)
|
||||
sigaddset(&sig_set, signal);
|
||||
#else
|
||||
if (sigemptyset(&sig_set))
|
||||
throw Poco::Exception("Cannot block signal.");
|
||||
|
||||
for (auto signal : signals)
|
||||
if (sigaddset(&sig_set, signal))
|
||||
throw Poco::Exception("Cannot block signal.");
|
||||
#endif
|
||||
|
||||
if (pthread_sigmask(SIG_BLOCK, &sig_set, nullptr))
|
||||
throw Poco::Exception("Cannot block signal.");
|
||||
}
|
||||
|
||||
|
||||
void SignalListener::run()
|
||||
{
|
||||
static_assert(PIPE_BUF >= 512);
|
||||
static_assert(signal_pipe_buf_size <= PIPE_BUF, "Only write of PIPE_BUF to pipe is atomic and the minimal known PIPE_BUF across supported platforms is 512");
|
||||
char buf[signal_pipe_buf_size];
|
||||
auto & signal_pipe = HandledSignals::instance().signal_pipe;
|
||||
ReadBufferFromFileDescriptor in(signal_pipe.fds_rw[0], signal_pipe_buf_size, buf);
|
||||
|
||||
while (!in.eof())
|
||||
{
|
||||
int sig = 0;
|
||||
readBinary(sig, in);
|
||||
// We may log some specific signals afterwards, with different log
|
||||
// levels and more info, but for completeness we log all signals
|
||||
// here at trace level.
|
||||
// Don't use strsignal here, because it's not thread-safe.
|
||||
LOG_TRACE(log, "Received signal {}", sig);
|
||||
|
||||
if (sig == StopThread)
|
||||
{
|
||||
LOG_INFO(log, "Stop SignalListener thread");
|
||||
break;
|
||||
}
|
||||
else if (sig == SIGHUP)
|
||||
{
|
||||
LOG_DEBUG(log, "Received signal to close logs.");
|
||||
BaseDaemon::instance().closeLogs(BaseDaemon::instance().logger());
|
||||
LOG_INFO(log, "Opened new log file after received signal.");
|
||||
}
|
||||
else if (sig == StdTerminate)
|
||||
{
|
||||
UInt32 thread_num;
|
||||
std::string message;
|
||||
|
||||
readBinary(thread_num, in);
|
||||
readBinary(message, in);
|
||||
|
||||
onTerminate(message, thread_num);
|
||||
}
|
||||
else if (sig == SIGINT ||
|
||||
sig == SIGQUIT ||
|
||||
sig == SIGTERM)
|
||||
{
|
||||
if (daemon)
|
||||
daemon->handleSignal(sig);
|
||||
}
|
||||
else
|
||||
{
|
||||
siginfo_t info{};
|
||||
ucontext_t * context{};
|
||||
StackTrace stack_trace(NoCapture{});
|
||||
std::vector<StackTrace::FramePointers> thread_frame_pointers;
|
||||
UInt32 thread_num{};
|
||||
ThreadStatus * thread_ptr{};
|
||||
|
||||
if (sig != SanitizerTrap)
|
||||
{
|
||||
readPODBinary(info, in);
|
||||
readPODBinary(context, in);
|
||||
}
|
||||
|
||||
readPODBinary(stack_trace, in);
|
||||
if (sig != SanitizerTrap)
|
||||
readVectorBinary(thread_frame_pointers, in);
|
||||
readBinary(thread_num, in);
|
||||
readPODBinary(thread_ptr, in);
|
||||
|
||||
/// This allows to receive more signals if failure happens inside onFault function.
|
||||
/// Example: segfault while symbolizing stack trace.
|
||||
try
|
||||
{
|
||||
std::thread([=, this] { onFault(sig, info, context, stack_trace, thread_frame_pointers, thread_num, thread_ptr); }).detach();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
/// Likely cannot allocate thread
|
||||
onFault(sig, info, context, stack_trace, thread_frame_pointers, thread_num, thread_ptr);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void SignalListener::onTerminate(std::string_view message, UInt32 thread_num) const
|
||||
{
|
||||
size_t pos = message.find('\n');
|
||||
|
||||
LOG_FATAL(log, "(version {}{}, build id: {}, git hash: {}) (from thread {}) {}",
|
||||
VERSION_STRING, VERSION_OFFICIAL, daemon ? daemon->build_id : "", daemon ? daemon->git_hash : "", thread_num, message.substr(0, pos));
|
||||
|
||||
/// Print trace from std::terminate exception line-by-line to make it easy for grep.
|
||||
while (pos != std::string_view::npos)
|
||||
{
|
||||
++pos;
|
||||
size_t next_pos = message.find('\n', pos);
|
||||
size_t size = next_pos;
|
||||
if (next_pos != std::string_view::npos)
|
||||
size = next_pos - pos;
|
||||
|
||||
LOG_FATAL(log, fmt::runtime(message.substr(pos, size)));
|
||||
pos = next_pos;
|
||||
}
|
||||
}
|
||||
|
||||
void SignalListener::onFault(
|
||||
int sig,
|
||||
const siginfo_t & info,
|
||||
ucontext_t * context,
|
||||
const StackTrace & stack_trace,
|
||||
const std::vector<StackTrace::FramePointers> & thread_frame_pointers,
|
||||
UInt32 thread_num,
|
||||
DB::ThreadStatus * thread_ptr) const
|
||||
try
|
||||
{
|
||||
ThreadStatus thread_status;
|
||||
|
||||
/// First log those fields that are safe to access and that should not cause new fault.
|
||||
/// That way we will have some duplicated info in the log but we don't loose important info
|
||||
/// in case of double fault.
|
||||
|
||||
LOG_FATAL(log, "########## Short fault info ############");
|
||||
LOG_FATAL(log, "(version {}{}, build id: {}, git hash: {}) (from thread {}) Received signal {}",
|
||||
VERSION_STRING, VERSION_OFFICIAL, daemon ? daemon->build_id : "", daemon ? daemon->git_hash : "",
|
||||
thread_num, sig);
|
||||
|
||||
std::string signal_description = "Unknown signal";
|
||||
|
||||
/// Some of these are not really signals, but our own indications on failure reason.
|
||||
if (sig == StdTerminate)
|
||||
signal_description = "std::terminate";
|
||||
else if (sig == SanitizerTrap)
|
||||
signal_description = "sanitizer trap";
|
||||
else if (sig >= 0)
|
||||
signal_description = strsignal(sig); // NOLINT(concurrency-mt-unsafe) // it is not thread-safe but ok in this context
|
||||
|
||||
LOG_FATAL(log, "Signal description: {}", signal_description);
|
||||
|
||||
String error_message;
|
||||
|
||||
if (sig != SanitizerTrap)
|
||||
error_message = signalToErrorMessage(sig, info, *context);
|
||||
else
|
||||
error_message = "Sanitizer trap.";
|
||||
|
||||
LOG_FATAL(log, fmt::runtime(error_message));
|
||||
|
||||
String bare_stacktrace_str;
|
||||
if (stack_trace.getSize())
|
||||
{
|
||||
/// Write bare stack trace (addresses) just in case if we will fail to print symbolized stack trace.
|
||||
/// NOTE: This still require memory allocations and mutex lock inside logger.
|
||||
/// BTW we can also print it to stderr using write syscalls.
|
||||
|
||||
WriteBufferFromOwnString bare_stacktrace;
|
||||
writeString("Stack trace:", bare_stacktrace);
|
||||
for (size_t i = stack_trace.getOffset(); i < stack_trace.getSize(); ++i)
|
||||
{
|
||||
writeChar(' ', bare_stacktrace);
|
||||
writePointerHex(stack_trace.getFramePointers()[i], bare_stacktrace);
|
||||
}
|
||||
|
||||
LOG_FATAL(log, fmt::runtime(bare_stacktrace.str()));
|
||||
bare_stacktrace_str = bare_stacktrace.str();
|
||||
}
|
||||
|
||||
/// Now try to access potentially unsafe data in thread_ptr.
|
||||
|
||||
String query_id;
|
||||
String query;
|
||||
|
||||
/// Send logs from this thread to client if possible.
|
||||
/// It will allow client to see failure messages directly.
|
||||
if (thread_ptr)
|
||||
{
|
||||
query_id = thread_ptr->getQueryId();
|
||||
query = thread_ptr->getQueryForLog();
|
||||
|
||||
if (auto logs_queue = thread_ptr->getInternalTextLogsQueue())
|
||||
{
|
||||
CurrentThread::attachInternalTextLogsQueue(logs_queue, LogsLevel::trace);
|
||||
}
|
||||
}
|
||||
|
||||
LOG_FATAL(log, "########################################");
|
||||
|
||||
if (query_id.empty())
|
||||
{
|
||||
LOG_FATAL(log, "(version {}{}, build id: {}, git hash: {}) (from thread {}) (no query) Received signal {} ({})",
|
||||
VERSION_STRING, VERSION_OFFICIAL, daemon ? daemon->build_id : "", daemon ? daemon->git_hash : "",
|
||||
thread_num, signal_description, sig);
|
||||
}
|
||||
else
|
||||
{
|
||||
LOG_FATAL(log, "(version {}{}, build id: {}, git hash: {}) (from thread {}) (query_id: {}) (query: {}) Received signal {} ({})",
|
||||
VERSION_STRING, VERSION_OFFICIAL, daemon ? daemon->build_id : "", daemon ? daemon->git_hash : "",
|
||||
thread_num, query_id, query, signal_description, sig);
|
||||
}
|
||||
|
||||
LOG_FATAL(log, fmt::runtime(error_message));
|
||||
|
||||
if (!bare_stacktrace_str.empty())
|
||||
{
|
||||
LOG_FATAL(log, fmt::runtime(bare_stacktrace_str));
|
||||
}
|
||||
|
||||
/// Write symbolized stack trace line by line for better grep-ability.
|
||||
stack_trace.toStringEveryLine([&](std::string_view s) { LOG_FATAL(log, fmt::runtime(s)); });
|
||||
|
||||
/// In case it's a scheduled job write all previous jobs origins call stacks
|
||||
std::for_each(thread_frame_pointers.rbegin(), thread_frame_pointers.rend(),
|
||||
[this](const StackTrace::FramePointers & frame_pointers)
|
||||
{
|
||||
if (size_t size = std::ranges::find(frame_pointers, nullptr) - frame_pointers.begin())
|
||||
{
|
||||
LOG_FATAL(log, "========================================");
|
||||
WriteBufferFromOwnString bare_stacktrace;
|
||||
writeString("Job's origin stack trace:", bare_stacktrace);
|
||||
std::for_each_n(frame_pointers.begin(), size,
|
||||
[&bare_stacktrace](const void * ptr)
|
||||
{
|
||||
writeChar(' ', bare_stacktrace);
|
||||
writePointerHex(ptr, bare_stacktrace);
|
||||
}
|
||||
);
|
||||
|
||||
LOG_FATAL(log, fmt::runtime(bare_stacktrace.str()));
|
||||
|
||||
StackTrace::toStringEveryLine(const_cast<void **>(frame_pointers.data()), 0, size, [this](std::string_view s) { LOG_FATAL(log, fmt::runtime(s)); });
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
|
||||
#if defined(OS_LINUX)
|
||||
/// Write information about binary checksum. It can be difficult to calculate, so do it only after printing stack trace.
|
||||
/// Please keep the below log messages in-sync with the ones in programs/server/Server.cpp
|
||||
|
||||
if (daemon && daemon->stored_binary_hash.empty())
|
||||
{
|
||||
LOG_FATAL(log, "Integrity check of the executable skipped because the reference checksum could not be read.");
|
||||
}
|
||||
else if (daemon)
|
||||
{
|
||||
String calculated_binary_hash = getHashOfLoadedBinaryHex();
|
||||
if (calculated_binary_hash == daemon->stored_binary_hash)
|
||||
{
|
||||
LOG_FATAL(log, "Integrity check of the executable successfully passed (checksum: {})", calculated_binary_hash);
|
||||
}
|
||||
else
|
||||
{
|
||||
LOG_FATAL(
|
||||
log,
|
||||
"Calculated checksum of the executable ({0}) does not correspond"
|
||||
" to the reference checksum stored in the executable ({1})."
|
||||
" This may indicate one of the following:"
|
||||
" - the executable was changed just after startup;"
|
||||
" - the executable was corrupted on disk due to faulty hardware;"
|
||||
" - the loaded executable was corrupted in memory due to faulty hardware;"
|
||||
" - the file was intentionally modified;"
|
||||
" - a logical error in the code.",
|
||||
calculated_binary_hash,
|
||||
daemon->stored_binary_hash);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/// Write crash to system.crash_log table if available.
|
||||
if (collectCrashLog)
|
||||
collectCrashLog(sig, thread_num, query_id, stack_trace);
|
||||
|
||||
Context::getGlobalContextInstance()->handleCrash();
|
||||
|
||||
/// Send crash report to developers (if configured)
|
||||
if (sig != SanitizerTrap)
|
||||
{
|
||||
if (daemon)
|
||||
{
|
||||
if (auto * sentry = SentryWriter::getInstance())
|
||||
sentry->onSignal(sig, error_message, stack_trace.getFramePointers(), stack_trace.getOffset(), stack_trace.getSize());
|
||||
}
|
||||
|
||||
/// Advice the user to send it manually.
|
||||
if (std::string_view(VERSION_OFFICIAL).contains("official build"))
|
||||
{
|
||||
const auto & date_lut = DateLUT::instance();
|
||||
|
||||
/// Approximate support period, upper bound.
|
||||
if (time(nullptr) - date_lut.makeDate(2000 + VERSION_MAJOR, VERSION_MINOR, 1) < (365 + 30) * 86400)
|
||||
{
|
||||
LOG_FATAL(log, "Report this error to https://github.com/ClickHouse/ClickHouse/issues");
|
||||
}
|
||||
else
|
||||
{
|
||||
LOG_FATAL(log, "ClickHouse version {} is old and should be upgraded to the latest version.", VERSION_STRING);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
LOG_FATAL(log, "This ClickHouse version is not official and should be upgraded to the official build.");
|
||||
}
|
||||
}
|
||||
|
||||
/// List changed settings.
|
||||
if (!query_id.empty())
|
||||
{
|
||||
ContextPtr query_context = thread_ptr->getQueryContext();
|
||||
if (query_context)
|
||||
{
|
||||
String changed_settings = query_context->getSettingsRef().toString();
|
||||
|
||||
if (changed_settings.empty())
|
||||
LOG_FATAL(log, "No settings were changed");
|
||||
else
|
||||
LOG_FATAL(log, "Changed settings: {}", changed_settings);
|
||||
}
|
||||
}
|
||||
|
||||
/// When everything is done, we will try to send these error messages to the client.
|
||||
if (thread_ptr)
|
||||
thread_ptr->onFatalError();
|
||||
|
||||
HandledSignals::instance().fatal_error_printed.test_and_set();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
/// onFault is called from the std::thread, and it should catch all exceptions; otherwise, you can get unrelated fatal errors.
|
||||
PreformattedMessage message = getCurrentExceptionMessageAndPattern(true);
|
||||
LOG_FATAL(log, message);
|
||||
}
|
||||
|
||||
HandledSignals::HandledSignals()
|
||||
{
|
||||
signal_pipe.setNonBlockingWrite();
|
||||
signal_pipe.tryIncreaseSize(1 << 20);
|
||||
}
|
||||
|
||||
void HandledSignals::reset()
|
||||
{
|
||||
/// Reset signals to SIG_DFL to avoid trying to write to the signal_pipe that will be closed after.
|
||||
for (int sig : handled_signals)
|
||||
{
|
||||
if (SIG_ERR == signal(sig, SIG_DFL))
|
||||
{
|
||||
try
|
||||
{
|
||||
throw ErrnoException(ErrorCodes::CANNOT_SET_SIGNAL_HANDLER, "Cannot set signal handler");
|
||||
}
|
||||
catch (ErrnoException &)
|
||||
{
|
||||
tryLogCurrentException(__PRETTY_FUNCTION__);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
signal_pipe.close();
|
||||
}
|
||||
|
||||
HandledSignals::~HandledSignals()
|
||||
{
|
||||
reset();
|
||||
};
|
||||
|
||||
HandledSignals & HandledSignals::instance()
|
||||
{
|
||||
static HandledSignals res;
|
||||
return res;
|
||||
}
|
||||
|
||||
void HandledSignals::setupTerminateHandler()
|
||||
{
|
||||
std::set_terminate(terminate_handler);
|
||||
}
|
||||
|
||||
void HandledSignals::setupCommonDeadlySignalHandlers()
|
||||
{
|
||||
/// SIGTSTP is added for debugging purposes. To output a stack trace of any running thread at anytime.
|
||||
addSignalHandler({SIGABRT, SIGSEGV, SIGILL, SIGBUS, SIGSYS, SIGFPE, SIGPIPE, SIGTSTP, SIGTRAP}, signalHandler, true);
|
||||
|
||||
#if defined(SANITIZER)
|
||||
__sanitizer_set_death_callback(sanitizerDeathCallback);
|
||||
#endif
|
||||
}
|
||||
|
||||
void HandledSignals::setupCommonTerminateRequestSignalHandlers()
|
||||
{
|
||||
addSignalHandler({SIGINT, SIGQUIT, SIGTERM}, terminateRequestedSignalHandler, true);
|
||||
}
|
110
src/Common/SignalHandlers.h
Normal file
110
src/Common/SignalHandlers.h
Normal file
@ -0,0 +1,110 @@
|
||||
#pragma once
|
||||
#include <csignal>
|
||||
|
||||
#include <base/defines.h>
|
||||
#include <Common/PipeFDs.h>
|
||||
#include <Common/StackTrace.h>
|
||||
#include <Common/ThreadStatus.h>
|
||||
#include <Core/Types.h>
|
||||
#include <Poco/Runnable.h>
|
||||
|
||||
class BaseDaemon;
|
||||
|
||||
/** Reset signal handler to the default and send signal to itself.
|
||||
* It's called from user signal handler to write core dump.
|
||||
*/
|
||||
void call_default_signal_handler(int sig);
|
||||
|
||||
const size_t signal_pipe_buf_size =
|
||||
sizeof(int)
|
||||
+ sizeof(siginfo_t)
|
||||
+ sizeof(ucontext_t*)
|
||||
+ sizeof(StackTrace)
|
||||
+ sizeof(UInt64)
|
||||
+ sizeof(UInt32)
|
||||
+ sizeof(void*);
|
||||
|
||||
using signal_function = void(int, siginfo_t*, void*);
|
||||
|
||||
void writeSignalIDtoSignalPipe(int sig);
|
||||
|
||||
/** Signal handler for HUP */
|
||||
void closeLogsSignalHandler(int sig, siginfo_t *, void *);
|
||||
|
||||
void terminateRequestedSignalHandler(int sig, siginfo_t *, void *);
|
||||
|
||||
|
||||
/** Handler for "fault" or diagnostic signals. Send data about fault to separate thread to write into log.
|
||||
*/
|
||||
void signalHandler(int sig, siginfo_t * info, void * context);
|
||||
|
||||
|
||||
/** To use with std::set_terminate.
|
||||
* Collects slightly more info than __gnu_cxx::__verbose_terminate_handler,
|
||||
* and send it to pipe. Other thread will read this info from pipe and asynchronously write it to log.
|
||||
* Look at libstdc++-v3/libsupc++/vterminate.cc for example.
|
||||
*/
|
||||
[[noreturn]] void terminate_handler();
|
||||
|
||||
/// Avoid link time dependency on DB/Interpreters - will use this function only when linked.
|
||||
__attribute__((__weak__)) void collectCrashLog(
|
||||
Int32 signal, UInt64 thread_id, const String & query_id, const StackTrace & stack_trace);
|
||||
|
||||
|
||||
void blockSignals(const std::vector<int> & signals);
|
||||
|
||||
|
||||
/** The thread that read info about signal or std::terminate from pipe.
|
||||
* On HUP, close log files (for new files to be opened later).
|
||||
* On information about std::terminate, write it to log.
|
||||
* On other signals, write info to log.
|
||||
*/
|
||||
class SignalListener : public Poco::Runnable
|
||||
{
|
||||
public:
|
||||
static constexpr int StdTerminate = -1;
|
||||
static constexpr int StopThread = -2;
|
||||
static constexpr int SanitizerTrap = -3;
|
||||
|
||||
explicit SignalListener(BaseDaemon * daemon_, LoggerPtr log_)
|
||||
: daemon(daemon_), log(log_)
|
||||
{
|
||||
}
|
||||
|
||||
void run() override;
|
||||
|
||||
private:
|
||||
BaseDaemon * daemon;
|
||||
LoggerPtr log;
|
||||
|
||||
void onTerminate(std::string_view message, UInt32 thread_num) const;
|
||||
|
||||
void onFault(
|
||||
int sig,
|
||||
const siginfo_t & info,
|
||||
ucontext_t * context,
|
||||
const StackTrace & stack_trace,
|
||||
const std::vector<StackTrace::FramePointers> & thread_frame_pointers,
|
||||
UInt32 thread_num,
|
||||
DB::ThreadStatus * thread_ptr) const;
|
||||
};
|
||||
|
||||
struct HandledSignals
|
||||
{
|
||||
std::vector<int> handled_signals;
|
||||
DB::PipeFDs signal_pipe;
|
||||
std::atomic_flag fatal_error_printed;
|
||||
|
||||
HandledSignals();
|
||||
~HandledSignals();
|
||||
|
||||
void setupTerminateHandler();
|
||||
void setupCommonDeadlySignalHandlers();
|
||||
void setupCommonTerminateRequestSignalHandlers();
|
||||
|
||||
void addSignalHandler(const std::vector<int> & signals, signal_function handler, bool register_signal);
|
||||
|
||||
void reset();
|
||||
|
||||
static HandledSignals & instance();
|
||||
};
|
@ -1,2 +1,24 @@
|
||||
#include <Common/WeakHash.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/HashTable/Hash.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
void WeakHash32::update(const WeakHash32 & other)
|
||||
{
|
||||
size_t size = data.size();
|
||||
if (size != other.data.size())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match:"
|
||||
"left size is {}, right size is {}", size, other.data.size());
|
||||
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
data[i] = static_cast<UInt32>(intHashCRC32(other.data[i], data[i]));
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -11,9 +11,8 @@ namespace DB
|
||||
/// The main purpose why this class needed is to support data initialization. Initially, every bit is 1.
|
||||
class WeakHash32
|
||||
{
|
||||
static constexpr UInt32 kDefaultInitialValue = ~UInt32(0);
|
||||
|
||||
public:
|
||||
static constexpr UInt32 kDefaultInitialValue = ~UInt32(0);
|
||||
|
||||
using Container = PaddedPODArray<UInt32>;
|
||||
|
||||
@ -22,6 +21,8 @@ public:
|
||||
|
||||
void reset(size_t size, UInt32 initial_value = kDefaultInitialValue) { data.assign(size, initial_value); }
|
||||
|
||||
void update(const WeakHash32 & other);
|
||||
|
||||
const Container & getData() const { return data; }
|
||||
Container & getData() { return data; }
|
||||
|
||||
|
@ -13,14 +13,14 @@
|
||||
#include <Common/ZooKeeper/Types.h>
|
||||
#include <Common/ZooKeeper/ZooKeeperCommon.h>
|
||||
#include <Common/randomSeed.h>
|
||||
#include <base/find_symbols.h>
|
||||
#include <base/sort.h>
|
||||
#include <base/map.h>
|
||||
#include <base/getFQDNOrHostName.h>
|
||||
#include <Core/ServerUUID.h>
|
||||
#include <Core/BackgroundSchedulePool.h>
|
||||
#include "Common/ZooKeeper/IKeeper.h"
|
||||
#include <Common/DNSResolver.h>
|
||||
#include <Common/ZooKeeper/IKeeper.h>
|
||||
#include <Common/StringUtils.h>
|
||||
#include <Common/quoteString.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Interpreters/Context.h>
|
||||
|
||||
@ -114,7 +114,11 @@ void ZooKeeper::init(ZooKeeperArgs args_, std::unique_ptr<Coordination::IKeeper>
|
||||
/// availability_zones is empty on server startup or after config reloading
|
||||
/// We will keep the az info when starting new sessions
|
||||
availability_zones = args.availability_zones;
|
||||
LOG_TEST(log, "Availability zones from config: [{}], client: {}", fmt::join(availability_zones, ", "), args.client_availability_zone);
|
||||
|
||||
LOG_TEST(log, "Availability zones from config: [{}], client: {}",
|
||||
fmt::join(collections::map(availability_zones, [](auto s){ return DB::quoteString(s); }), ", "),
|
||||
DB::quoteString(args.client_availability_zone));
|
||||
|
||||
if (args.availability_zone_autodetect)
|
||||
updateAvailabilityZones();
|
||||
}
|
||||
|
@ -37,7 +37,7 @@ requires DB::OptionalArgument<TAlign...>
|
||||
inline ALWAYS_INLINE void * newImpl(std::size_t size, TAlign... align)
|
||||
{
|
||||
#if USE_GWP_ASAN
|
||||
if (unlikely(GWPAsan::GuardedAlloc.shouldSample()))
|
||||
if (unlikely(GWPAsan::shouldSample()))
|
||||
{
|
||||
if constexpr (sizeof...(TAlign) == 1)
|
||||
{
|
||||
@ -83,7 +83,7 @@ inline ALWAYS_INLINE void * newImpl(std::size_t size, TAlign... align)
|
||||
inline ALWAYS_INLINE void * newNoExcept(std::size_t size) noexcept
|
||||
{
|
||||
#if USE_GWP_ASAN
|
||||
if (unlikely(GWPAsan::GuardedAlloc.shouldSample()))
|
||||
if (unlikely(GWPAsan::shouldSample()))
|
||||
{
|
||||
if (void * ptr = GWPAsan::GuardedAlloc.allocate(size))
|
||||
{
|
||||
@ -102,7 +102,7 @@ inline ALWAYS_INLINE void * newNoExcept(std::size_t size) noexcept
|
||||
inline ALWAYS_INLINE void * newNoExcept(std::size_t size, std::align_val_t align) noexcept
|
||||
{
|
||||
#if USE_GWP_ASAN
|
||||
if (unlikely(GWPAsan::GuardedAlloc.shouldSample()))
|
||||
if (unlikely(GWPAsan::shouldSample()))
|
||||
{
|
||||
if (void * ptr = GWPAsan::GuardedAlloc.allocate(size, alignToSizeT(align)))
|
||||
{
|
||||
|
@ -54,16 +54,3 @@ TEST(ShellCommand, ExecuteWithInput)
|
||||
|
||||
EXPECT_EQ(res, "Hello, world!\n");
|
||||
}
|
||||
|
||||
TEST(ShellCommand, AutoWait)
|
||||
{
|
||||
// <defunct> hunting:
|
||||
for (int i = 0; i < 1000; ++i)
|
||||
{
|
||||
auto command = ShellCommand::execute("echo " + std::to_string(i));
|
||||
//command->wait(); // now automatic
|
||||
}
|
||||
|
||||
// std::cerr << "inspect me: ps auxwwf\n";
|
||||
// std::this_thread::sleep_for(std::chrono::seconds(100));
|
||||
}
|
||||
|
@ -55,6 +55,7 @@ struct Settings;
|
||||
M(UInt64, min_request_size_for_cache, 50 * 1024, "Minimal size of the request to cache the deserialization result. Caching can have negative effect on latency for smaller requests, set to 0 to disable", 0) \
|
||||
M(UInt64, raft_limits_reconnect_limit, 50, "If connection to a peer is silent longer than this limit * (multiplied by heartbeat interval), we re-establish the connection.", 0) \
|
||||
M(Bool, async_replication, false, "Enable async replication. All write and read guarantees are preserved while better performance is achieved. Settings is disabled by default to not break backwards compatibility.", 0) \
|
||||
M(Bool, experimental_use_rocksdb, false, "Use rocksdb as backend storage", 0) \
|
||||
M(UInt64, latest_logs_cache_size_threshold, 1 * 1024 * 1024 * 1024, "Maximum total size of in-memory cache of latest log entries.", 0) \
|
||||
M(UInt64, commit_logs_cache_size_threshold, 500 * 1024 * 1024, "Maximum total size of in-memory cache of log entries needed next for commit.", 0) \
|
||||
M(UInt64, disk_move_retries_wait_ms, 1000, "How long to wait between retries after a failure which happened while a file was being moved between disks.", 0) \
|
||||
|
@ -183,8 +183,6 @@
|
||||
M(ReadBufferFromS3InitMicroseconds) \
|
||||
M(ReadBufferFromS3Bytes) \
|
||||
M(ReadBufferFromS3RequestsErrors) \
|
||||
M(ReadBufferFromS3ResetSessions) \
|
||||
M(ReadBufferFromS3PreservedSessions) \
|
||||
\
|
||||
M(WriteBufferFromS3Microseconds) \
|
||||
M(WriteBufferFromS3Bytes) \
|
||||
|
@ -5,18 +5,27 @@
|
||||
|
||||
#include <Coordination/CoordinationSettings.h>
|
||||
#include <Coordination/Defines.h>
|
||||
#include <Disks/DiskLocal.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <IO/S3/Credentials.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Poco/Util/AbstractConfiguration.h>
|
||||
#include <Poco/Util/JSONConfiguration.h>
|
||||
#include <Coordination/KeeperConstants.h>
|
||||
#include <Server/CloudPlacementInfo.h>
|
||||
#include <Coordination/KeeperFeatureFlags.h>
|
||||
#include <Disks/DiskLocal.h>
|
||||
#include <Disks/DiskSelector.h>
|
||||
#include <IO/S3/Credentials.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Poco/Util/AbstractConfiguration.h>
|
||||
#include <Common/logger_useful.h>
|
||||
|
||||
#include <boost/algorithm/string.hpp>
|
||||
|
||||
#include "config.h"
|
||||
#if USE_ROCKSDB
|
||||
#include <rocksdb/table.h>
|
||||
#include <rocksdb/convenience.h>
|
||||
#include <rocksdb/utilities/db_ttl.h>
|
||||
#endif
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
@ -24,6 +33,8 @@ namespace ErrorCodes
|
||||
{
|
||||
|
||||
extern const int BAD_ARGUMENTS;
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int ROCKSDB_ERROR;
|
||||
|
||||
}
|
||||
|
||||
@ -41,6 +52,95 @@ KeeperContext::KeeperContext(bool standalone_keeper_, CoordinationSettingsPtr co
|
||||
system_nodes_with_data[keeper_api_version_path] = toString(static_cast<uint8_t>(KeeperApiVersion::WITH_MULTI_READ));
|
||||
}
|
||||
|
||||
#if USE_ROCKSDB
|
||||
using RocksDBOptions = std::unordered_map<std::string, std::string>;
|
||||
|
||||
static RocksDBOptions getOptionsFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & path)
|
||||
{
|
||||
RocksDBOptions options;
|
||||
|
||||
Poco::Util::AbstractConfiguration::Keys keys;
|
||||
config.keys(path, keys);
|
||||
|
||||
for (const auto & key : keys)
|
||||
{
|
||||
const String key_path = path + "." + key;
|
||||
options[key] = config.getString(key_path);
|
||||
}
|
||||
|
||||
return options;
|
||||
}
|
||||
|
||||
static rocksdb::Options getRocksDBOptionsFromConfig(const Poco::Util::AbstractConfiguration & config)
|
||||
{
|
||||
rocksdb::Status status;
|
||||
rocksdb::Options base;
|
||||
|
||||
base.create_if_missing = true;
|
||||
base.compression = rocksdb::CompressionType::kZSTD;
|
||||
base.statistics = rocksdb::CreateDBStatistics();
|
||||
/// It is too verbose by default, and in fact we don't care about rocksdb logs at all.
|
||||
base.info_log_level = rocksdb::ERROR_LEVEL;
|
||||
|
||||
rocksdb::Options merged = base;
|
||||
rocksdb::BlockBasedTableOptions table_options;
|
||||
|
||||
if (config.has("keeper_server.rocksdb.options"))
|
||||
{
|
||||
auto config_options = getOptionsFromConfig(config, "keeper_server.rocksdb.options");
|
||||
status = rocksdb::GetDBOptionsFromMap(merged, config_options, &merged);
|
||||
if (!status.ok())
|
||||
{
|
||||
throw Exception(ErrorCodes::ROCKSDB_ERROR, "Fail to merge rocksdb options from 'rocksdb.options' : {}",
|
||||
status.ToString());
|
||||
}
|
||||
}
|
||||
if (config.has("rocksdb.column_family_options"))
|
||||
{
|
||||
auto column_family_options = getOptionsFromConfig(config, "rocksdb.column_family_options");
|
||||
status = rocksdb::GetColumnFamilyOptionsFromMap(merged, column_family_options, &merged);
|
||||
if (!status.ok())
|
||||
{
|
||||
throw Exception(ErrorCodes::ROCKSDB_ERROR, "Fail to merge rocksdb options from 'rocksdb.column_family_options' at: {}", status.ToString());
|
||||
}
|
||||
}
|
||||
if (config.has("rocksdb.block_based_table_options"))
|
||||
{
|
||||
auto block_based_table_options = getOptionsFromConfig(config, "rocksdb.block_based_table_options");
|
||||
status = rocksdb::GetBlockBasedTableOptionsFromMap(table_options, block_based_table_options, &table_options);
|
||||
if (!status.ok())
|
||||
{
|
||||
throw Exception(ErrorCodes::ROCKSDB_ERROR, "Fail to merge rocksdb options from 'rocksdb.block_based_table_options' at: {}", status.ToString());
|
||||
}
|
||||
}
|
||||
|
||||
merged.table_factory.reset(rocksdb::NewBlockBasedTableFactory(table_options));
|
||||
return merged;
|
||||
}
|
||||
#endif
|
||||
|
||||
KeeperContext::Storage KeeperContext::getRocksDBPathFromConfig(const Poco::Util::AbstractConfiguration & config) const
|
||||
{
|
||||
const auto create_local_disk = [](const auto & path)
|
||||
{
|
||||
if (fs::exists(path))
|
||||
fs::remove_all(path);
|
||||
fs::create_directories(path);
|
||||
|
||||
return std::make_shared<DiskLocal>("LocalRocksDBDisk", path);
|
||||
};
|
||||
if (config.has("keeper_server.rocksdb_path"))
|
||||
return create_local_disk(config.getString("keeper_server.rocksdb_path"));
|
||||
|
||||
if (config.has("keeper_server.storage_path"))
|
||||
return create_local_disk(std::filesystem::path{config.getString("keeper_server.storage_path")} / "rocksdb");
|
||||
|
||||
if (standalone_keeper)
|
||||
return create_local_disk(std::filesystem::path{config.getString("path", KEEPER_DEFAULT_PATH)} / "rocksdb");
|
||||
else
|
||||
return create_local_disk(std::filesystem::path{config.getString("path", DBMS_DEFAULT_PATH)} / "coordination/rocksdb");
|
||||
}
|
||||
|
||||
void KeeperContext::initialize(const Poco::Util::AbstractConfiguration & config, KeeperDispatcher * dispatcher_)
|
||||
{
|
||||
dispatcher = dispatcher_;
|
||||
@ -59,6 +159,14 @@ void KeeperContext::initialize(const Poco::Util::AbstractConfiguration & config,
|
||||
|
||||
initializeFeatureFlags(config);
|
||||
initializeDisks(config);
|
||||
|
||||
#if USE_ROCKSDB
|
||||
if (config.getBool("keeper_server.coordination_settings.experimental_use_rocksdb", false))
|
||||
{
|
||||
rocksdb_options = std::make_shared<rocksdb::Options>(getRocksDBOptionsFromConfig(config));
|
||||
digest_enabled = false; /// TODO: support digest
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
namespace
|
||||
@ -94,6 +202,8 @@ void KeeperContext::initializeDisks(const Poco::Util::AbstractConfiguration & co
|
||||
{
|
||||
disk_selector->initialize(config, "storage_configuration.disks", Context::getGlobalContextInstance(), diskValidator);
|
||||
|
||||
rocksdb_storage = getRocksDBPathFromConfig(config);
|
||||
|
||||
log_storage = getLogsPathFromConfig(config);
|
||||
|
||||
if (config.has("keeper_server.latest_log_storage_disk"))
|
||||
@ -262,6 +372,37 @@ void KeeperContext::dumpConfiguration(WriteBufferFromOwnString & buf) const
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void KeeperContext::setRocksDBDisk(DiskPtr disk)
|
||||
{
|
||||
rocksdb_storage = std::move(disk);
|
||||
}
|
||||
|
||||
DiskPtr KeeperContext::getTemporaryRocksDBDisk() const
|
||||
{
|
||||
DiskPtr rocksdb_disk = getDisk(rocksdb_storage);
|
||||
if (!rocksdb_disk)
|
||||
{
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "rocksdb storage is not initialized");
|
||||
}
|
||||
auto uuid_str = formatUUID(UUIDHelpers::generateV4());
|
||||
String path_to_create = "rocks_" + std::string(uuid_str.data(), uuid_str.size());
|
||||
rocksdb_disk->createDirectory(path_to_create);
|
||||
return std::make_shared<DiskLocal>("LocalTmpRocksDBDisk", fullPath(rocksdb_disk, path_to_create));
|
||||
}
|
||||
|
||||
void KeeperContext::setRocksDBOptions(std::shared_ptr<rocksdb::Options> rocksdb_options_)
|
||||
{
|
||||
if (rocksdb_options_ != nullptr)
|
||||
rocksdb_options = rocksdb_options_;
|
||||
else
|
||||
{
|
||||
#if USE_ROCKSDB
|
||||
rocksdb_options = std::make_shared<rocksdb::Options>(getRocksDBOptionsFromConfig(Poco::Util::JSONConfiguration()));
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
KeeperContext::Storage KeeperContext::getLogsPathFromConfig(const Poco::Util::AbstractConfiguration & config) const
|
||||
{
|
||||
const auto create_local_disk = [](const auto & path)
|
||||
|
@ -6,6 +6,11 @@
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
|
||||
namespace rocksdb
|
||||
{
|
||||
struct Options;
|
||||
}
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
@ -62,6 +67,12 @@ public:
|
||||
|
||||
constexpr KeeperDispatcher * getDispatcher() const { return dispatcher; }
|
||||
|
||||
void setRocksDBDisk(DiskPtr disk);
|
||||
DiskPtr getTemporaryRocksDBDisk() const;
|
||||
|
||||
void setRocksDBOptions(std::shared_ptr<rocksdb::Options> rocksdb_options_ = nullptr);
|
||||
std::shared_ptr<rocksdb::Options> getRocksDBOptions() const { return rocksdb_options; }
|
||||
|
||||
UInt64 getKeeperMemorySoftLimit() const { return memory_soft_limit; }
|
||||
void updateKeeperMemorySoftLimit(const Poco::Util::AbstractConfiguration & config);
|
||||
|
||||
@ -90,6 +101,7 @@ private:
|
||||
void initializeFeatureFlags(const Poco::Util::AbstractConfiguration & config);
|
||||
void initializeDisks(const Poco::Util::AbstractConfiguration & config);
|
||||
|
||||
Storage getRocksDBPathFromConfig(const Poco::Util::AbstractConfiguration & config) const;
|
||||
Storage getLogsPathFromConfig(const Poco::Util::AbstractConfiguration & config) const;
|
||||
Storage getSnapshotsPathFromConfig(const Poco::Util::AbstractConfiguration & config) const;
|
||||
Storage getStatePathFromConfig(const Poco::Util::AbstractConfiguration & config) const;
|
||||
@ -111,12 +123,15 @@ private:
|
||||
|
||||
std::shared_ptr<DiskSelector> disk_selector;
|
||||
|
||||
Storage rocksdb_storage;
|
||||
Storage log_storage;
|
||||
Storage latest_log_storage;
|
||||
Storage snapshot_storage;
|
||||
Storage latest_snapshot_storage;
|
||||
Storage state_file_storage;
|
||||
|
||||
std::shared_ptr<rocksdb::Options> rocksdb_options;
|
||||
|
||||
std::vector<std::string> old_log_disk_names;
|
||||
std::vector<std::string> old_snapshot_disk_names;
|
||||
|
||||
|
@ -117,13 +117,13 @@ void KeeperDispatcher::requestThread()
|
||||
RaftAppendResult prev_result = nullptr;
|
||||
/// Requests from previous iteration. We store them to be able
|
||||
/// to send errors to the client.
|
||||
KeeperStorage::RequestsForSessions prev_batch;
|
||||
KeeperStorageBase::RequestsForSessions prev_batch;
|
||||
|
||||
const auto & shutdown_called = keeper_context->isShutdownCalled();
|
||||
|
||||
while (!shutdown_called)
|
||||
{
|
||||
KeeperStorage::RequestForSession request;
|
||||
KeeperStorageBase::RequestForSession request;
|
||||
|
||||
auto coordination_settings = configuration_and_settings->coordination_settings;
|
||||
uint64_t max_wait = coordination_settings->operation_timeout_ms.totalMilliseconds();
|
||||
@ -159,7 +159,7 @@ void KeeperDispatcher::requestThread()
|
||||
continue;
|
||||
}
|
||||
|
||||
KeeperStorage::RequestsForSessions current_batch;
|
||||
KeeperStorageBase::RequestsForSessions current_batch;
|
||||
size_t current_batch_bytes_size = 0;
|
||||
|
||||
bool has_read_request = false;
|
||||
@ -317,7 +317,7 @@ void KeeperDispatcher::responseThread()
|
||||
const auto & shutdown_called = keeper_context->isShutdownCalled();
|
||||
while (!shutdown_called)
|
||||
{
|
||||
KeeperStorage::ResponseForSession response_for_session;
|
||||
KeeperStorageBase::ResponseForSession response_for_session;
|
||||
|
||||
uint64_t max_wait = configuration_and_settings->coordination_settings->operation_timeout_ms.totalMilliseconds();
|
||||
|
||||
@ -408,7 +408,7 @@ bool KeeperDispatcher::putRequest(const Coordination::ZooKeeperRequestPtr & requ
|
||||
return false;
|
||||
}
|
||||
|
||||
KeeperStorage::RequestForSession request_info;
|
||||
KeeperStorageBase::RequestForSession request_info;
|
||||
request_info.request = request;
|
||||
using namespace std::chrono;
|
||||
request_info.time = duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
|
||||
@ -454,7 +454,7 @@ void KeeperDispatcher::initialize(const Poco::Util::AbstractConfiguration & conf
|
||||
snapshots_queue,
|
||||
keeper_context,
|
||||
snapshot_s3,
|
||||
[this](uint64_t /*log_idx*/, const KeeperStorage::RequestForSession & request_for_session)
|
||||
[this](uint64_t /*log_idx*/, const KeeperStorageBase::RequestForSession & request_for_session)
|
||||
{
|
||||
{
|
||||
/// check if we have queue of read requests depending on this request to be committed
|
||||
@ -546,7 +546,7 @@ void KeeperDispatcher::shutdown()
|
||||
update_configuration_thread.join();
|
||||
}
|
||||
|
||||
KeeperStorage::RequestForSession request_for_session;
|
||||
KeeperStorageBase::RequestForSession request_for_session;
|
||||
|
||||
/// Set session expired for all pending requests
|
||||
while (requests_queue && requests_queue->tryPop(request_for_session))
|
||||
@ -557,7 +557,7 @@ void KeeperDispatcher::shutdown()
|
||||
setResponse(request_for_session.session_id, response);
|
||||
}
|
||||
|
||||
KeeperStorage::RequestsForSessions close_requests;
|
||||
KeeperStorageBase::RequestsForSessions close_requests;
|
||||
{
|
||||
/// Clear all registered sessions
|
||||
std::lock_guard lock(session_to_response_callback_mutex);
|
||||
@ -571,7 +571,7 @@ void KeeperDispatcher::shutdown()
|
||||
auto request = Coordination::ZooKeeperRequestFactory::instance().get(Coordination::OpNum::Close);
|
||||
request->xid = Coordination::CLOSE_XID;
|
||||
using namespace std::chrono;
|
||||
KeeperStorage::RequestForSession request_info
|
||||
KeeperStorageBase::RequestForSession request_info
|
||||
{
|
||||
.session_id = session,
|
||||
.time = duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count(),
|
||||
@ -669,7 +669,7 @@ void KeeperDispatcher::sessionCleanerTask()
|
||||
auto request = Coordination::ZooKeeperRequestFactory::instance().get(Coordination::OpNum::Close);
|
||||
request->xid = Coordination::CLOSE_XID;
|
||||
using namespace std::chrono;
|
||||
KeeperStorage::RequestForSession request_info
|
||||
KeeperStorageBase::RequestForSession request_info
|
||||
{
|
||||
.session_id = dead_session,
|
||||
.time = duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count(),
|
||||
@ -717,16 +717,16 @@ void KeeperDispatcher::finishSession(int64_t session_id)
|
||||
}
|
||||
}
|
||||
|
||||
void KeeperDispatcher::addErrorResponses(const KeeperStorage::RequestsForSessions & requests_for_sessions, Coordination::Error error)
|
||||
void KeeperDispatcher::addErrorResponses(const KeeperStorageBase::RequestsForSessions & requests_for_sessions, Coordination::Error error)
|
||||
{
|
||||
for (const auto & request_for_session : requests_for_sessions)
|
||||
{
|
||||
KeeperStorage::ResponsesForSessions responses;
|
||||
KeeperStorageBase::ResponsesForSessions responses;
|
||||
auto response = request_for_session.request->makeResponse();
|
||||
response->xid = request_for_session.request->xid;
|
||||
response->zxid = 0;
|
||||
response->error = error;
|
||||
if (!responses_queue.push(DB::KeeperStorage::ResponseForSession{request_for_session.session_id, response}))
|
||||
if (!responses_queue.push(DB::KeeperStorageBase::ResponseForSession{request_for_session.session_id, response}))
|
||||
throw Exception(ErrorCodes::SYSTEM_ERROR,
|
||||
"Could not push error response xid {} zxid {} error message {} to responses queue",
|
||||
response->xid,
|
||||
@ -736,7 +736,7 @@ void KeeperDispatcher::addErrorResponses(const KeeperStorage::RequestsForSession
|
||||
}
|
||||
|
||||
nuraft::ptr<nuraft::buffer> KeeperDispatcher::forceWaitAndProcessResult(
|
||||
RaftAppendResult & result, KeeperStorage::RequestsForSessions & requests_for_sessions, bool clear_requests_on_success)
|
||||
RaftAppendResult & result, KeeperStorageBase::RequestsForSessions & requests_for_sessions, bool clear_requests_on_success)
|
||||
{
|
||||
if (!result->has_result())
|
||||
result->get();
|
||||
@ -761,7 +761,7 @@ int64_t KeeperDispatcher::getSessionID(int64_t session_timeout_ms)
|
||||
{
|
||||
/// New session id allocation is a special request, because we cannot process it in normal
|
||||
/// way: get request -> put to raft -> set response for registered callback.
|
||||
KeeperStorage::RequestForSession request_info;
|
||||
KeeperStorageBase::RequestForSession request_info;
|
||||
std::shared_ptr<Coordination::ZooKeeperSessionIDRequest> request = std::make_shared<Coordination::ZooKeeperSessionIDRequest>();
|
||||
/// Internal session id. It's a temporary number which is unique for each client on this server
|
||||
/// but can be same on different servers.
|
||||
|
@ -26,7 +26,7 @@ using ZooKeeperResponseCallback = std::function<void(const Coordination::ZooKeep
|
||||
class KeeperDispatcher
|
||||
{
|
||||
private:
|
||||
using RequestsQueue = ConcurrentBoundedQueue<KeeperStorage::RequestForSession>;
|
||||
using RequestsQueue = ConcurrentBoundedQueue<KeeperStorageBase::RequestForSession>;
|
||||
using SessionToResponseCallback = std::unordered_map<int64_t, ZooKeeperResponseCallback>;
|
||||
using ClusterUpdateQueue = ConcurrentBoundedQueue<ClusterUpdateAction>;
|
||||
|
||||
@ -95,18 +95,18 @@ private:
|
||||
|
||||
/// Add error responses for requests to responses queue.
|
||||
/// Clears requests.
|
||||
void addErrorResponses(const KeeperStorage::RequestsForSessions & requests_for_sessions, Coordination::Error error);
|
||||
void addErrorResponses(const KeeperStorageBase::RequestsForSessions & requests_for_sessions, Coordination::Error error);
|
||||
|
||||
/// Forcefully wait for result and sets errors if something when wrong.
|
||||
/// Clears both arguments
|
||||
nuraft::ptr<nuraft::buffer> forceWaitAndProcessResult(
|
||||
RaftAppendResult & result, KeeperStorage::RequestsForSessions & requests_for_sessions, bool clear_requests_on_success);
|
||||
RaftAppendResult & result, KeeperStorageBase::RequestsForSessions & requests_for_sessions, bool clear_requests_on_success);
|
||||
|
||||
public:
|
||||
std::mutex read_request_queue_mutex;
|
||||
|
||||
/// queue of read requests that can be processed after a request with specific session ID and XID is committed
|
||||
std::unordered_map<int64_t, std::unordered_map<Coordination::XID, KeeperStorage::RequestsForSessions>> read_request_queue;
|
||||
std::unordered_map<int64_t, std::unordered_map<Coordination::XID, KeeperStorageBase::RequestsForSessions>> read_request_queue;
|
||||
|
||||
/// Just allocate some objects, real initialization is done by `intialize method`
|
||||
KeeperDispatcher();
|
||||
@ -192,7 +192,7 @@ public:
|
||||
|
||||
Keeper4LWInfo getKeeper4LWInfo() const;
|
||||
|
||||
const KeeperStateMachine & getStateMachine() const
|
||||
const IKeeperStateMachine & getStateMachine() const
|
||||
{
|
||||
return *server->getKeeperStateMachine();
|
||||
}
|
||||
|
@ -123,7 +123,7 @@ KeeperServer::KeeperServer(
|
||||
SnapshotsQueue & snapshots_queue_,
|
||||
KeeperContextPtr keeper_context_,
|
||||
KeeperSnapshotManagerS3 & snapshot_manager_s3,
|
||||
KeeperStateMachine::CommitCallback commit_callback)
|
||||
IKeeperStateMachine::CommitCallback commit_callback)
|
||||
: server_id(configuration_and_settings_->server_id)
|
||||
, log(getLogger("KeeperServer"))
|
||||
, is_recovering(config.getBool("keeper_server.force_recovery", false))
|
||||
@ -134,7 +134,22 @@ KeeperServer::KeeperServer(
|
||||
if (keeper_context->getCoordinationSettings()->quorum_reads)
|
||||
LOG_WARNING(log, "Quorum reads enabled, Keeper will work slower.");
|
||||
|
||||
state_machine = nuraft::cs_new<KeeperStateMachine>(
|
||||
#if USE_ROCKSDB
|
||||
const auto & coordination_settings = keeper_context->getCoordinationSettings();
|
||||
if (coordination_settings->experimental_use_rocksdb)
|
||||
{
|
||||
state_machine = nuraft::cs_new<KeeperStateMachine<KeeperRocksStorage>>(
|
||||
responses_queue_,
|
||||
snapshots_queue_,
|
||||
keeper_context,
|
||||
config.getBool("keeper_server.upload_snapshot_on_exit", false) ? &snapshot_manager_s3 : nullptr,
|
||||
commit_callback,
|
||||
checkAndGetSuperdigest(configuration_and_settings_->super_digest));
|
||||
LOG_WARNING(log, "Use RocksDB as Keeper backend storage.");
|
||||
}
|
||||
else
|
||||
#endif
|
||||
state_machine = nuraft::cs_new<KeeperStateMachine<KeeperMemoryStorage>>(
|
||||
responses_queue_,
|
||||
snapshots_queue_,
|
||||
keeper_context,
|
||||
@ -522,7 +537,7 @@ namespace
|
||||
{
|
||||
|
||||
// Serialize the request for the log entry
|
||||
nuraft::ptr<nuraft::buffer> getZooKeeperLogEntry(const KeeperStorage::RequestForSession & request_for_session)
|
||||
nuraft::ptr<nuraft::buffer> getZooKeeperLogEntry(const KeeperStorageBase::RequestForSession & request_for_session)
|
||||
{
|
||||
DB::WriteBufferFromNuraftBuffer write_buf;
|
||||
DB::writeIntBinary(request_for_session.session_id, write_buf);
|
||||
@ -530,7 +545,7 @@ nuraft::ptr<nuraft::buffer> getZooKeeperLogEntry(const KeeperStorage::RequestFor
|
||||
DB::writeIntBinary(request_for_session.time, write_buf);
|
||||
/// we fill with dummy values to eliminate unnecessary copy later on when we will write correct values
|
||||
DB::writeIntBinary(static_cast<int64_t>(0), write_buf); /// zxid
|
||||
DB::writeIntBinary(KeeperStorage::DigestVersion::NO_DIGEST, write_buf); /// digest version or NO_DIGEST flag
|
||||
DB::writeIntBinary(KeeperStorageBase::DigestVersion::NO_DIGEST, write_buf); /// digest version or NO_DIGEST flag
|
||||
DB::writeIntBinary(static_cast<uint64_t>(0), write_buf); /// digest value
|
||||
/// if new fields are added, update KeeperStateMachine::ZooKeeperLogSerializationVersion along with parseRequest function and PreAppendLog callback handler
|
||||
return write_buf.getBuffer();
|
||||
@ -538,7 +553,7 @@ nuraft::ptr<nuraft::buffer> getZooKeeperLogEntry(const KeeperStorage::RequestFor
|
||||
|
||||
}
|
||||
|
||||
void KeeperServer::putLocalReadRequest(const KeeperStorage::RequestForSession & request_for_session)
|
||||
void KeeperServer::putLocalReadRequest(const KeeperStorageBase::RequestForSession & request_for_session)
|
||||
{
|
||||
if (!request_for_session.request->isReadRequest())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot process non-read request locally");
|
||||
@ -546,7 +561,7 @@ void KeeperServer::putLocalReadRequest(const KeeperStorage::RequestForSession &
|
||||
state_machine->processReadRequest(request_for_session);
|
||||
}
|
||||
|
||||
RaftAppendResult KeeperServer::putRequestBatch(const KeeperStorage::RequestsForSessions & requests_for_sessions)
|
||||
RaftAppendResult KeeperServer::putRequestBatch(const KeeperStorageBase::RequestsForSessions & requests_for_sessions)
|
||||
{
|
||||
std::vector<nuraft::ptr<nuraft::buffer>> entries;
|
||||
entries.reserve(requests_for_sessions.size());
|
||||
@ -789,7 +804,7 @@ nuraft::cb_func::ReturnCode KeeperServer::callbackFunc(nuraft::cb_func::Type typ
|
||||
|
||||
auto entry_buf = entry->get_buf_ptr();
|
||||
|
||||
KeeperStateMachine::ZooKeeperLogSerializationVersion serialization_version;
|
||||
IKeeperStateMachine::ZooKeeperLogSerializationVersion serialization_version;
|
||||
auto request_for_session = state_machine->parseRequest(*entry_buf, /*final=*/false, &serialization_version);
|
||||
request_for_session->zxid = next_zxid;
|
||||
if (!state_machine->preprocess(*request_for_session))
|
||||
@ -799,10 +814,10 @@ nuraft::cb_func::ReturnCode KeeperServer::callbackFunc(nuraft::cb_func::Type typ
|
||||
|
||||
/// older versions of Keeper can send logs that are missing some fields
|
||||
size_t bytes_missing = 0;
|
||||
if (serialization_version < KeeperStateMachine::ZooKeeperLogSerializationVersion::WITH_TIME)
|
||||
if (serialization_version < IKeeperStateMachine::ZooKeeperLogSerializationVersion::WITH_TIME)
|
||||
bytes_missing += sizeof(request_for_session->time);
|
||||
|
||||
if (serialization_version < KeeperStateMachine::ZooKeeperLogSerializationVersion::WITH_ZXID_DIGEST)
|
||||
if (serialization_version < IKeeperStateMachine::ZooKeeperLogSerializationVersion::WITH_ZXID_DIGEST)
|
||||
bytes_missing += sizeof(request_for_session->zxid) + sizeof(request_for_session->digest->version) + sizeof(request_for_session->digest->value);
|
||||
|
||||
if (bytes_missing != 0)
|
||||
@ -816,19 +831,19 @@ nuraft::cb_func::ReturnCode KeeperServer::callbackFunc(nuraft::cb_func::Type typ
|
||||
size_t write_buffer_header_size
|
||||
= sizeof(request_for_session->zxid) + sizeof(request_for_session->digest->version) + sizeof(request_for_session->digest->value);
|
||||
|
||||
if (serialization_version < KeeperStateMachine::ZooKeeperLogSerializationVersion::WITH_TIME)
|
||||
if (serialization_version < IKeeperStateMachine::ZooKeeperLogSerializationVersion::WITH_TIME)
|
||||
write_buffer_header_size += sizeof(request_for_session->time);
|
||||
|
||||
auto * buffer_start = reinterpret_cast<BufferBase::Position>(entry_buf->data_begin() + entry_buf->size() - write_buffer_header_size);
|
||||
|
||||
WriteBufferFromPointer write_buf(buffer_start, write_buffer_header_size);
|
||||
|
||||
if (serialization_version < KeeperStateMachine::ZooKeeperLogSerializationVersion::WITH_TIME)
|
||||
if (serialization_version < IKeeperStateMachine::ZooKeeperLogSerializationVersion::WITH_TIME)
|
||||
writeIntBinary(request_for_session->time, write_buf);
|
||||
|
||||
writeIntBinary(request_for_session->zxid, write_buf);
|
||||
writeIntBinary(request_for_session->digest->version, write_buf);
|
||||
if (request_for_session->digest->version != KeeperStorage::NO_DIGEST)
|
||||
if (request_for_session->digest->version != KeeperStorageBase::NO_DIGEST)
|
||||
writeIntBinary(request_for_session->digest->value, write_buf);
|
||||
|
||||
write_buf.finalize();
|
||||
|
@ -24,7 +24,7 @@ class KeeperServer
|
||||
private:
|
||||
const int server_id;
|
||||
|
||||
nuraft::ptr<KeeperStateMachine> state_machine;
|
||||
nuraft::ptr<IKeeperStateMachine> state_machine;
|
||||
|
||||
nuraft::ptr<KeeperStateManager> state_manager;
|
||||
|
||||
@ -79,26 +79,26 @@ public:
|
||||
SnapshotsQueue & snapshots_queue_,
|
||||
KeeperContextPtr keeper_context_,
|
||||
KeeperSnapshotManagerS3 & snapshot_manager_s3,
|
||||
KeeperStateMachine::CommitCallback commit_callback);
|
||||
IKeeperStateMachine::CommitCallback commit_callback);
|
||||
|
||||
/// Load state machine from the latest snapshot and load log storage. Start NuRaft with required settings.
|
||||
void startup(const Poco::Util::AbstractConfiguration & config, bool enable_ipv6 = true);
|
||||
|
||||
/// Put local read request and execute in state machine directly and response into
|
||||
/// responses queue
|
||||
void putLocalReadRequest(const KeeperStorage::RequestForSession & request);
|
||||
void putLocalReadRequest(const KeeperStorageBase::RequestForSession & request);
|
||||
|
||||
bool isRecovering() const { return is_recovering; }
|
||||
bool reconfigEnabled() const { return enable_reconfiguration; }
|
||||
|
||||
/// Put batch of requests into Raft and get result of put. Responses will be set separately into
|
||||
/// responses_queue.
|
||||
RaftAppendResult putRequestBatch(const KeeperStorage::RequestsForSessions & requests);
|
||||
RaftAppendResult putRequestBatch(const KeeperStorageBase::RequestsForSessions & requests);
|
||||
|
||||
/// Return set of the non-active sessions
|
||||
std::vector<int64_t> getDeadSessions();
|
||||
|
||||
nuraft::ptr<KeeperStateMachine> getKeeperStateMachine() const { return state_machine; }
|
||||
nuraft::ptr<IKeeperStateMachine> getKeeperStateMachine() const { return state_machine; }
|
||||
|
||||
void forceRecovery();
|
||||
|
||||
|
@ -66,7 +66,8 @@ namespace
|
||||
return base;
|
||||
}
|
||||
|
||||
void writeNode(const KeeperStorage::Node & node, SnapshotVersion version, WriteBuffer & out)
|
||||
template<typename Node>
|
||||
void writeNode(const Node & node, SnapshotVersion version, WriteBuffer & out)
|
||||
{
|
||||
writeBinary(node.getData(), out);
|
||||
|
||||
@ -86,7 +87,7 @@ namespace
|
||||
writeBinary(node.aversion, out);
|
||||
writeBinary(node.ephemeralOwner(), out);
|
||||
if (version < SnapshotVersion::V6)
|
||||
writeBinary(static_cast<int32_t>(node.data_size), out);
|
||||
writeBinary(static_cast<int32_t>(node.getData().size()), out);
|
||||
writeBinary(node.numChildren(), out);
|
||||
writeBinary(node.pzxid, out);
|
||||
|
||||
@ -96,7 +97,8 @@ namespace
|
||||
writeBinary(node.sizeInBytes(), out);
|
||||
}
|
||||
|
||||
void readNode(KeeperStorage::Node & node, ReadBuffer & in, SnapshotVersion version, ACLMap & acl_map)
|
||||
template<typename Node>
|
||||
void readNode(Node & node, ReadBuffer & in, SnapshotVersion version, ACLMap & acl_map)
|
||||
{
|
||||
readVarUInt(node.data_size, in);
|
||||
if (node.data_size != 0)
|
||||
@ -195,7 +197,8 @@ namespace
|
||||
}
|
||||
}
|
||||
|
||||
void KeeperStorageSnapshot::serialize(const KeeperStorageSnapshot & snapshot, WriteBuffer & out, KeeperContextPtr keeper_context)
|
||||
template<typename Storage>
|
||||
void KeeperStorageSnapshot<Storage>::serialize(const KeeperStorageSnapshot<Storage> & snapshot, WriteBuffer & out, KeeperContextPtr keeper_context)
|
||||
{
|
||||
writeBinary(static_cast<uint8_t>(snapshot.version), out);
|
||||
serializeSnapshotMetadata(snapshot.snapshot_meta, out);
|
||||
@ -205,11 +208,11 @@ void KeeperStorageSnapshot::serialize(const KeeperStorageSnapshot & snapshot, Wr
|
||||
writeBinary(snapshot.zxid, out);
|
||||
if (keeper_context->digestEnabled())
|
||||
{
|
||||
writeBinary(static_cast<uint8_t>(KeeperStorage::CURRENT_DIGEST_VERSION), out);
|
||||
writeBinary(static_cast<uint8_t>(Storage::CURRENT_DIGEST_VERSION), out);
|
||||
writeBinary(snapshot.nodes_digest, out);
|
||||
}
|
||||
else
|
||||
writeBinary(static_cast<uint8_t>(KeeperStorage::NO_DIGEST), out);
|
||||
writeBinary(static_cast<uint8_t>(Storage::NO_DIGEST), out);
|
||||
}
|
||||
|
||||
writeBinary(snapshot.session_id, out);
|
||||
@ -255,7 +258,6 @@ void KeeperStorageSnapshot::serialize(const KeeperStorageSnapshot & snapshot, Wr
|
||||
/// slightly bigger than required.
|
||||
if (node.mzxid > snapshot.zxid)
|
||||
break;
|
||||
|
||||
writeBinary(path, out);
|
||||
writeNode(node, snapshot.version, out);
|
||||
|
||||
@ -282,7 +284,7 @@ void KeeperStorageSnapshot::serialize(const KeeperStorageSnapshot & snapshot, Wr
|
||||
writeBinary(session_id, out);
|
||||
writeBinary(timeout, out);
|
||||
|
||||
KeeperStorage::AuthIDs ids;
|
||||
KeeperStorageBase::AuthIDs ids;
|
||||
if (snapshot.session_and_auth.contains(session_id))
|
||||
ids = snapshot.session_and_auth.at(session_id);
|
||||
|
||||
@ -303,7 +305,8 @@ void KeeperStorageSnapshot::serialize(const KeeperStorageSnapshot & snapshot, Wr
|
||||
}
|
||||
}
|
||||
|
||||
void KeeperStorageSnapshot::deserialize(SnapshotDeserializationResult & deserialization_result, ReadBuffer & in, KeeperContextPtr keeper_context)
|
||||
template<typename Storage>
|
||||
void KeeperStorageSnapshot<Storage>::deserialize(SnapshotDeserializationResult<Storage> & deserialization_result, ReadBuffer & in, KeeperContextPtr keeper_context)
|
||||
{
|
||||
uint8_t version;
|
||||
readBinary(version, in);
|
||||
@ -312,7 +315,7 @@ void KeeperStorageSnapshot::deserialize(SnapshotDeserializationResult & deserial
|
||||
throw Exception(ErrorCodes::UNKNOWN_FORMAT_VERSION, "Unsupported snapshot version {}", version);
|
||||
|
||||
deserialization_result.snapshot_meta = deserializeSnapshotMetadata(in);
|
||||
KeeperStorage & storage = *deserialization_result.storage;
|
||||
Storage & storage = *deserialization_result.storage;
|
||||
|
||||
bool recalculate_digest = keeper_context->digestEnabled();
|
||||
if (version >= SnapshotVersion::V5)
|
||||
@ -320,11 +323,11 @@ void KeeperStorageSnapshot::deserialize(SnapshotDeserializationResult & deserial
|
||||
readBinary(storage.zxid, in);
|
||||
uint8_t digest_version;
|
||||
readBinary(digest_version, in);
|
||||
if (digest_version != KeeperStorage::DigestVersion::NO_DIGEST)
|
||||
if (digest_version != Storage::DigestVersion::NO_DIGEST)
|
||||
{
|
||||
uint64_t nodes_digest;
|
||||
readBinary(nodes_digest, in);
|
||||
if (digest_version == KeeperStorage::CURRENT_DIGEST_VERSION)
|
||||
if (digest_version == Storage::CURRENT_DIGEST_VERSION)
|
||||
{
|
||||
storage.nodes_digest = nodes_digest;
|
||||
recalculate_digest = false;
|
||||
@ -374,7 +377,7 @@ void KeeperStorageSnapshot::deserialize(SnapshotDeserializationResult & deserial
|
||||
|
||||
size_t snapshot_container_size;
|
||||
readBinary(snapshot_container_size, in);
|
||||
|
||||
if constexpr (!use_rocksdb)
|
||||
storage.container.reserve(snapshot_container_size);
|
||||
|
||||
if (recalculate_digest)
|
||||
@ -389,7 +392,7 @@ void KeeperStorageSnapshot::deserialize(SnapshotDeserializationResult & deserial
|
||||
in.readStrict(path_data.get(), path_size);
|
||||
std::string_view path{path_data.get(), path_size};
|
||||
|
||||
KeeperStorage::Node node{};
|
||||
typename Storage::Node node{};
|
||||
readNode(node, in, current_version, storage.acl_map);
|
||||
|
||||
using enum Coordination::PathMatchResult;
|
||||
@ -421,7 +424,7 @@ void KeeperStorageSnapshot::deserialize(SnapshotDeserializationResult & deserial
|
||||
if (keeper_context->ignoreSystemPathOnStartup() || keeper_context->getServerState() != KeeperContext::Phase::INIT)
|
||||
{
|
||||
LOG_ERROR(getLogger("KeeperSnapshotManager"), "{}. Ignoring it", get_error_msg());
|
||||
node = KeeperStorage::Node{};
|
||||
node = typename Storage::Node{};
|
||||
}
|
||||
else
|
||||
throw Exception(
|
||||
@ -433,6 +436,7 @@ void KeeperStorageSnapshot::deserialize(SnapshotDeserializationResult & deserial
|
||||
}
|
||||
|
||||
auto ephemeral_owner = node.ephemeralOwner();
|
||||
if constexpr (!use_rocksdb)
|
||||
if (!node.isEphemeral() && node.numChildren() > 0)
|
||||
node.getChildren().reserve(node.numChildren());
|
||||
|
||||
@ -447,13 +451,15 @@ void KeeperStorageSnapshot::deserialize(SnapshotDeserializationResult & deserial
|
||||
|
||||
LOG_TRACE(getLogger("KeeperSnapshotManager"), "Building structure for children nodes");
|
||||
|
||||
if constexpr (!use_rocksdb)
|
||||
{
|
||||
for (const auto & itr : storage.container)
|
||||
{
|
||||
if (itr.key != "/")
|
||||
{
|
||||
auto parent_path = parentNodePath(itr.key);
|
||||
storage.container.updateValue(
|
||||
parent_path, [path = itr.key](KeeperStorage::Node & value) { value.addChild(getBaseNodeName(path)); });
|
||||
parent_path, [path = itr.key](typename Storage::Node & value) { value.addChild(getBaseNodeName(path)); });
|
||||
}
|
||||
}
|
||||
|
||||
@ -475,7 +481,7 @@ void KeeperStorageSnapshot::deserialize(SnapshotDeserializationResult & deserial
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
size_t active_sessions_size;
|
||||
readBinary(active_sessions_size, in);
|
||||
@ -493,14 +499,14 @@ void KeeperStorageSnapshot::deserialize(SnapshotDeserializationResult & deserial
|
||||
size_t session_auths_size;
|
||||
readBinary(session_auths_size, in);
|
||||
|
||||
KeeperStorage::AuthIDs ids;
|
||||
typename Storage::AuthIDs ids;
|
||||
size_t session_auth_counter = 0;
|
||||
while (session_auth_counter < session_auths_size)
|
||||
{
|
||||
String scheme, id;
|
||||
readBinary(scheme, in);
|
||||
readBinary(id, in);
|
||||
ids.emplace_back(KeeperStorage::AuthID{scheme, id});
|
||||
ids.emplace_back(typename Storage::AuthID{scheme, id});
|
||||
|
||||
session_auth_counter++;
|
||||
}
|
||||
@ -523,7 +529,8 @@ void KeeperStorageSnapshot::deserialize(SnapshotDeserializationResult & deserial
|
||||
}
|
||||
}
|
||||
|
||||
KeeperStorageSnapshot::KeeperStorageSnapshot(KeeperStorage * storage_, uint64_t up_to_log_idx_, const ClusterConfigPtr & cluster_config_)
|
||||
template<typename Storage>
|
||||
KeeperStorageSnapshot<Storage>::KeeperStorageSnapshot(Storage * storage_, uint64_t up_to_log_idx_, const ClusterConfigPtr & cluster_config_)
|
||||
: storage(storage_)
|
||||
, snapshot_meta(std::make_shared<SnapshotMetadata>(up_to_log_idx_, 0, std::make_shared<nuraft::cluster_config>()))
|
||||
, session_id(storage->session_id_counter)
|
||||
@ -540,8 +547,9 @@ KeeperStorageSnapshot::KeeperStorageSnapshot(KeeperStorage * storage_, uint64_t
|
||||
session_and_auth = storage->session_and_auth;
|
||||
}
|
||||
|
||||
KeeperStorageSnapshot::KeeperStorageSnapshot(
|
||||
KeeperStorage * storage_, const SnapshotMetadataPtr & snapshot_meta_, const ClusterConfigPtr & cluster_config_)
|
||||
template<typename Storage>
|
||||
KeeperStorageSnapshot<Storage>::KeeperStorageSnapshot(
|
||||
Storage * storage_, const SnapshotMetadataPtr & snapshot_meta_, const ClusterConfigPtr & cluster_config_)
|
||||
: storage(storage_)
|
||||
, snapshot_meta(snapshot_meta_)
|
||||
, session_id(storage->session_id_counter)
|
||||
@ -558,12 +566,14 @@ KeeperStorageSnapshot::KeeperStorageSnapshot(
|
||||
session_and_auth = storage->session_and_auth;
|
||||
}
|
||||
|
||||
KeeperStorageSnapshot::~KeeperStorageSnapshot()
|
||||
template<typename Storage>
|
||||
KeeperStorageSnapshot<Storage>::~KeeperStorageSnapshot()
|
||||
{
|
||||
storage->disableSnapshotMode();
|
||||
}
|
||||
|
||||
KeeperSnapshotManager::KeeperSnapshotManager(
|
||||
template<typename Storage>
|
||||
KeeperSnapshotManager<Storage>::KeeperSnapshotManager(
|
||||
size_t snapshots_to_keep_,
|
||||
const KeeperContextPtr & keeper_context_,
|
||||
bool compress_snapshots_zstd_,
|
||||
@ -651,7 +661,8 @@ KeeperSnapshotManager::KeeperSnapshotManager(
|
||||
moveSnapshotsIfNeeded();
|
||||
}
|
||||
|
||||
SnapshotFileInfoPtr KeeperSnapshotManager::serializeSnapshotBufferToDisk(nuraft::buffer & buffer, uint64_t up_to_log_idx)
|
||||
template<typename Storage>
|
||||
SnapshotFileInfoPtr KeeperSnapshotManager<Storage>::serializeSnapshotBufferToDisk(nuraft::buffer & buffer, uint64_t up_to_log_idx)
|
||||
{
|
||||
ReadBufferFromNuraftBuffer reader(buffer);
|
||||
|
||||
@ -680,7 +691,8 @@ SnapshotFileInfoPtr KeeperSnapshotManager::serializeSnapshotBufferToDisk(nuraft:
|
||||
return snapshot_file_info;
|
||||
}
|
||||
|
||||
nuraft::ptr<nuraft::buffer> KeeperSnapshotManager::deserializeLatestSnapshotBufferFromDisk()
|
||||
template<typename Storage>
|
||||
nuraft::ptr<nuraft::buffer> KeeperSnapshotManager<Storage>::deserializeLatestSnapshotBufferFromDisk()
|
||||
{
|
||||
while (!existing_snapshots.empty())
|
||||
{
|
||||
@ -701,7 +713,8 @@ nuraft::ptr<nuraft::buffer> KeeperSnapshotManager::deserializeLatestSnapshotBuff
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
nuraft::ptr<nuraft::buffer> KeeperSnapshotManager::deserializeSnapshotBufferFromDisk(uint64_t up_to_log_idx) const
|
||||
template<typename Storage>
|
||||
nuraft::ptr<nuraft::buffer> KeeperSnapshotManager<Storage>::deserializeSnapshotBufferFromDisk(uint64_t up_to_log_idx) const
|
||||
{
|
||||
const auto & [snapshot_path, snapshot_disk, size] = *existing_snapshots.at(up_to_log_idx);
|
||||
WriteBufferFromNuraftBuffer writer;
|
||||
@ -710,7 +723,8 @@ nuraft::ptr<nuraft::buffer> KeeperSnapshotManager::deserializeSnapshotBufferFrom
|
||||
return writer.getBuffer();
|
||||
}
|
||||
|
||||
nuraft::ptr<nuraft::buffer> KeeperSnapshotManager::serializeSnapshotToBuffer(const KeeperStorageSnapshot & snapshot) const
|
||||
template<typename Storage>
|
||||
nuraft::ptr<nuraft::buffer> KeeperSnapshotManager<Storage>::serializeSnapshotToBuffer(const KeeperStorageSnapshot<Storage> & snapshot) const
|
||||
{
|
||||
std::unique_ptr<WriteBufferFromNuraftBuffer> writer = std::make_unique<WriteBufferFromNuraftBuffer>();
|
||||
auto * buffer_raw_ptr = writer.get();
|
||||
@ -720,13 +734,13 @@ nuraft::ptr<nuraft::buffer> KeeperSnapshotManager::serializeSnapshotToBuffer(con
|
||||
else
|
||||
compressed_writer = std::make_unique<CompressedWriteBuffer>(*writer);
|
||||
|
||||
KeeperStorageSnapshot::serialize(snapshot, *compressed_writer, keeper_context);
|
||||
KeeperStorageSnapshot<Storage>::serialize(snapshot, *compressed_writer, keeper_context);
|
||||
compressed_writer->finalize();
|
||||
return buffer_raw_ptr->getBuffer();
|
||||
}
|
||||
|
||||
|
||||
bool KeeperSnapshotManager::isZstdCompressed(nuraft::ptr<nuraft::buffer> buffer)
|
||||
template<typename Storage>
|
||||
bool KeeperSnapshotManager<Storage>::isZstdCompressed(nuraft::ptr<nuraft::buffer> buffer)
|
||||
{
|
||||
static constexpr unsigned char ZSTD_COMPRESSED_MAGIC[4] = {0x28, 0xB5, 0x2F, 0xFD};
|
||||
|
||||
@ -737,7 +751,8 @@ bool KeeperSnapshotManager::isZstdCompressed(nuraft::ptr<nuraft::buffer> buffer)
|
||||
return memcmp(magic_from_buffer, ZSTD_COMPRESSED_MAGIC, 4) == 0;
|
||||
}
|
||||
|
||||
SnapshotDeserializationResult KeeperSnapshotManager::deserializeSnapshotFromBuffer(nuraft::ptr<nuraft::buffer> buffer) const
|
||||
template<typename Storage>
|
||||
SnapshotDeserializationResult<Storage> KeeperSnapshotManager<Storage>::deserializeSnapshotFromBuffer(nuraft::ptr<nuraft::buffer> buffer) const
|
||||
{
|
||||
bool is_zstd_compressed = isZstdCompressed(buffer);
|
||||
|
||||
@ -749,14 +764,15 @@ SnapshotDeserializationResult KeeperSnapshotManager::deserializeSnapshotFromBuff
|
||||
else
|
||||
compressed_reader = std::make_unique<CompressedReadBuffer>(*reader);
|
||||
|
||||
SnapshotDeserializationResult result;
|
||||
result.storage = std::make_unique<KeeperStorage>(storage_tick_time, superdigest, keeper_context, /* initialize_system_nodes */ false);
|
||||
KeeperStorageSnapshot::deserialize(result, *compressed_reader, keeper_context);
|
||||
SnapshotDeserializationResult<Storage> result;
|
||||
result.storage = std::make_unique<Storage>(storage_tick_time, superdigest, keeper_context, /* initialize_system_nodes */ false);
|
||||
KeeperStorageSnapshot<Storage>::deserialize(result, *compressed_reader, keeper_context);
|
||||
result.storage->initializeSystemNodes();
|
||||
return result;
|
||||
}
|
||||
|
||||
SnapshotDeserializationResult KeeperSnapshotManager::restoreFromLatestSnapshot()
|
||||
template<typename Storage>
|
||||
SnapshotDeserializationResult<Storage> KeeperSnapshotManager<Storage>::restoreFromLatestSnapshot()
|
||||
{
|
||||
if (existing_snapshots.empty())
|
||||
return {};
|
||||
@ -767,23 +783,27 @@ SnapshotDeserializationResult KeeperSnapshotManager::restoreFromLatestSnapshot()
|
||||
return deserializeSnapshotFromBuffer(buffer);
|
||||
}
|
||||
|
||||
DiskPtr KeeperSnapshotManager::getDisk() const
|
||||
template<typename Storage>
|
||||
DiskPtr KeeperSnapshotManager<Storage>::getDisk() const
|
||||
{
|
||||
return keeper_context->getSnapshotDisk();
|
||||
}
|
||||
|
||||
DiskPtr KeeperSnapshotManager::getLatestSnapshotDisk() const
|
||||
template<typename Storage>
|
||||
DiskPtr KeeperSnapshotManager<Storage>::getLatestSnapshotDisk() const
|
||||
{
|
||||
return keeper_context->getLatestSnapshotDisk();
|
||||
}
|
||||
|
||||
void KeeperSnapshotManager::removeOutdatedSnapshotsIfNeeded()
|
||||
template<typename Storage>
|
||||
void KeeperSnapshotManager<Storage>::removeOutdatedSnapshotsIfNeeded()
|
||||
{
|
||||
while (existing_snapshots.size() > snapshots_to_keep)
|
||||
removeSnapshot(existing_snapshots.begin()->first);
|
||||
}
|
||||
|
||||
void KeeperSnapshotManager::moveSnapshotsIfNeeded()
|
||||
template<typename Storage>
|
||||
void KeeperSnapshotManager<Storage>::moveSnapshotsIfNeeded()
|
||||
{
|
||||
/// move snapshots to correct disks
|
||||
|
||||
@ -813,7 +833,8 @@ void KeeperSnapshotManager::moveSnapshotsIfNeeded()
|
||||
|
||||
}
|
||||
|
||||
void KeeperSnapshotManager::removeSnapshot(uint64_t log_idx)
|
||||
template<typename Storage>
|
||||
void KeeperSnapshotManager<Storage>::removeSnapshot(uint64_t log_idx)
|
||||
{
|
||||
auto itr = existing_snapshots.find(log_idx);
|
||||
if (itr == existing_snapshots.end())
|
||||
@ -823,7 +844,8 @@ void KeeperSnapshotManager::removeSnapshot(uint64_t log_idx)
|
||||
existing_snapshots.erase(itr);
|
||||
}
|
||||
|
||||
SnapshotFileInfoPtr KeeperSnapshotManager::serializeSnapshotToDisk(const KeeperStorageSnapshot & snapshot)
|
||||
template<typename Storage>
|
||||
SnapshotFileInfoPtr KeeperSnapshotManager<Storage>::serializeSnapshotToDisk(const KeeperStorageSnapshot<Storage> & snapshot)
|
||||
{
|
||||
auto up_to_log_idx = snapshot.snapshot_meta->get_last_log_idx();
|
||||
auto snapshot_file_name = getSnapshotFileName(up_to_log_idx, compress_snapshots_zstd);
|
||||
@ -842,7 +864,7 @@ SnapshotFileInfoPtr KeeperSnapshotManager::serializeSnapshotToDisk(const KeeperS
|
||||
else
|
||||
compressed_writer = std::make_unique<CompressedWriteBuffer>(*writer);
|
||||
|
||||
KeeperStorageSnapshot::serialize(snapshot, *compressed_writer, keeper_context);
|
||||
KeeperStorageSnapshot<Storage>::serialize(snapshot, *compressed_writer, keeper_context);
|
||||
compressed_writer->finalize();
|
||||
compressed_writer->sync();
|
||||
|
||||
@ -864,14 +886,16 @@ SnapshotFileInfoPtr KeeperSnapshotManager::serializeSnapshotToDisk(const KeeperS
|
||||
return snapshot_file_info;
|
||||
}
|
||||
|
||||
size_t KeeperSnapshotManager::getLatestSnapshotIndex() const
|
||||
template<typename Storage>
|
||||
size_t KeeperSnapshotManager<Storage>::getLatestSnapshotIndex() const
|
||||
{
|
||||
if (!existing_snapshots.empty())
|
||||
return existing_snapshots.rbegin()->first;
|
||||
return 0;
|
||||
}
|
||||
|
||||
SnapshotFileInfoPtr KeeperSnapshotManager::getLatestSnapshotInfo() const
|
||||
template<typename Storage>
|
||||
SnapshotFileInfoPtr KeeperSnapshotManager<Storage>::getLatestSnapshotInfo() const
|
||||
{
|
||||
if (!existing_snapshots.empty())
|
||||
{
|
||||
@ -890,4 +914,10 @@ SnapshotFileInfoPtr KeeperSnapshotManager::getLatestSnapshotInfo() const
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
template struct KeeperStorageSnapshot<KeeperMemoryStorage>;
|
||||
template class KeeperSnapshotManager<KeeperMemoryStorage>;
|
||||
#if USE_ROCKSDB
|
||||
template struct KeeperStorageSnapshot<KeeperRocksStorage>;
|
||||
template class KeeperSnapshotManager<KeeperRocksStorage>;
|
||||
#endif
|
||||
}
|
||||
|
@ -34,10 +34,11 @@ enum SnapshotVersion : uint8_t
|
||||
static constexpr auto CURRENT_SNAPSHOT_VERSION = SnapshotVersion::V6;
|
||||
|
||||
/// What is stored in binary snapshot
|
||||
template<typename Storage>
|
||||
struct SnapshotDeserializationResult
|
||||
{
|
||||
/// Storage
|
||||
KeeperStoragePtr storage;
|
||||
std::unique_ptr<Storage> storage;
|
||||
/// Snapshot metadata (up_to_log_idx and so on)
|
||||
SnapshotMetadataPtr snapshot_meta;
|
||||
/// Cluster config
|
||||
@ -52,21 +53,31 @@ struct SnapshotDeserializationResult
|
||||
///
|
||||
/// This representation of snapshot have to be serialized into NuRaft
|
||||
/// buffer and send over network or saved to file.
|
||||
template<typename Storage>
|
||||
struct KeeperStorageSnapshot
|
||||
{
|
||||
#if USE_ROCKSDB
|
||||
static constexpr bool use_rocksdb = std::is_same_v<Storage, KeeperRocksStorage>;
|
||||
#else
|
||||
static constexpr bool use_rocksdb = false;
|
||||
#endif
|
||||
|
||||
public:
|
||||
KeeperStorageSnapshot(KeeperStorage * storage_, uint64_t up_to_log_idx_, const ClusterConfigPtr & cluster_config_ = nullptr);
|
||||
KeeperStorageSnapshot(Storage * storage_, uint64_t up_to_log_idx_, const ClusterConfigPtr & cluster_config_ = nullptr);
|
||||
|
||||
KeeperStorageSnapshot(
|
||||
KeeperStorage * storage_, const SnapshotMetadataPtr & snapshot_meta_, const ClusterConfigPtr & cluster_config_ = nullptr);
|
||||
Storage * storage_, const SnapshotMetadataPtr & snapshot_meta_, const ClusterConfigPtr & cluster_config_ = nullptr);
|
||||
|
||||
KeeperStorageSnapshot(const KeeperStorageSnapshot<Storage>&) = delete;
|
||||
KeeperStorageSnapshot(KeeperStorageSnapshot<Storage>&&) = default;
|
||||
|
||||
~KeeperStorageSnapshot();
|
||||
|
||||
static void serialize(const KeeperStorageSnapshot & snapshot, WriteBuffer & out, KeeperContextPtr keeper_context);
|
||||
static void serialize(const KeeperStorageSnapshot<Storage> & snapshot, WriteBuffer & out, KeeperContextPtr keeper_context);
|
||||
|
||||
static void deserialize(SnapshotDeserializationResult & deserialization_result, ReadBuffer & in, KeeperContextPtr keeper_context);
|
||||
static void deserialize(SnapshotDeserializationResult<Storage> & deserialization_result, ReadBuffer & in, KeeperContextPtr keeper_context);
|
||||
|
||||
KeeperStorage * storage;
|
||||
Storage * storage;
|
||||
|
||||
SnapshotVersion version = CURRENT_SNAPSHOT_VERSION;
|
||||
/// Snapshot metadata
|
||||
@ -77,11 +88,11 @@ public:
|
||||
/// so we have for loop for (i = 0; i < snapshot_container_size; ++i) { doSmth(begin + i); }
|
||||
size_t snapshot_container_size;
|
||||
/// Iterator to the start of the storage
|
||||
KeeperStorage::Container::const_iterator begin;
|
||||
Storage::Container::const_iterator begin;
|
||||
/// Active sessions and their timeouts
|
||||
SessionAndTimeout session_and_timeout;
|
||||
/// Sessions credentials
|
||||
KeeperStorage::SessionAndAuth session_and_auth;
|
||||
Storage::SessionAndAuth session_and_auth;
|
||||
/// ACLs cache for better performance. Without we cannot deserialize storage.
|
||||
std::unordered_map<uint64_t, Coordination::ACLs> acl_map;
|
||||
/// Cluster config from snapshot, can be empty
|
||||
@ -105,14 +116,16 @@ struct SnapshotFileInfo
|
||||
};
|
||||
|
||||
using SnapshotFileInfoPtr = std::shared_ptr<SnapshotFileInfo>;
|
||||
|
||||
using KeeperStorageSnapshotPtr = std::shared_ptr<KeeperStorageSnapshot>;
|
||||
using CreateSnapshotCallback = std::function<std::shared_ptr<SnapshotFileInfo>(KeeperStorageSnapshotPtr &&, bool)>;
|
||||
|
||||
using SnapshotMetaAndStorage = std::pair<SnapshotMetadataPtr, KeeperStoragePtr>;
|
||||
#if USE_ROCKSDB
|
||||
using KeeperStorageSnapshotPtr = std::variant<std::shared_ptr<KeeperStorageSnapshot<KeeperMemoryStorage>>, std::shared_ptr<KeeperStorageSnapshot<KeeperRocksStorage>>>;
|
||||
#else
|
||||
using KeeperStorageSnapshotPtr = std::variant<std::shared_ptr<KeeperStorageSnapshot<KeeperMemoryStorage>>>;
|
||||
#endif
|
||||
using CreateSnapshotCallback = std::function<SnapshotFileInfoPtr(KeeperStorageSnapshotPtr &&, bool)>;
|
||||
|
||||
/// Class responsible for snapshots serialization and deserialization. Each snapshot
|
||||
/// has it's path on disk and log index.
|
||||
template<typename Storage>
|
||||
class KeeperSnapshotManager
|
||||
{
|
||||
public:
|
||||
@ -124,18 +137,18 @@ public:
|
||||
size_t storage_tick_time_ = 500);
|
||||
|
||||
/// Restore storage from latest available snapshot
|
||||
SnapshotDeserializationResult restoreFromLatestSnapshot();
|
||||
SnapshotDeserializationResult<Storage> restoreFromLatestSnapshot();
|
||||
|
||||
/// Compress snapshot and serialize it to buffer
|
||||
nuraft::ptr<nuraft::buffer> serializeSnapshotToBuffer(const KeeperStorageSnapshot & snapshot) const;
|
||||
nuraft::ptr<nuraft::buffer> serializeSnapshotToBuffer(const KeeperStorageSnapshot<Storage> & snapshot) const;
|
||||
|
||||
/// Serialize already compressed snapshot to disk (return path)
|
||||
SnapshotFileInfoPtr serializeSnapshotBufferToDisk(nuraft::buffer & buffer, uint64_t up_to_log_idx);
|
||||
|
||||
/// Serialize snapshot directly to disk
|
||||
SnapshotFileInfoPtr serializeSnapshotToDisk(const KeeperStorageSnapshot & snapshot);
|
||||
SnapshotFileInfoPtr serializeSnapshotToDisk(const KeeperStorageSnapshot<Storage> & snapshot);
|
||||
|
||||
SnapshotDeserializationResult deserializeSnapshotFromBuffer(nuraft::ptr<nuraft::buffer> buffer) const;
|
||||
SnapshotDeserializationResult<Storage> deserializeSnapshotFromBuffer(nuraft::ptr<nuraft::buffer> buffer) const;
|
||||
|
||||
/// Deserialize snapshot with log index up_to_log_idx from disk into compressed nuraft buffer.
|
||||
nuraft::ptr<nuraft::buffer> deserializeSnapshotBufferFromDisk(uint64_t up_to_log_idx) const;
|
||||
|
@ -44,7 +44,7 @@ namespace ErrorCodes
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
KeeperStateMachine::KeeperStateMachine(
|
||||
IKeeperStateMachine::IKeeperStateMachine(
|
||||
ResponsesQueue & responses_queue_,
|
||||
SnapshotsQueue & snapshots_queue_,
|
||||
const KeeperContextPtr & keeper_context_,
|
||||
@ -52,12 +52,6 @@ KeeperStateMachine::KeeperStateMachine(
|
||||
CommitCallback commit_callback_,
|
||||
const std::string & superdigest_)
|
||||
: commit_callback(commit_callback_)
|
||||
, snapshot_manager(
|
||||
keeper_context_->getCoordinationSettings()->snapshots_to_keep,
|
||||
keeper_context_,
|
||||
keeper_context_->getCoordinationSettings()->compress_snapshots_with_zstd_format,
|
||||
superdigest_,
|
||||
keeper_context_->getCoordinationSettings()->dead_session_check_period_ms.totalMilliseconds())
|
||||
, responses_queue(responses_queue_)
|
||||
, snapshots_queue(snapshots_queue_)
|
||||
, min_request_size_to_cache(keeper_context_->getCoordinationSettings()->min_request_size_for_cache)
|
||||
@ -68,6 +62,32 @@ KeeperStateMachine::KeeperStateMachine(
|
||||
{
|
||||
}
|
||||
|
||||
template<typename Storage>
|
||||
KeeperStateMachine<Storage>::KeeperStateMachine(
|
||||
ResponsesQueue & responses_queue_,
|
||||
SnapshotsQueue & snapshots_queue_,
|
||||
// const CoordinationSettingsPtr & coordination_settings_,
|
||||
const KeeperContextPtr & keeper_context_,
|
||||
KeeperSnapshotManagerS3 * snapshot_manager_s3_,
|
||||
IKeeperStateMachine::CommitCallback commit_callback_,
|
||||
const std::string & superdigest_)
|
||||
: IKeeperStateMachine(
|
||||
responses_queue_,
|
||||
snapshots_queue_,
|
||||
/// coordination_settings_,
|
||||
keeper_context_,
|
||||
snapshot_manager_s3_,
|
||||
commit_callback_,
|
||||
superdigest_),
|
||||
snapshot_manager(
|
||||
keeper_context_->getCoordinationSettings()->snapshots_to_keep,
|
||||
keeper_context_,
|
||||
keeper_context_->getCoordinationSettings()->compress_snapshots_with_zstd_format,
|
||||
superdigest_,
|
||||
keeper_context_->getCoordinationSettings()->dead_session_check_period_ms.totalMilliseconds())
|
||||
{
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
@ -78,7 +98,8 @@ bool isLocalDisk(const IDisk & disk)
|
||||
|
||||
}
|
||||
|
||||
void KeeperStateMachine::init()
|
||||
template<typename Storage>
|
||||
void KeeperStateMachine<Storage>::init()
|
||||
{
|
||||
/// Do everything without mutexes, no other threads exist.
|
||||
LOG_DEBUG(log, "Totally have {} snapshots", snapshot_manager.totalSnapshots());
|
||||
@ -123,7 +144,7 @@ void KeeperStateMachine::init()
|
||||
LOG_DEBUG(log, "No existing snapshots, last committed log index {}", last_committed_idx);
|
||||
|
||||
if (!storage)
|
||||
storage = std::make_unique<KeeperStorage>(
|
||||
storage = std::make_unique<Storage>(
|
||||
keeper_context->getCoordinationSettings()->dead_session_check_period_ms.totalMilliseconds(), superdigest, keeper_context);
|
||||
}
|
||||
|
||||
@ -131,13 +152,13 @@ namespace
|
||||
{
|
||||
|
||||
void assertDigest(
|
||||
const KeeperStorage::Digest & expected,
|
||||
const KeeperStorage::Digest & actual,
|
||||
const KeeperStorageBase::Digest & expected,
|
||||
const KeeperStorageBase::Digest & actual,
|
||||
const Coordination::ZooKeeperRequest & request,
|
||||
uint64_t log_idx,
|
||||
bool committing)
|
||||
{
|
||||
if (!KeeperStorage::checkDigest(expected, actual))
|
||||
if (!KeeperStorageBase::checkDigest(expected, actual))
|
||||
{
|
||||
LOG_FATAL(
|
||||
getLogger("KeeperStateMachine"),
|
||||
@ -170,7 +191,8 @@ struct TSA_SCOPED_LOCKABLE LockGuardWithStats final
|
||||
|
||||
}
|
||||
|
||||
nuraft::ptr<nuraft::buffer> KeeperStateMachine::pre_commit(uint64_t log_idx, nuraft::buffer & data)
|
||||
template<typename Storage>
|
||||
nuraft::ptr<nuraft::buffer> KeeperStateMachine<Storage>::pre_commit(uint64_t log_idx, nuraft::buffer & data)
|
||||
{
|
||||
auto result = nuraft::buffer::alloc(sizeof(log_idx));
|
||||
nuraft::buffer_serializer ss(result);
|
||||
@ -191,10 +213,10 @@ nuraft::ptr<nuraft::buffer> KeeperStateMachine::pre_commit(uint64_t log_idx, nur
|
||||
return result;
|
||||
}
|
||||
|
||||
std::shared_ptr<KeeperStorage::RequestForSession> KeeperStateMachine::parseRequest(nuraft::buffer & data, bool final, ZooKeeperLogSerializationVersion * serialization_version)
|
||||
std::shared_ptr<KeeperStorageBase::RequestForSession> IKeeperStateMachine::parseRequest(nuraft::buffer & data, bool final, ZooKeeperLogSerializationVersion * serialization_version)
|
||||
{
|
||||
ReadBufferFromNuraftBuffer buffer(data);
|
||||
auto request_for_session = std::make_shared<KeeperStorage::RequestForSession>();
|
||||
auto request_for_session = std::make_shared<KeeperStorageBase::RequestForSession>();
|
||||
readIntBinary(request_for_session->session_id, buffer);
|
||||
|
||||
int32_t length;
|
||||
@ -267,7 +289,7 @@ std::shared_ptr<KeeperStorage::RequestForSession> KeeperStateMachine::parseReque
|
||||
|
||||
request_for_session->digest.emplace();
|
||||
readIntBinary(request_for_session->digest->version, buffer);
|
||||
if (request_for_session->digest->version != KeeperStorage::DigestVersion::NO_DIGEST || !buffer.eof())
|
||||
if (request_for_session->digest->version != KeeperStorageBase::DigestVersion::NO_DIGEST || !buffer.eof())
|
||||
readIntBinary(request_for_session->digest->value, buffer);
|
||||
}
|
||||
|
||||
@ -283,7 +305,8 @@ std::shared_ptr<KeeperStorage::RequestForSession> KeeperStateMachine::parseReque
|
||||
return request_for_session;
|
||||
}
|
||||
|
||||
bool KeeperStateMachine::preprocess(const KeeperStorage::RequestForSession & request_for_session)
|
||||
template<typename Storage>
|
||||
bool KeeperStateMachine<Storage>::preprocess(const KeeperStorageBase::RequestForSession & request_for_session)
|
||||
{
|
||||
const auto op_num = request_for_session.request->getOpNum();
|
||||
if (op_num == Coordination::OpNum::SessionID || op_num == Coordination::OpNum::Reconfig)
|
||||
@ -317,10 +340,11 @@ bool KeeperStateMachine::preprocess(const KeeperStorage::RequestForSession & req
|
||||
return true;
|
||||
}
|
||||
|
||||
void KeeperStateMachine::reconfigure(const KeeperStorage::RequestForSession& request_for_session)
|
||||
template<typename Storage>
|
||||
void KeeperStateMachine<Storage>::reconfigure(const KeeperStorageBase::RequestForSession& request_for_session)
|
||||
{
|
||||
LockGuardWithStats lock(storage_and_responses_lock);
|
||||
KeeperStorage::ResponseForSession response = processReconfiguration(request_for_session);
|
||||
KeeperStorageBase::ResponseForSession response = processReconfiguration(request_for_session);
|
||||
if (!responses_queue.push(response))
|
||||
{
|
||||
ProfileEvents::increment(ProfileEvents::KeeperCommitsFailed);
|
||||
@ -330,8 +354,9 @@ void KeeperStateMachine::reconfigure(const KeeperStorage::RequestForSession& req
|
||||
}
|
||||
}
|
||||
|
||||
KeeperStorage::ResponseForSession KeeperStateMachine::processReconfiguration(
|
||||
const KeeperStorage::RequestForSession & request_for_session)
|
||||
template<typename Storage>
|
||||
KeeperStorageBase::ResponseForSession KeeperStateMachine<Storage>::processReconfiguration(
|
||||
const KeeperStorageBase::RequestForSession & request_for_session)
|
||||
{
|
||||
ProfileEvents::increment(ProfileEvents::KeeperReconfigRequest);
|
||||
|
||||
@ -340,7 +365,7 @@ KeeperStorage::ResponseForSession KeeperStateMachine::processReconfiguration(
|
||||
const int64_t zxid = request_for_session.zxid;
|
||||
|
||||
using enum Coordination::Error;
|
||||
auto bad_request = [&](Coordination::Error code = ZBADARGUMENTS) -> KeeperStorage::ResponseForSession
|
||||
auto bad_request = [&](Coordination::Error code = ZBADARGUMENTS) -> KeeperStorageBase::ResponseForSession
|
||||
{
|
||||
auto res = std::make_shared<Coordination::ZooKeeperReconfigResponse>();
|
||||
res->xid = request.xid;
|
||||
@ -397,7 +422,8 @@ KeeperStorage::ResponseForSession KeeperStateMachine::processReconfiguration(
|
||||
return { session_id, std::move(response) };
|
||||
}
|
||||
|
||||
nuraft::ptr<nuraft::buffer> KeeperStateMachine::commit(const uint64_t log_idx, nuraft::buffer & data)
|
||||
template<typename Storage>
|
||||
nuraft::ptr<nuraft::buffer> KeeperStateMachine<Storage>::commit(const uint64_t log_idx, nuraft::buffer & data)
|
||||
{
|
||||
auto request_for_session = parseRequest(data, true);
|
||||
if (!request_for_session->zxid)
|
||||
@ -408,7 +434,7 @@ nuraft::ptr<nuraft::buffer> KeeperStateMachine::commit(const uint64_t log_idx, n
|
||||
if (!keeper_context->localLogsPreprocessed() && !preprocess(*request_for_session))
|
||||
return nullptr;
|
||||
|
||||
auto try_push = [&](const KeeperStorage::ResponseForSession & response)
|
||||
auto try_push = [&](const KeeperStorageBase::ResponseForSession & response)
|
||||
{
|
||||
if (!responses_queue.push(response))
|
||||
{
|
||||
@ -430,7 +456,7 @@ nuraft::ptr<nuraft::buffer> KeeperStateMachine::commit(const uint64_t log_idx, n
|
||||
std::shared_ptr<Coordination::ZooKeeperSessionIDResponse> response = std::make_shared<Coordination::ZooKeeperSessionIDResponse>();
|
||||
response->internal_id = session_id_request.internal_id;
|
||||
response->server_id = session_id_request.server_id;
|
||||
KeeperStorage::ResponseForSession response_for_session;
|
||||
KeeperStorageBase::ResponseForSession response_for_session;
|
||||
response_for_session.session_id = -1;
|
||||
response_for_session.response = response;
|
||||
response_for_session.request = request_for_session->request;
|
||||
@ -451,7 +477,7 @@ nuraft::ptr<nuraft::buffer> KeeperStateMachine::commit(const uint64_t log_idx, n
|
||||
}
|
||||
|
||||
LockGuardWithStats lock(storage_and_responses_lock);
|
||||
KeeperStorage::ResponsesForSessions responses_for_sessions
|
||||
KeeperStorageBase::ResponsesForSessions responses_for_sessions
|
||||
= storage->processRequest(request_for_session->request, request_for_session->session_id, request_for_session->zxid);
|
||||
|
||||
for (auto & response_for_session : responses_for_sessions)
|
||||
@ -482,7 +508,8 @@ nuraft::ptr<nuraft::buffer> KeeperStateMachine::commit(const uint64_t log_idx, n
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
bool KeeperStateMachine::apply_snapshot(nuraft::snapshot & s)
|
||||
template<typename Storage>
|
||||
bool KeeperStateMachine<Storage>::apply_snapshot(nuraft::snapshot & s)
|
||||
{
|
||||
LOG_DEBUG(log, "Applying snapshot {}", s.get_last_log_idx());
|
||||
nuraft::ptr<nuraft::buffer> latest_snapshot_ptr;
|
||||
@ -509,7 +536,7 @@ bool KeeperStateMachine::apply_snapshot(nuraft::snapshot & s)
|
||||
{ /// deserialize and apply snapshot to storage
|
||||
LockGuardWithStats lock(storage_and_responses_lock);
|
||||
|
||||
SnapshotDeserializationResult snapshot_deserialization_result;
|
||||
SnapshotDeserializationResult<Storage> snapshot_deserialization_result;
|
||||
if (latest_snapshot_ptr)
|
||||
snapshot_deserialization_result = snapshot_manager.deserializeSnapshotFromBuffer(latest_snapshot_ptr);
|
||||
else
|
||||
@ -530,7 +557,7 @@ bool KeeperStateMachine::apply_snapshot(nuraft::snapshot & s)
|
||||
}
|
||||
|
||||
|
||||
void KeeperStateMachine::commit_config(const uint64_t log_idx, nuraft::ptr<nuraft::cluster_config> & new_conf)
|
||||
void IKeeperStateMachine::commit_config(const uint64_t log_idx, nuraft::ptr<nuraft::cluster_config> & new_conf)
|
||||
{
|
||||
std::lock_guard lock(cluster_config_lock);
|
||||
auto tmp = new_conf->serialize();
|
||||
@ -538,7 +565,7 @@ void KeeperStateMachine::commit_config(const uint64_t log_idx, nuraft::ptr<nuraf
|
||||
keeper_context->setLastCommitIndex(log_idx);
|
||||
}
|
||||
|
||||
void KeeperStateMachine::rollback(uint64_t log_idx, nuraft::buffer & data)
|
||||
void IKeeperStateMachine::rollback(uint64_t log_idx, nuraft::buffer & data)
|
||||
{
|
||||
/// Don't rollback anything until the first commit because nothing was preprocessed
|
||||
if (!keeper_context->localLogsPreprocessed())
|
||||
@ -554,7 +581,8 @@ void KeeperStateMachine::rollback(uint64_t log_idx, nuraft::buffer & data)
|
||||
rollbackRequest(*request_for_session, false);
|
||||
}
|
||||
|
||||
void KeeperStateMachine::rollbackRequest(const KeeperStorage::RequestForSession & request_for_session, bool allow_missing)
|
||||
template<typename Storage>
|
||||
void KeeperStateMachine<Storage>::rollbackRequest(const KeeperStorageBase::RequestForSession & request_for_session, bool allow_missing)
|
||||
{
|
||||
if (request_for_session.request->getOpNum() == Coordination::OpNum::SessionID)
|
||||
return;
|
||||
@ -563,7 +591,8 @@ void KeeperStateMachine::rollbackRequest(const KeeperStorage::RequestForSession
|
||||
storage->rollbackRequest(request_for_session.zxid, allow_missing);
|
||||
}
|
||||
|
||||
void KeeperStateMachine::rollbackRequestNoLock(const KeeperStorage::RequestForSession & request_for_session, bool allow_missing)
|
||||
template<typename Storage>
|
||||
void KeeperStateMachine<Storage>::rollbackRequestNoLock(const KeeperStorageBase::RequestForSession & request_for_session, bool allow_missing)
|
||||
{
|
||||
if (request_for_session.request->getOpNum() == Coordination::OpNum::SessionID)
|
||||
return;
|
||||
@ -571,14 +600,15 @@ void KeeperStateMachine::rollbackRequestNoLock(const KeeperStorage::RequestForSe
|
||||
storage->rollbackRequest(request_for_session.zxid, allow_missing);
|
||||
}
|
||||
|
||||
nuraft::ptr<nuraft::snapshot> KeeperStateMachine::last_snapshot()
|
||||
nuraft::ptr<nuraft::snapshot> IKeeperStateMachine::last_snapshot()
|
||||
{
|
||||
/// Just return the latest snapshot.
|
||||
std::lock_guard lock(snapshots_lock);
|
||||
return latest_snapshot_meta;
|
||||
}
|
||||
|
||||
void KeeperStateMachine::create_snapshot(nuraft::snapshot & s, nuraft::async_result<bool>::handler_type & when_done)
|
||||
template<typename Storage>
|
||||
void KeeperStateMachine<Storage>::create_snapshot(nuraft::snapshot & s, nuraft::async_result<bool>::handler_type & when_done)
|
||||
{
|
||||
LOG_DEBUG(log, "Creating snapshot {}", s.get_last_log_idx());
|
||||
|
||||
@ -587,14 +617,15 @@ void KeeperStateMachine::create_snapshot(nuraft::snapshot & s, nuraft::async_res
|
||||
CreateSnapshotTask snapshot_task;
|
||||
{ /// lock storage for a short period time to turn on "snapshot mode". After that we can read consistent storage state without locking.
|
||||
LockGuardWithStats lock(storage_and_responses_lock);
|
||||
snapshot_task.snapshot = std::make_shared<KeeperStorageSnapshot>(storage.get(), snapshot_meta_copy, getClusterConfig());
|
||||
snapshot_task.snapshot = std::make_shared<KeeperStorageSnapshot<Storage>>(storage.get(), snapshot_meta_copy, getClusterConfig());
|
||||
}
|
||||
|
||||
/// create snapshot task for background execution (in snapshot thread)
|
||||
snapshot_task.create_snapshot = [this, when_done](KeeperStorageSnapshotPtr && snapshot, bool execute_only_cleanup)
|
||||
snapshot_task.create_snapshot = [this, when_done](KeeperStorageSnapshotPtr && snapshot_, bool execute_only_cleanup)
|
||||
{
|
||||
nuraft::ptr<std::exception> exception(nullptr);
|
||||
bool ret = false;
|
||||
auto && snapshot = std::get<std::shared_ptr<KeeperStorageSnapshot<Storage>>>(std::move(snapshot_));
|
||||
if (!execute_only_cleanup)
|
||||
{
|
||||
try
|
||||
@ -683,7 +714,8 @@ void KeeperStateMachine::create_snapshot(nuraft::snapshot & s, nuraft::async_res
|
||||
LOG_WARNING(log, "Cannot push snapshot task into queue");
|
||||
}
|
||||
|
||||
void KeeperStateMachine::save_logical_snp_obj(
|
||||
template<typename Storage>
|
||||
void KeeperStateMachine<Storage>::save_logical_snp_obj(
|
||||
nuraft::snapshot & s, uint64_t & obj_id, nuraft::buffer & data, bool /*is_first_obj*/, bool /*is_last_obj*/)
|
||||
{
|
||||
LOG_DEBUG(log, "Saving snapshot {} obj_id {}", s.get_last_log_idx(), obj_id);
|
||||
@ -748,7 +780,7 @@ static int bufferFromFile(LoggerPtr log, const std::string & path, nuraft::ptr<n
|
||||
return 0;
|
||||
}
|
||||
|
||||
int KeeperStateMachine::read_logical_snp_obj(
|
||||
int IKeeperStateMachine::read_logical_snp_obj(
|
||||
nuraft::snapshot & s, void *& /*user_snp_ctx*/, uint64_t obj_id, nuraft::ptr<nuraft::buffer> & data_out, bool & is_last_obj)
|
||||
{
|
||||
LOG_DEBUG(log, "Reading snapshot {} obj_id {}", s.get_last_log_idx(), obj_id);
|
||||
@ -788,7 +820,8 @@ int KeeperStateMachine::read_logical_snp_obj(
|
||||
return 1;
|
||||
}
|
||||
|
||||
void KeeperStateMachine::processReadRequest(const KeeperStorage::RequestForSession & request_for_session)
|
||||
template<typename Storage>
|
||||
void KeeperStateMachine<Storage>::processReadRequest(const KeeperStorageBase::RequestForSession & request_for_session)
|
||||
{
|
||||
/// Pure local request, just process it with storage
|
||||
LockGuardWithStats lock(storage_and_responses_lock);
|
||||
@ -804,103 +837,120 @@ void KeeperStateMachine::processReadRequest(const KeeperStorage::RequestForSessi
|
||||
}
|
||||
}
|
||||
|
||||
void KeeperStateMachine::shutdownStorage()
|
||||
template<typename Storage>
|
||||
void KeeperStateMachine<Storage>::shutdownStorage()
|
||||
{
|
||||
LockGuardWithStats lock(storage_and_responses_lock);
|
||||
storage->finalize();
|
||||
}
|
||||
|
||||
std::vector<int64_t> KeeperStateMachine::getDeadSessions()
|
||||
template<typename Storage>
|
||||
std::vector<int64_t> KeeperStateMachine<Storage>::getDeadSessions()
|
||||
{
|
||||
LockGuardWithStats lock(storage_and_responses_lock);
|
||||
return storage->getDeadSessions();
|
||||
}
|
||||
|
||||
int64_t KeeperStateMachine::getNextZxid() const
|
||||
template<typename Storage>
|
||||
int64_t KeeperStateMachine<Storage>::getNextZxid() const
|
||||
{
|
||||
LockGuardWithStats lock(storage_and_responses_lock);
|
||||
return storage->getNextZXID();
|
||||
}
|
||||
|
||||
KeeperStorage::Digest KeeperStateMachine::getNodesDigest() const
|
||||
template<typename Storage>
|
||||
KeeperStorageBase::Digest KeeperStateMachine<Storage>::getNodesDigest() const
|
||||
{
|
||||
LockGuardWithStats lock(storage_and_responses_lock);
|
||||
return storage->getNodesDigest(false);
|
||||
}
|
||||
|
||||
uint64_t KeeperStateMachine::getLastProcessedZxid() const
|
||||
template<typename Storage>
|
||||
uint64_t KeeperStateMachine<Storage>::getLastProcessedZxid() const
|
||||
{
|
||||
LockGuardWithStats lock(storage_and_responses_lock);
|
||||
return storage->getZXID();
|
||||
}
|
||||
|
||||
uint64_t KeeperStateMachine::getNodesCount() const
|
||||
template<typename Storage>
|
||||
uint64_t KeeperStateMachine<Storage>::getNodesCount() const
|
||||
{
|
||||
LockGuardWithStats lock(storage_and_responses_lock);
|
||||
return storage->getNodesCount();
|
||||
}
|
||||
|
||||
uint64_t KeeperStateMachine::getTotalWatchesCount() const
|
||||
template<typename Storage>
|
||||
uint64_t KeeperStateMachine<Storage>::getTotalWatchesCount() const
|
||||
{
|
||||
LockGuardWithStats lock(storage_and_responses_lock);
|
||||
return storage->getTotalWatchesCount();
|
||||
}
|
||||
|
||||
uint64_t KeeperStateMachine::getWatchedPathsCount() const
|
||||
template<typename Storage>
|
||||
uint64_t KeeperStateMachine<Storage>::getWatchedPathsCount() const
|
||||
{
|
||||
LockGuardWithStats lock(storage_and_responses_lock);
|
||||
return storage->getWatchedPathsCount();
|
||||
}
|
||||
|
||||
uint64_t KeeperStateMachine::getSessionsWithWatchesCount() const
|
||||
template<typename Storage>
|
||||
uint64_t KeeperStateMachine<Storage>::getSessionsWithWatchesCount() const
|
||||
{
|
||||
LockGuardWithStats lock(storage_and_responses_lock);
|
||||
return storage->getSessionsWithWatchesCount();
|
||||
}
|
||||
|
||||
uint64_t KeeperStateMachine::getTotalEphemeralNodesCount() const
|
||||
template<typename Storage>
|
||||
uint64_t KeeperStateMachine<Storage>::getTotalEphemeralNodesCount() const
|
||||
{
|
||||
LockGuardWithStats lock(storage_and_responses_lock);
|
||||
return storage->getTotalEphemeralNodesCount();
|
||||
}
|
||||
|
||||
uint64_t KeeperStateMachine::getSessionWithEphemeralNodesCount() const
|
||||
template<typename Storage>
|
||||
uint64_t KeeperStateMachine<Storage>::getSessionWithEphemeralNodesCount() const
|
||||
{
|
||||
LockGuardWithStats lock(storage_and_responses_lock);
|
||||
return storage->getSessionWithEphemeralNodesCount();
|
||||
}
|
||||
|
||||
void KeeperStateMachine::dumpWatches(WriteBufferFromOwnString & buf) const
|
||||
template<typename Storage>
|
||||
void KeeperStateMachine<Storage>::dumpWatches(WriteBufferFromOwnString & buf) const
|
||||
{
|
||||
LockGuardWithStats lock(storage_and_responses_lock);
|
||||
storage->dumpWatches(buf);
|
||||
}
|
||||
|
||||
void KeeperStateMachine::dumpWatchesByPath(WriteBufferFromOwnString & buf) const
|
||||
template<typename Storage>
|
||||
void KeeperStateMachine<Storage>::dumpWatchesByPath(WriteBufferFromOwnString & buf) const
|
||||
{
|
||||
LockGuardWithStats lock(storage_and_responses_lock);
|
||||
storage->dumpWatchesByPath(buf);
|
||||
}
|
||||
|
||||
void KeeperStateMachine::dumpSessionsAndEphemerals(WriteBufferFromOwnString & buf) const
|
||||
template<typename Storage>
|
||||
void KeeperStateMachine<Storage>::dumpSessionsAndEphemerals(WriteBufferFromOwnString & buf) const
|
||||
{
|
||||
LockGuardWithStats lock(storage_and_responses_lock);
|
||||
storage->dumpSessionsAndEphemerals(buf);
|
||||
}
|
||||
|
||||
uint64_t KeeperStateMachine::getApproximateDataSize() const
|
||||
template<typename Storage>
|
||||
uint64_t KeeperStateMachine<Storage>::getApproximateDataSize() const
|
||||
{
|
||||
LockGuardWithStats lock(storage_and_responses_lock);
|
||||
return storage->getApproximateDataSize();
|
||||
}
|
||||
|
||||
uint64_t KeeperStateMachine::getKeyArenaSize() const
|
||||
template<typename Storage>
|
||||
uint64_t KeeperStateMachine<Storage>::getKeyArenaSize() const
|
||||
{
|
||||
LockGuardWithStats lock(storage_and_responses_lock);
|
||||
return storage->getArenaDataSize();
|
||||
}
|
||||
|
||||
uint64_t KeeperStateMachine::getLatestSnapshotSize() const
|
||||
template<typename Storage>
|
||||
uint64_t KeeperStateMachine<Storage>::getLatestSnapshotSize() const
|
||||
{
|
||||
auto snapshot_info = [&]
|
||||
{
|
||||
@ -923,7 +973,7 @@ uint64_t KeeperStateMachine::getLatestSnapshotSize() const
|
||||
return size;
|
||||
}
|
||||
|
||||
ClusterConfigPtr KeeperStateMachine::getClusterConfig() const
|
||||
ClusterConfigPtr IKeeperStateMachine::getClusterConfig() const
|
||||
{
|
||||
std::lock_guard lock(cluster_config_lock);
|
||||
if (cluster_config)
|
||||
@ -935,11 +985,18 @@ ClusterConfigPtr KeeperStateMachine::getClusterConfig() const
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
void KeeperStateMachine::recalculateStorageStats()
|
||||
template<typename Storage>
|
||||
void KeeperStateMachine<Storage>::recalculateStorageStats()
|
||||
{
|
||||
LockGuardWithStats lock(storage_and_responses_lock);
|
||||
LOG_INFO(log, "Recalculating storage stats");
|
||||
storage->recalculateStats();
|
||||
LOG_INFO(log, "Done recalculating storage stats");
|
||||
}
|
||||
|
||||
template class KeeperStateMachine<KeeperMemoryStorage>;
|
||||
#if USE_ROCKSDB
|
||||
template class KeeperStateMachine<KeeperRocksStorage>;
|
||||
#endif
|
||||
|
||||
}
|
||||
|
@ -11,26 +11,24 @@
|
||||
|
||||
namespace DB
|
||||
{
|
||||
using ResponsesQueue = ConcurrentBoundedQueue<KeeperStorage::ResponseForSession>;
|
||||
using ResponsesQueue = ConcurrentBoundedQueue<KeeperStorageBase::ResponseForSession>;
|
||||
using SnapshotsQueue = ConcurrentBoundedQueue<CreateSnapshotTask>;
|
||||
|
||||
/// ClickHouse Keeper state machine. Wrapper for KeeperStorage.
|
||||
/// Responsible for entries commit, snapshots creation and so on.
|
||||
class KeeperStateMachine : public nuraft::state_machine
|
||||
class IKeeperStateMachine : public nuraft::state_machine
|
||||
{
|
||||
public:
|
||||
using CommitCallback = std::function<void(uint64_t, const KeeperStorage::RequestForSession &)>;
|
||||
using CommitCallback = std::function<void(uint64_t, const KeeperStorageBase::RequestForSession &)>;
|
||||
|
||||
KeeperStateMachine(
|
||||
IKeeperStateMachine(
|
||||
ResponsesQueue & responses_queue_,
|
||||
SnapshotsQueue & snapshots_queue_,
|
||||
const KeeperContextPtr & keeper_context_,
|
||||
KeeperSnapshotManagerS3 * snapshot_manager_s3_,
|
||||
CommitCallback commit_callback_ = {},
|
||||
const std::string & superdigest_ = "");
|
||||
CommitCallback commit_callback_,
|
||||
const std::string & superdigest_);
|
||||
|
||||
/// Read state from the latest snapshot
|
||||
void init();
|
||||
virtual void init() = 0;
|
||||
|
||||
enum ZooKeeperLogSerializationVersion
|
||||
{
|
||||
@ -47,89 +45,66 @@ public:
|
||||
///
|
||||
/// final - whether it's the final time we will fetch the request so we can safely remove it from cache
|
||||
/// serialization_version - information about which fields were parsed from the buffer so we can modify the buffer accordingly
|
||||
std::shared_ptr<KeeperStorage::RequestForSession> parseRequest(nuraft::buffer & data, bool final, ZooKeeperLogSerializationVersion * serialization_version = nullptr);
|
||||
std::shared_ptr<KeeperStorageBase::RequestForSession> parseRequest(nuraft::buffer & data, bool final, ZooKeeperLogSerializationVersion * serialization_version = nullptr);
|
||||
|
||||
bool preprocess(const KeeperStorage::RequestForSession & request_for_session);
|
||||
virtual bool preprocess(const KeeperStorageBase::RequestForSession & request_for_session) = 0;
|
||||
|
||||
nuraft::ptr<nuraft::buffer> pre_commit(uint64_t log_idx, nuraft::buffer & data) override;
|
||||
|
||||
nuraft::ptr<nuraft::buffer> commit(const uint64_t log_idx, nuraft::buffer & data) override; /// NOLINT
|
||||
|
||||
/// Save new cluster config to our snapshot (copy of the config stored in StateManager)
|
||||
void commit_config(const uint64_t log_idx, nuraft::ptr<nuraft::cluster_config> & new_conf) override; /// NOLINT
|
||||
|
||||
void rollback(uint64_t log_idx, nuraft::buffer & data) override;
|
||||
|
||||
// allow_missing - whether the transaction we want to rollback can be missing from storage
|
||||
// (can happen in case of exception during preprocessing)
|
||||
void rollbackRequest(const KeeperStorage::RequestForSession & request_for_session, bool allow_missing);
|
||||
|
||||
void rollbackRequestNoLock(
|
||||
const KeeperStorage::RequestForSession & request_for_session,
|
||||
bool allow_missing) TSA_NO_THREAD_SAFETY_ANALYSIS;
|
||||
virtual void rollbackRequest(const KeeperStorageBase::RequestForSession & request_for_session, bool allow_missing) = 0;
|
||||
|
||||
uint64_t last_commit_index() override { return keeper_context->lastCommittedIndex(); }
|
||||
|
||||
/// Apply preliminarily saved (save_logical_snp_obj) snapshot to our state.
|
||||
bool apply_snapshot(nuraft::snapshot & s) override;
|
||||
|
||||
nuraft::ptr<nuraft::snapshot> last_snapshot() override;
|
||||
|
||||
/// Create new snapshot from current state.
|
||||
void create_snapshot(nuraft::snapshot & s, nuraft::async_result<bool>::handler_type & when_done) override;
|
||||
void create_snapshot(nuraft::snapshot & s, nuraft::async_result<bool>::handler_type & when_done) override = 0;
|
||||
|
||||
/// Save snapshot which was send by leader to us. After that we will apply it in apply_snapshot.
|
||||
void save_logical_snp_obj(nuraft::snapshot & s, uint64_t & obj_id, nuraft::buffer & data, bool is_first_obj, bool is_last_obj) override;
|
||||
void save_logical_snp_obj(nuraft::snapshot & s, uint64_t & obj_id, nuraft::buffer & data, bool is_first_obj, bool is_last_obj) override = 0;
|
||||
|
||||
/// Better name is `serialize snapshot` -- save existing snapshot (created by create_snapshot) into
|
||||
/// in-memory buffer data_out.
|
||||
int read_logical_snp_obj(
|
||||
nuraft::snapshot & s, void *& user_snp_ctx, uint64_t obj_id, nuraft::ptr<nuraft::buffer> & data_out, bool & is_last_obj) override;
|
||||
|
||||
// This should be used only for tests or keeper-data-dumper because it violates
|
||||
// TSA -- we can't acquire the lock outside of this class or return a storage under lock
|
||||
// in a reasonable way.
|
||||
KeeperStorage & getStorageUnsafe() TSA_NO_THREAD_SAFETY_ANALYSIS
|
||||
{
|
||||
return *storage;
|
||||
}
|
||||
|
||||
void shutdownStorage();
|
||||
virtual void shutdownStorage() = 0;
|
||||
|
||||
ClusterConfigPtr getClusterConfig() const;
|
||||
|
||||
/// Process local read request
|
||||
void processReadRequest(const KeeperStorage::RequestForSession & request_for_session);
|
||||
virtual void processReadRequest(const KeeperStorageBase::RequestForSession & request_for_session) = 0;
|
||||
|
||||
std::vector<int64_t> getDeadSessions();
|
||||
virtual std::vector<int64_t> getDeadSessions() = 0;
|
||||
|
||||
int64_t getNextZxid() const;
|
||||
virtual int64_t getNextZxid() const = 0;
|
||||
|
||||
KeeperStorage::Digest getNodesDigest() const;
|
||||
virtual KeeperStorageBase::Digest getNodesDigest() const = 0;
|
||||
|
||||
/// Introspection functions for 4lw commands
|
||||
uint64_t getLastProcessedZxid() const;
|
||||
virtual uint64_t getLastProcessedZxid() const = 0;
|
||||
|
||||
uint64_t getNodesCount() const;
|
||||
uint64_t getTotalWatchesCount() const;
|
||||
uint64_t getWatchedPathsCount() const;
|
||||
uint64_t getSessionsWithWatchesCount() const;
|
||||
virtual uint64_t getNodesCount() const = 0;
|
||||
virtual uint64_t getTotalWatchesCount() const = 0;
|
||||
virtual uint64_t getWatchedPathsCount() const = 0;
|
||||
virtual uint64_t getSessionsWithWatchesCount() const = 0;
|
||||
|
||||
void dumpWatches(WriteBufferFromOwnString & buf) const;
|
||||
void dumpWatchesByPath(WriteBufferFromOwnString & buf) const;
|
||||
void dumpSessionsAndEphemerals(WriteBufferFromOwnString & buf) const;
|
||||
virtual void dumpWatches(WriteBufferFromOwnString & buf) const = 0;
|
||||
virtual void dumpWatchesByPath(WriteBufferFromOwnString & buf) const = 0;
|
||||
virtual void dumpSessionsAndEphemerals(WriteBufferFromOwnString & buf) const = 0;
|
||||
|
||||
uint64_t getSessionWithEphemeralNodesCount() const;
|
||||
uint64_t getTotalEphemeralNodesCount() const;
|
||||
uint64_t getApproximateDataSize() const;
|
||||
uint64_t getKeyArenaSize() const;
|
||||
uint64_t getLatestSnapshotSize() const;
|
||||
virtual uint64_t getSessionWithEphemeralNodesCount() const = 0;
|
||||
virtual uint64_t getTotalEphemeralNodesCount() const = 0;
|
||||
virtual uint64_t getApproximateDataSize() const = 0;
|
||||
virtual uint64_t getKeyArenaSize() const = 0;
|
||||
virtual uint64_t getLatestSnapshotSize() const = 0;
|
||||
|
||||
void recalculateStorageStats();
|
||||
virtual void recalculateStorageStats() = 0;
|
||||
|
||||
void reconfigure(const KeeperStorage::RequestForSession& request_for_session);
|
||||
virtual void reconfigure(const KeeperStorageBase::RequestForSession& request_for_session) = 0;
|
||||
|
||||
private:
|
||||
protected:
|
||||
CommitCallback commit_callback;
|
||||
/// In our state machine we always have a single snapshot which is stored
|
||||
/// in memory in compressed (serialized) format.
|
||||
@ -137,12 +112,9 @@ private:
|
||||
std::shared_ptr<SnapshotFileInfo> latest_snapshot_info;
|
||||
nuraft::ptr<nuraft::buffer> latest_snapshot_buf = nullptr;
|
||||
|
||||
/// Main state machine logic
|
||||
KeeperStoragePtr storage TSA_PT_GUARDED_BY(storage_and_responses_lock);
|
||||
CoordinationSettingsPtr coordination_settings;
|
||||
|
||||
/// Save/Load and Serialize/Deserialize logic for snapshots.
|
||||
KeeperSnapshotManager snapshot_manager;
|
||||
|
||||
/// Put processed responses into this queue
|
||||
ResponsesQueue & responses_queue;
|
||||
|
||||
@ -159,7 +131,7 @@ private:
|
||||
/// for request.
|
||||
mutable std::mutex storage_and_responses_lock;
|
||||
|
||||
std::unordered_map<int64_t, std::unordered_map<Coordination::XID, std::shared_ptr<KeeperStorage::RequestForSession>>> parsed_request_cache;
|
||||
std::unordered_map<int64_t, std::unordered_map<Coordination::XID, std::shared_ptr<KeeperStorageBase::RequestForSession>>> parsed_request_cache;
|
||||
uint64_t min_request_size_to_cache{0};
|
||||
/// we only need to protect the access to the map itself
|
||||
/// requests can be modified from anywhere without lock because a single request
|
||||
@ -181,7 +153,104 @@ private:
|
||||
|
||||
KeeperSnapshotManagerS3 * snapshot_manager_s3;
|
||||
|
||||
KeeperStorage::ResponseForSession processReconfiguration(const KeeperStorage::RequestForSession & request_for_session)
|
||||
TSA_REQUIRES(storage_and_responses_lock);
|
||||
virtual KeeperStorageBase::ResponseForSession processReconfiguration(
|
||||
const KeeperStorageBase::RequestForSession& request_for_session)
|
||||
TSA_REQUIRES(storage_and_responses_lock) = 0;
|
||||
|
||||
};
|
||||
|
||||
/// ClickHouse Keeper state machine. Wrapper for KeeperStorage.
|
||||
/// Responsible for entries commit, snapshots creation and so on.
|
||||
template<typename Storage>
|
||||
class KeeperStateMachine : public IKeeperStateMachine
|
||||
{
|
||||
public:
|
||||
/// using CommitCallback = std::function<void(uint64_t, const KeeperStorage::RequestForSession &)>;
|
||||
|
||||
KeeperStateMachine(
|
||||
ResponsesQueue & responses_queue_,
|
||||
SnapshotsQueue & snapshots_queue_,
|
||||
/// const CoordinationSettingsPtr & coordination_settings_,
|
||||
const KeeperContextPtr & keeper_context_,
|
||||
KeeperSnapshotManagerS3 * snapshot_manager_s3_,
|
||||
CommitCallback commit_callback_ = {},
|
||||
const std::string & superdigest_ = "");
|
||||
|
||||
/// Read state from the latest snapshot
|
||||
void init() override;
|
||||
|
||||
bool preprocess(const KeeperStorageBase::RequestForSession & request_for_session) override;
|
||||
|
||||
nuraft::ptr<nuraft::buffer> pre_commit(uint64_t log_idx, nuraft::buffer & data) override;
|
||||
|
||||
nuraft::ptr<nuraft::buffer> commit(const uint64_t log_idx, nuraft::buffer & data) override; /// NOLINT
|
||||
|
||||
// allow_missing - whether the transaction we want to rollback can be missing from storage
|
||||
// (can happen in case of exception during preprocessing)
|
||||
void rollbackRequest(const KeeperStorageBase::RequestForSession & request_for_session, bool allow_missing) override;
|
||||
|
||||
void rollbackRequestNoLock(
|
||||
const KeeperStorageBase::RequestForSession & request_for_session,
|
||||
bool allow_missing) TSA_NO_THREAD_SAFETY_ANALYSIS;
|
||||
|
||||
/// Apply preliminarily saved (save_logical_snp_obj) snapshot to our state.
|
||||
bool apply_snapshot(nuraft::snapshot & s) override;
|
||||
|
||||
/// Create new snapshot from current state.
|
||||
void create_snapshot(nuraft::snapshot & s, nuraft::async_result<bool>::handler_type & when_done) override;
|
||||
|
||||
/// Save snapshot which was send by leader to us. After that we will apply it in apply_snapshot.
|
||||
void save_logical_snp_obj(nuraft::snapshot & s, uint64_t & obj_id, nuraft::buffer & data, bool is_first_obj, bool is_last_obj) override;
|
||||
|
||||
// This should be used only for tests or keeper-data-dumper because it violates
|
||||
// TSA -- we can't acquire the lock outside of this class or return a storage under lock
|
||||
// in a reasonable way.
|
||||
Storage & getStorageUnsafe() TSA_NO_THREAD_SAFETY_ANALYSIS
|
||||
{
|
||||
return *storage;
|
||||
}
|
||||
|
||||
void shutdownStorage() override;
|
||||
|
||||
/// Process local read request
|
||||
void processReadRequest(const KeeperStorageBase::RequestForSession & request_for_session) override;
|
||||
|
||||
std::vector<int64_t> getDeadSessions() override;
|
||||
|
||||
int64_t getNextZxid() const override;
|
||||
|
||||
KeeperStorageBase::Digest getNodesDigest() const override;
|
||||
|
||||
/// Introspection functions for 4lw commands
|
||||
uint64_t getLastProcessedZxid() const override;
|
||||
|
||||
uint64_t getNodesCount() const override;
|
||||
uint64_t getTotalWatchesCount() const override;
|
||||
uint64_t getWatchedPathsCount() const override;
|
||||
uint64_t getSessionsWithWatchesCount() const override;
|
||||
|
||||
void dumpWatches(WriteBufferFromOwnString & buf) const override;
|
||||
void dumpWatchesByPath(WriteBufferFromOwnString & buf) const override;
|
||||
void dumpSessionsAndEphemerals(WriteBufferFromOwnString & buf) const override;
|
||||
|
||||
uint64_t getSessionWithEphemeralNodesCount() const override;
|
||||
uint64_t getTotalEphemeralNodesCount() const override;
|
||||
uint64_t getApproximateDataSize() const override;
|
||||
uint64_t getKeyArenaSize() const override;
|
||||
uint64_t getLatestSnapshotSize() const override;
|
||||
|
||||
void recalculateStorageStats() override;
|
||||
|
||||
void reconfigure(const KeeperStorageBase::RequestForSession& request_for_session) override;
|
||||
|
||||
private:
|
||||
/// Main state machine logic
|
||||
std::unique_ptr<Storage> storage; //TSA_PT_GUARDED_BY(storage_and_responses_lock);
|
||||
|
||||
/// Save/Load and Serialize/Deserialize logic for snapshots.
|
||||
KeeperSnapshotManager<Storage> snapshot_manager;
|
||||
|
||||
KeeperStorageBase::ResponseForSession processReconfiguration(const KeeperStorageBase::RequestForSession & request_for_session)
|
||||
TSA_REQUIRES(storage_and_responses_lock) override;
|
||||
};
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user