mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-10 01:25:21 +00:00
Merge branch 'master' into jaccard_similarity
This commit is contained in:
commit
08b2441784
43
.github/workflows/master.yml
vendored
43
.github/workflows/master.yml
vendored
@ -850,6 +850,48 @@ jobs:
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
BuilderBinRISCV64:
|
||||
needs: [DockerHubPush]
|
||||
runs-on: [self-hosted, builder]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/build_check
|
||||
IMAGES_PATH=${{runner.temp}}/images_path
|
||||
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
|
||||
CACHES_PATH=${{runner.temp}}/../ccaches
|
||||
BUILD_NAME=binary_riscv64
|
||||
EOF
|
||||
- name: Download changed images
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
name: changed_images
|
||||
path: ${{ env.IMAGES_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
submodules: true
|
||||
fetch-depth: 0 # otherwise we will have no info about contributors
|
||||
- name: Build
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
|
||||
- name: Upload build URLs to artifacts
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
############################################################################################
|
||||
##################################### Docker images #######################################
|
||||
############################################################################################
|
||||
@ -932,6 +974,7 @@ jobs:
|
||||
- BuilderBinDarwinAarch64
|
||||
- BuilderBinFreeBSD
|
||||
- BuilderBinPPC64
|
||||
- BuilderBinRISCV64
|
||||
- BuilderBinAmd64Compat
|
||||
- BuilderBinAarch64V80Compat
|
||||
- BuilderBinClangTidy
|
||||
|
42
.github/workflows/pull_request.yml
vendored
42
.github/workflows/pull_request.yml
vendored
@ -911,6 +911,47 @@ jobs:
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
BuilderBinRISCV64:
|
||||
needs: [DockerHubPush, FastTest, StyleCheck]
|
||||
runs-on: [self-hosted, builder]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/build_check
|
||||
IMAGES_PATH=${{runner.temp}}/images_path
|
||||
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
|
||||
CACHES_PATH=${{runner.temp}}/../ccaches
|
||||
BUILD_NAME=binary_riscv64
|
||||
EOF
|
||||
- name: Download changed images
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
name: changed_images
|
||||
path: ${{ env.IMAGES_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
submodules: true
|
||||
- name: Build
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
|
||||
- name: Upload build URLs to artifacts
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
############################################################################################
|
||||
##################################### Docker images #######################################
|
||||
############################################################################################
|
||||
@ -992,6 +1033,7 @@ jobs:
|
||||
- BuilderBinDarwinAarch64
|
||||
- BuilderBinFreeBSD
|
||||
- BuilderBinPPC64
|
||||
- BuilderBinRISCV64
|
||||
- BuilderBinAmd64Compat
|
||||
- BuilderBinAarch64V80Compat
|
||||
- BuilderBinClangTidy
|
||||
|
@ -33,6 +33,19 @@ if (CMAKE_CROSSCOMPILING)
|
||||
elseif (ARCH_PPC64LE)
|
||||
set (ENABLE_GRPC OFF CACHE INTERNAL "")
|
||||
set (ENABLE_SENTRY OFF CACHE INTERNAL "")
|
||||
elseif (ARCH_RISCV64)
|
||||
# RISC-V support is preliminary
|
||||
set (GLIBC_COMPATIBILITY OFF CACHE INTERNAL "")
|
||||
set (ENABLE_LDAP OFF CACHE INTERNAL "")
|
||||
set (OPENSSL_NO_ASM ON CACHE INTERNAL "")
|
||||
set (ENABLE_JEMALLOC ON CACHE INTERNAL "")
|
||||
set (ENABLE_PARQUET OFF CACHE INTERNAL "")
|
||||
set (USE_UNWIND OFF CACHE INTERNAL "")
|
||||
set (ENABLE_GRPC OFF CACHE INTERNAL "")
|
||||
set (ENABLE_HDFS OFF CACHE INTERNAL "")
|
||||
set (ENABLE_MYSQL OFF CACHE INTERNAL "")
|
||||
# It might be ok, but we need to update 'sysroot'
|
||||
set (ENABLE_RUST OFF CACHE INTERNAL "")
|
||||
elseif (ARCH_S390X)
|
||||
set (ENABLE_GRPC OFF CACHE INTERNAL "")
|
||||
set (ENABLE_SENTRY OFF CACHE INTERNAL "")
|
||||
|
@ -138,6 +138,7 @@ def parse_env_variables(
|
||||
ARM_V80COMPAT_SUFFIX = "-aarch64-v80compat"
|
||||
FREEBSD_SUFFIX = "-freebsd"
|
||||
PPC_SUFFIX = "-ppc64le"
|
||||
RISCV_SUFFIX = "-riscv64"
|
||||
AMD64_COMPAT_SUFFIX = "-amd64-compat"
|
||||
|
||||
result = []
|
||||
@ -150,6 +151,7 @@ def parse_env_variables(
|
||||
is_cross_arm = compiler.endswith(ARM_SUFFIX)
|
||||
is_cross_arm_v80compat = compiler.endswith(ARM_V80COMPAT_SUFFIX)
|
||||
is_cross_ppc = compiler.endswith(PPC_SUFFIX)
|
||||
is_cross_riscv = compiler.endswith(RISCV_SUFFIX)
|
||||
is_cross_freebsd = compiler.endswith(FREEBSD_SUFFIX)
|
||||
is_amd64_compat = compiler.endswith(AMD64_COMPAT_SUFFIX)
|
||||
|
||||
@ -206,6 +208,11 @@ def parse_env_variables(
|
||||
cmake_flags.append(
|
||||
"-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-ppc64le.cmake"
|
||||
)
|
||||
elif is_cross_riscv:
|
||||
cc = compiler[: -len(RISCV_SUFFIX)]
|
||||
cmake_flags.append(
|
||||
"-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-riscv64.cmake"
|
||||
)
|
||||
elif is_amd64_compat:
|
||||
cc = compiler[: -len(AMD64_COMPAT_SUFFIX)]
|
||||
result.append("DEB_ARCH=amd64")
|
||||
@ -370,6 +377,7 @@ def parse_args() -> argparse.Namespace:
|
||||
"clang-16-aarch64",
|
||||
"clang-16-aarch64-v80compat",
|
||||
"clang-16-ppc64le",
|
||||
"clang-16-riscv64",
|
||||
"clang-16-amd64-compat",
|
||||
"clang-16-freebsd",
|
||||
),
|
||||
|
@ -47,11 +47,13 @@ ENV TZ=Etc/UTC
|
||||
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
|
||||
|
||||
ENV DOCKER_CHANNEL stable
|
||||
# Unpin the docker version after the release 24.0.3 is released
|
||||
# https://github.com/moby/moby/issues/45770#issuecomment-1618255130
|
||||
RUN curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add - \
|
||||
&& add-apt-repository "deb https://download.docker.com/linux/ubuntu $(lsb_release -c -s) ${DOCKER_CHANNEL}" \
|
||||
&& apt-get update \
|
||||
&& env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
|
||||
docker-ce \
|
||||
docker-ce='5:23.*' \
|
||||
&& rm -rf \
|
||||
/var/lib/apt/lists/* \
|
||||
/var/cache/debconf \
|
||||
|
@ -67,6 +67,13 @@ start
|
||||
stop
|
||||
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.initial.log
|
||||
|
||||
# Start server from previous release
|
||||
# Let's enable S3 storage by default
|
||||
export USE_S3_STORAGE_FOR_MERGE_TREE=1
|
||||
# Previous version may not be ready for fault injections
|
||||
export ZOOKEEPER_FAULT_INJECTION=0
|
||||
configure
|
||||
|
||||
# force_sync=false doesn't work correctly on some older versions
|
||||
sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \
|
||||
| sed "s|<force_sync>false</force_sync>|<force_sync>true</force_sync>|" \
|
||||
@ -76,17 +83,11 @@ sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-serv
|
||||
# But we still need default disk because some tables loaded only into it
|
||||
sudo cat /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml \
|
||||
| sed "s|<main><disk>s3</disk></main>|<main><disk>s3</disk></main><default><disk>default</disk></default>|" \
|
||||
> /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp mv /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
|
||||
> /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp
|
||||
mv /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
|
||||
sudo chown clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
|
||||
sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
|
||||
|
||||
# Start server from previous release
|
||||
# Let's enable S3 storage by default
|
||||
export USE_S3_STORAGE_FOR_MERGE_TREE=1
|
||||
# Previous version may not be ready for fault injections
|
||||
export ZOOKEEPER_FAULT_INJECTION=0
|
||||
configure
|
||||
|
||||
# it contains some new settings, but we can safely remove it
|
||||
rm /etc/clickhouse-server/config.d/merge_tree.xml
|
||||
rm /etc/clickhouse-server/users.d/nonconst_timezone.xml
|
||||
|
@ -94,7 +94,10 @@ RUN mkdir /tmp/ccache \
|
||||
&& rm -rf /tmp/ccache
|
||||
|
||||
ARG TARGETARCH
|
||||
ARG SCCACHE_VERSION=v0.4.1
|
||||
ARG SCCACHE_VERSION=v0.5.4
|
||||
ENV SCCACHE_IGNORE_SERVER_IO_ERROR=1
|
||||
# sccache requires a value for the region. So by default we use The Default Region
|
||||
ENV SCCACHE_REGION=us-east-1
|
||||
RUN arch=${TARGETARCH:-amd64} \
|
||||
&& case $arch in \
|
||||
amd64) rarch=x86_64 ;; \
|
||||
|
@ -33,6 +33,9 @@ then
|
||||
elif [ "${ARCH}" = "powerpc64le" -o "${ARCH}" = "ppc64le" ]
|
||||
then
|
||||
DIR="powerpc64le"
|
||||
elif [ "${ARCH}" = "riscv64" ]
|
||||
then
|
||||
DIR="riscv64"
|
||||
fi
|
||||
elif [ "${OS}" = "FreeBSD" ]
|
||||
then
|
||||
|
@ -54,7 +54,7 @@ $ sudo mysql
|
||||
|
||||
``` sql
|
||||
mysql> CREATE USER 'clickhouse'@'localhost' IDENTIFIED BY 'clickhouse';
|
||||
mysql> GRANT ALL PRIVILEGES ON *.* TO 'clickhouse'@'clickhouse' WITH GRANT OPTION;
|
||||
mysql> GRANT ALL PRIVILEGES ON *.* TO 'clickhouse'@'localhost' WITH GRANT OPTION;
|
||||
```
|
||||
|
||||
Then configure the connection in `/etc/odbc.ini`.
|
||||
@ -66,7 +66,7 @@ DRIVER = /usr/local/lib/libmyodbc5w.so
|
||||
SERVER = 127.0.0.1
|
||||
PORT = 3306
|
||||
DATABASE = test
|
||||
USERNAME = clickhouse
|
||||
USER = clickhouse
|
||||
PASSWORD = clickhouse
|
||||
```
|
||||
|
||||
@ -83,6 +83,9 @@ $ isql -v mysqlconn
|
||||
Table in MySQL:
|
||||
|
||||
``` text
|
||||
mysql> CREATE DATABASE test;
|
||||
Query OK, 1 row affected (0,01 sec)
|
||||
|
||||
mysql> CREATE TABLE `test`.`test` (
|
||||
-> `int_id` INT NOT NULL AUTO_INCREMENT,
|
||||
-> `int_nullable` INT NULL DEFAULT NULL,
|
||||
@ -91,10 +94,10 @@ mysql> CREATE TABLE `test`.`test` (
|
||||
-> PRIMARY KEY (`int_id`));
|
||||
Query OK, 0 rows affected (0,09 sec)
|
||||
|
||||
mysql> insert into test (`int_id`, `float`) VALUES (1,2);
|
||||
mysql> insert into test.test (`int_id`, `float`) VALUES (1,2);
|
||||
Query OK, 1 row affected (0,00 sec)
|
||||
|
||||
mysql> select * from test;
|
||||
mysql> select * from test.test;
|
||||
+------+----------+-----+----------+
|
||||
| int_id | int_nullable | float | float_nullable |
|
||||
+------+----------+-----+----------+
|
||||
|
@ -3201,6 +3201,40 @@ ENGINE = Log
|
||||
└──────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## default_temporary_table_engine {#default_temporary_table_engine}
|
||||
|
||||
Same as [default_table_engine](#default_table_engine) but for temporary tables.
|
||||
|
||||
Default value: `Memory`.
|
||||
|
||||
In this example, any new temporary table that does not specify an `Engine` will use the `Log` table engine:
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SET default_temporary_table_engine = 'Log';
|
||||
|
||||
CREATE TEMPORARY TABLE my_table (
|
||||
x UInt32,
|
||||
y UInt32
|
||||
);
|
||||
|
||||
SHOW CREATE TEMPORARY TABLE my_table;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
┌─statement────────────────────────────────────────────────────────────────┐
|
||||
│ CREATE TEMPORARY TABLE default.my_table
|
||||
(
|
||||
`x` UInt32,
|
||||
`y` UInt32
|
||||
)
|
||||
ENGINE = Log
|
||||
└──────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## data_type_default_nullable {#data_type_default_nullable}
|
||||
|
||||
Controls whether data types without explicit modifiers [NULL or NOT NULL](../../sql-reference/statements/create/table.md/#null-modifiers) in a column definition are treated as [Nullable](../../sql-reference/data-types/nullable.md/#data_type-nullable).
|
||||
|
@ -9,7 +9,6 @@ Columns:
|
||||
|
||||
- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Event date.
|
||||
- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Event time.
|
||||
- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Event time with microseconds resolution.
|
||||
- `name` ([String](../../sql-reference/data-types/string.md)) — Metric name.
|
||||
- `value` ([Float64](../../sql-reference/data-types/float.md)) — Metric value.
|
||||
|
||||
@ -20,18 +19,18 @@ SELECT * FROM system.asynchronous_metric_log LIMIT 10
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─event_date─┬──────────event_time─┬────event_time_microseconds─┬─name─────────────────────────────────────┬─────value─┐
|
||||
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ CPUFrequencyMHz_0 │ 2120.9 │
|
||||
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.arenas.all.pmuzzy │ 743 │
|
||||
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.arenas.all.pdirty │ 26288 │
|
||||
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.background_thread.run_intervals │ 0 │
|
||||
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.background_thread.num_runs │ 0 │
|
||||
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.retained │ 60694528 │
|
||||
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.mapped │ 303161344 │
|
||||
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.resident │ 260931584 │
|
||||
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.metadata │ 12079488 │
|
||||
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.allocated │ 133756128 │
|
||||
└────────────┴─────────────────────┴────────────────────────────┴──────────────────────────────────────────┴───────────┘
|
||||
┌─event_date─┬──────────event_time─┬─name─────────────────────────────────────┬─────value─┐
|
||||
│ 2020-09-05 │ 2020-09-05 15:56:30 │ CPUFrequencyMHz_0 │ 2120.9 │
|
||||
│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.arenas.all.pmuzzy │ 743 │
|
||||
│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.arenas.all.pdirty │ 26288 │
|
||||
│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.background_thread.run_intervals │ 0 │
|
||||
│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.background_thread.num_runs │ 0 │
|
||||
│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.retained │ 60694528 │
|
||||
│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.mapped │ 303161344 │
|
||||
│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.resident │ 260931584 │
|
||||
│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.metadata │ 12079488 │
|
||||
│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.allocated │ 133756128 │
|
||||
└────────────┴─────────────────────┴──────────────────────────────────────────┴───────────┘
|
||||
```
|
||||
|
||||
**See Also**
|
||||
|
@ -13,6 +13,7 @@ System tables provide information about:
|
||||
|
||||
- Server states, processes, and environment.
|
||||
- Server’s internal processes.
|
||||
- Options used when the ClickHouse binary was built.
|
||||
|
||||
System tables:
|
||||
|
||||
|
@ -283,7 +283,7 @@ The optional keyword `EXTENDED` currently has no effect, it only exists for MySQ
|
||||
|
||||
`SHOW INDEX` produces a result table with the following structure:
|
||||
- table - The name of the table (String)
|
||||
- non_unique - 0 if the index can contain duplicates, 1 otherwise (UInt8)
|
||||
- non_unique - 0 if the index cannot contain duplicates, 1 otherwise (UInt8)
|
||||
- key_name - The name of the index, `PRIMARY` if the index is a primary key index (String)
|
||||
- seq_in_index - Currently unused
|
||||
- column_name - Currently unused
|
||||
|
@ -8,7 +8,6 @@ slug: /ru/operations/system-tables/asynchronous_metric_log
|
||||
Столбцы:
|
||||
- `event_date` ([Date](../../sql-reference/data-types/date.md)) — дата события.
|
||||
- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — время события.
|
||||
- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — время события в микросекундах.
|
||||
- `name` ([String](../../sql-reference/data-types/string.md)) — название метрики.
|
||||
- `value` ([Float64](../../sql-reference/data-types/float.md)) — значение метрики.
|
||||
|
||||
|
@ -8,7 +8,6 @@ slug: /zh/operations/system-tables/asynchronous_metric_log
|
||||
列:
|
||||
- `event_date` ([Date](../../sql-reference/data-types/date.md)) — 事件日期。
|
||||
- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — 事件时间。
|
||||
- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — 事件时间(微秒)。
|
||||
- `name` ([String](../../sql-reference/data-types/string.md)) — 指标名。
|
||||
- `value` ([Float64](../../sql-reference/data-types/float.md)) — 指标值。
|
||||
|
||||
@ -17,18 +16,18 @@ slug: /zh/operations/system-tables/asynchronous_metric_log
|
||||
SELECT * FROM system.asynchronous_metric_log LIMIT 10
|
||||
```
|
||||
``` text
|
||||
┌─event_date─┬──────────event_time─┬────event_time_microseconds─┬─name─────────────────────────────────────┬─────value─┐
|
||||
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ CPUFrequencyMHz_0 │ 2120.9 │
|
||||
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.arenas.all.pmuzzy │ 743 │
|
||||
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.arenas.all.pdirty │ 26288 │
|
||||
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.background_thread.run_intervals │ 0 │
|
||||
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.background_thread.num_runs │ 0 │
|
||||
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.retained │ 60694528 │
|
||||
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.mapped │ 303161344 │
|
||||
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.resident │ 260931584 │
|
||||
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.metadata │ 12079488 │
|
||||
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.allocated │ 133756128 │
|
||||
└────────────┴─────────────────────┴────────────────────────────┴──────────────────────────────────────────┴───────────┘
|
||||
┌─event_date─┬──────────event_time─┬─name─────────────────────────────────────┬─────value─┐
|
||||
│ 2020-09-05 │ 2020-09-05 15:56:30 │ CPUFrequencyMHz_0 │ 2120.9 │
|
||||
│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.arenas.all.pmuzzy │ 743 │
|
||||
│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.arenas.all.pdirty │ 26288 │
|
||||
│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.background_thread.run_intervals │ 0 │
|
||||
│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.background_thread.num_runs │ 0 │
|
||||
│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.retained │ 60694528 │
|
||||
│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.mapped │ 303161344 │
|
||||
│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.resident │ 260931584 │
|
||||
│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.metadata │ 12079488 │
|
||||
│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.allocated │ 133756128 │
|
||||
└────────────┴─────────────────────┴──────────────────────────────────────────┴───────────┘
|
||||
```
|
||||
|
||||
**另请参阅**
|
||||
|
@ -192,7 +192,7 @@ SELECT coalesce(mail, phone, CAST(icq,'Nullable(String)')) FROM aBook
|
||||
**返回值**
|
||||
|
||||
- 如果`x`不为`NULL`,返回非`Nullable`类型的原始值。
|
||||
- 如果`x`为`NULL`,返回对应非`Nullable`类型的默认值。
|
||||
- 如果`x`为`NULL`,则返回任意值。
|
||||
|
||||
**示例**
|
||||
|
||||
|
@ -59,7 +59,7 @@ public:
|
||||
String relative_path_from = validatePathAndGetAsRelative(path_from);
|
||||
String relative_path_to = validatePathAndGetAsRelative(path_to);
|
||||
|
||||
disk_from->copy(relative_path_from, disk_to, relative_path_to);
|
||||
disk_from->copyDirectoryContent(relative_path_from, disk_to, relative_path_to);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
@ -42,7 +42,7 @@ int mainEntryClickHouseKeeperConverter(int argc, char ** argv)
|
||||
{
|
||||
auto keeper_context = std::make_shared<KeeperContext>(true);
|
||||
keeper_context->setDigestEnabled(true);
|
||||
keeper_context->setSnapshotDisk(std::make_shared<DiskLocal>("Keeper-snapshots", options["output-dir"].as<std::string>(), 0));
|
||||
keeper_context->setSnapshotDisk(std::make_shared<DiskLocal>("Keeper-snapshots", options["output-dir"].as<std::string>()));
|
||||
|
||||
DB::KeeperStorage storage(/* tick_time_ms */ 500, /* superdigest */ "", keeper_context, /* initialize_system_nodes */ false);
|
||||
|
||||
|
@ -8,7 +8,9 @@
|
||||
#include <Poco/Logger.h>
|
||||
#include <Poco/NullChannel.h>
|
||||
#include <Poco/SimpleFileChannel.h>
|
||||
#include <Databases/DatabaseFilesystem.h>
|
||||
#include <Databases/DatabaseMemory.h>
|
||||
#include <Databases/DatabasesOverlay.h>
|
||||
#include <Storages/System/attachSystemTables.h>
|
||||
#include <Storages/System/attachInformationSchemaTables.h>
|
||||
#include <Interpreters/DatabaseCatalog.h>
|
||||
@ -50,6 +52,8 @@
|
||||
#include <base/argsToConfig.h>
|
||||
#include <filesystem>
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#if defined(FUZZING_MODE)
|
||||
#include <Functions/getFuzzerData.h>
|
||||
#endif
|
||||
@ -170,6 +174,13 @@ static DatabasePtr createMemoryDatabaseIfNotExists(ContextPtr context, const Str
|
||||
return system_database;
|
||||
}
|
||||
|
||||
/// Creates a DatabasesOverlay called `name_` and registers two databases in it, in this order:
///  1. a DatabaseFilesystem (constructed with an empty string as its second argument),
///  2. a DatabaseMemory.
/// Both inner databases are created with the same name and context as the overlay itself.
/// Returns the overlay as a generic DatabasePtr.
static DatabasePtr createClickHouseLocalDatabaseOverlay(const String & name_, ContextPtr context_)
{
    auto overlay = std::make_shared<DatabasesOverlay>(name_, context_);

    /// Registration order is kept exactly as written: filesystem database first, memory database second.
    auto filesystem_database = std::make_shared<DatabaseFilesystem>(name_, "", context_);
    overlay->registerNextDatabase(std::move(filesystem_database));

    auto memory_database = std::make_shared<DatabaseMemory>(name_, context_);
    overlay->registerNextDatabase(std::move(memory_database));

    return overlay;
}
|
||||
|
||||
/// If path is specified and not empty, will try to setup server environment and load existing metadata
|
||||
void LocalServer::tryInitPath()
|
||||
@ -669,7 +680,7 @@ void LocalServer::processConfig()
|
||||
* if such tables will not be dropped, clickhouse-server will not be able to load them due to security reasons.
|
||||
*/
|
||||
std::string default_database = config().getString("default_database", "_local");
|
||||
DatabaseCatalog::instance().attachDatabase(default_database, std::make_shared<DatabaseMemory>(default_database, global_context));
|
||||
DatabaseCatalog::instance().attachDatabase(default_database, createClickHouseLocalDatabaseOverlay(default_database, global_context));
|
||||
global_context->setCurrentDatabase(default_database);
|
||||
applyCmdOptions(global_context);
|
||||
|
||||
|
@ -1581,6 +1581,15 @@ try
|
||||
/// After attaching system databases we can initialize system log.
|
||||
global_context->initializeSystemLogs();
|
||||
global_context->setSystemZooKeeperLogAfterInitializationIfNeeded();
|
||||
/// Build loggers before tables startup to make log messages from tables
|
||||
/// attach available in system.text_log
|
||||
{
|
||||
String level_str = config().getString("text_log.level", "");
|
||||
int level = level_str.empty() ? INT_MAX : Poco::Logger::parseLevel(level_str);
|
||||
setTextLog(global_context->getTextLog(), level);
|
||||
|
||||
buildLoggers(config(), logger());
|
||||
}
|
||||
/// After the system database is created, attach virtual system tables (in addition to query_log and part_log)
|
||||
attachSystemTablesServer(global_context, *database_catalog.getSystemDatabase(), has_zookeeper);
|
||||
attachInformationSchema(global_context, *database_catalog.getDatabase(DatabaseCatalog::INFORMATION_SCHEMA));
|
||||
@ -1707,14 +1716,6 @@ try
|
||||
/// Must be done after initialization of `servers`, because async_metrics will access `servers` variable from its thread.
|
||||
async_metrics.start();
|
||||
|
||||
{
|
||||
String level_str = config().getString("text_log.level", "");
|
||||
int level = level_str.empty() ? INT_MAX : Poco::Logger::parseLevel(level_str);
|
||||
setTextLog(global_context->getTextLog(), level);
|
||||
}
|
||||
|
||||
buildLoggers(config(), logger());
|
||||
|
||||
main_config_reloader->start();
|
||||
access_control.startPeriodicReloading();
|
||||
|
||||
|
@ -12,7 +12,8 @@
|
||||
--chart-background: white;
|
||||
--shadow-color: rgba(0, 0, 0, 0.25);
|
||||
--input-shadow-color: rgba(0, 255, 0, 1);
|
||||
--error-color: white;
|
||||
--error-color: red;
|
||||
--auth-error-color: white;
|
||||
--legend-background: rgba(255, 255, 255, 0.75);
|
||||
--title-color: #666;
|
||||
--text-color: black;
|
||||
@ -258,7 +259,7 @@
|
||||
width: 60%;
|
||||
padding: .5rem;
|
||||
|
||||
color: var(--error-color);
|
||||
color: var(--auth-error-color);
|
||||
|
||||
display: flex;
|
||||
flex-flow: row nowrap;
|
||||
@ -906,9 +907,9 @@ async function draw(idx, chart, url_params, query) {
|
||||
|
||||
if (error) {
|
||||
const errorMatch = errorMessages.find(({ regex }) => error.match(regex))
|
||||
if (errorMatch) {
|
||||
const match = error.match(errorMatch.regex)
|
||||
const message = errorMatch.messageFunc(match)
|
||||
const match = error.match(errorMatch.regex)
|
||||
const message = errorMatch.messageFunc(match)
|
||||
if (message) {
|
||||
const authError = new Error(message)
|
||||
throw authError
|
||||
}
|
||||
@ -930,7 +931,7 @@ async function draw(idx, chart, url_params, query) {
|
||||
let title_div = chart.querySelector('.title');
|
||||
if (error) {
|
||||
error_div.firstChild.data = error;
|
||||
title_div.style.display = 'none';
|
||||
title_div.style.display = 'none';
|
||||
error_div.style.display = 'block';
|
||||
return false;
|
||||
} else {
|
||||
@ -1019,13 +1020,15 @@ async function drawAll() {
|
||||
firstLoad = false;
|
||||
} else {
|
||||
enableReloadButton();
|
||||
enableRunButton();
|
||||
}
|
||||
if (!results.includes(false)) {
|
||||
if (results.includes(true)) {
|
||||
const element = document.querySelector('.inputs');
|
||||
element.classList.remove('unconnected');
|
||||
const add = document.querySelector('#add');
|
||||
add.style.display = 'block';
|
||||
} else {
|
||||
}
|
||||
else {
|
||||
const charts = document.querySelector('#charts')
|
||||
charts.style.height = '0px';
|
||||
}
|
||||
@ -1050,6 +1053,13 @@ function disableReloadButton() {
|
||||
reloadButton.classList.add('disabled')
|
||||
}
|
||||
|
||||
// Looks up the element with id 'run', changes its caption to 'Reloading...',
// disables it and adds the 'disabled' CSS class to it.
function disableRunButton() {
    const btn = document.getElementById('run');
    btn.value = 'Reloading...';
    btn.disabled = true;
    btn.classList.add('disabled');
}
|
||||
|
||||
function enableReloadButton() {
|
||||
const reloadButton = document.getElementById('reload')
|
||||
reloadButton.value = 'Reload'
|
||||
@ -1057,11 +1067,19 @@ function enableReloadButton() {
|
||||
reloadButton.classList.remove('disabled')
|
||||
}
|
||||
|
||||
// Looks up the element with id 'run', sets its caption to 'Ok',
// re-enables it and removes the 'disabled' CSS class from it.
function enableRunButton() {
    const btn = document.getElementById('run');
    btn.value = 'Ok';
    btn.disabled = false;
    btn.classList.remove('disabled');
}
|
||||
|
||||
function reloadAll() {
|
||||
updateParams();
|
||||
drawAll();
|
||||
saveState();
|
||||
disableReloadButton()
|
||||
disableReloadButton();
|
||||
disableRunButton();
|
||||
}
|
||||
|
||||
document.getElementById('params').onsubmit = function(event) {
|
||||
|
@ -155,7 +155,7 @@ namespace
|
||||
|
||||
|
||||
AccessRightsElement::AccessRightsElement(AccessFlags access_flags_, std::string_view database_)
|
||||
: access_flags(access_flags_), database(database_), any_database(false)
|
||||
: access_flags(access_flags_), database(database_), parameter(database_), any_database(false), any_parameter(false)
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -70,7 +70,7 @@ enum class AccessType
|
||||
M(ALTER_FREEZE_PARTITION, "FREEZE PARTITION, UNFREEZE", TABLE, ALTER_TABLE) \
|
||||
\
|
||||
M(ALTER_DATABASE_SETTINGS, "ALTER DATABASE SETTING, ALTER MODIFY DATABASE SETTING, MODIFY DATABASE SETTING", DATABASE, ALTER_DATABASE) /* allows to execute ALTER MODIFY SETTING */\
|
||||
M(ALTER_NAMED_COLLECTION, "", NAMED_COLLECTION, NAMED_COLLECTION_CONTROL) /* allows to execute ALTER NAMED COLLECTION */\
|
||||
M(ALTER_NAMED_COLLECTION, "", NAMED_COLLECTION, NAMED_COLLECTION_ADMIN) /* allows to execute ALTER NAMED COLLECTION */\
|
||||
\
|
||||
M(ALTER_TABLE, "", GROUP, ALTER) \
|
||||
M(ALTER_DATABASE, "", GROUP, ALTER) \
|
||||
@ -92,7 +92,7 @@ enum class AccessType
|
||||
M(CREATE_ARBITRARY_TEMPORARY_TABLE, "", GLOBAL, CREATE) /* allows to create and manipulate temporary tables
|
||||
with arbitrary table engine */\
|
||||
M(CREATE_FUNCTION, "", GLOBAL, CREATE) /* allows to execute CREATE FUNCTION */ \
|
||||
M(CREATE_NAMED_COLLECTION, "", NAMED_COLLECTION, NAMED_COLLECTION_CONTROL) /* allows to execute CREATE NAMED COLLECTION */ \
|
||||
M(CREATE_NAMED_COLLECTION, "", NAMED_COLLECTION, NAMED_COLLECTION_ADMIN) /* allows to execute CREATE NAMED COLLECTION */ \
|
||||
M(CREATE, "", GROUP, ALL) /* allows to execute {CREATE|ATTACH} */ \
|
||||
\
|
||||
M(DROP_DATABASE, "", DATABASE, DROP) /* allows to execute {DROP|DETACH} DATABASE */\
|
||||
@ -101,7 +101,7 @@ enum class AccessType
|
||||
implicitly enabled by the grant DROP_TABLE */\
|
||||
M(DROP_DICTIONARY, "", DICTIONARY, DROP) /* allows to execute {DROP|DETACH} DICTIONARY */\
|
||||
M(DROP_FUNCTION, "", GLOBAL, DROP) /* allows to execute DROP FUNCTION */\
|
||||
M(DROP_NAMED_COLLECTION, "", NAMED_COLLECTION, NAMED_COLLECTION_CONTROL) /* allows to execute DROP NAMED COLLECTION */\
|
||||
M(DROP_NAMED_COLLECTION, "", NAMED_COLLECTION, NAMED_COLLECTION_ADMIN) /* allows to execute DROP NAMED COLLECTION */\
|
||||
M(DROP, "", GROUP, ALL) /* allows to execute {DROP|DETACH} */\
|
||||
\
|
||||
M(UNDROP_TABLE, "", TABLE, ALL) /* allows to execute {UNDROP} TABLE */\
|
||||
@ -140,9 +140,10 @@ enum class AccessType
|
||||
M(SHOW_SETTINGS_PROFILES, "SHOW PROFILES, SHOW CREATE SETTINGS PROFILE, SHOW CREATE PROFILE", GLOBAL, SHOW_ACCESS) \
|
||||
M(SHOW_ACCESS, "", GROUP, ACCESS_MANAGEMENT) \
|
||||
M(ACCESS_MANAGEMENT, "", GROUP, ALL) \
|
||||
M(SHOW_NAMED_COLLECTIONS, "SHOW NAMED COLLECTIONS", NAMED_COLLECTION, NAMED_COLLECTION_CONTROL) \
|
||||
M(SHOW_NAMED_COLLECTIONS_SECRETS, "SHOW NAMED COLLECTIONS SECRETS", NAMED_COLLECTION, NAMED_COLLECTION_CONTROL) \
|
||||
M(NAMED_COLLECTION_CONTROL, "", NAMED_COLLECTION, ALL) \
|
||||
M(SHOW_NAMED_COLLECTIONS, "SHOW NAMED COLLECTIONS", NAMED_COLLECTION, NAMED_COLLECTION_ADMIN) \
|
||||
M(SHOW_NAMED_COLLECTIONS_SECRETS, "SHOW NAMED COLLECTIONS SECRETS", NAMED_COLLECTION, NAMED_COLLECTION_ADMIN) \
|
||||
M(NAMED_COLLECTION, "NAMED COLLECTION USAGE, USE NAMED COLLECTION", NAMED_COLLECTION, NAMED_COLLECTION_ADMIN) \
|
||||
M(NAMED_COLLECTION_ADMIN, "NAMED COLLECTION CONTROL", NAMED_COLLECTION, ALL) \
|
||||
\
|
||||
M(SYSTEM_SHUTDOWN, "SYSTEM KILL, SHUTDOWN", GLOBAL, SYSTEM) \
|
||||
M(SYSTEM_DROP_DNS_CACHE, "SYSTEM DROP DNS, DROP DNS CACHE, DROP DNS", GLOBAL, SYSTEM_DROP_CACHE) \
|
||||
|
@ -328,7 +328,7 @@ namespace
|
||||
|
||||
if (!named_collection_control)
|
||||
{
|
||||
user->access.revoke(AccessType::NAMED_COLLECTION_CONTROL);
|
||||
user->access.revoke(AccessType::NAMED_COLLECTION_ADMIN);
|
||||
}
|
||||
|
||||
if (!show_named_collections_secrets)
|
||||
|
@ -53,7 +53,7 @@ TEST(AccessRights, Union)
|
||||
"SHOW ROW POLICIES, SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, "
|
||||
"SYSTEM MOVES, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, "
|
||||
"SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, SYSTEM RESTART REPLICA, "
|
||||
"SYSTEM RESTORE REPLICA, SYSTEM WAIT LOADING PARTS, SYSTEM SYNC DATABASE REPLICA, SYSTEM FLUSH DISTRIBUTED, dictGet ON db1.*, GRANT NAMED COLLECTION CONTROL ON db1");
|
||||
"SYSTEM RESTORE REPLICA, SYSTEM WAIT LOADING PARTS, SYSTEM SYNC DATABASE REPLICA, SYSTEM FLUSH DISTRIBUTED, dictGet ON db1.*, GRANT NAMED COLLECTION ADMIN ON db1");
|
||||
}
|
||||
|
||||
|
||||
|
@ -24,7 +24,7 @@ protected:
|
||||
/// Make local disk.
|
||||
temp_dir = std::make_unique<Poco::TemporaryFile>();
|
||||
temp_dir->createDirectories();
|
||||
local_disk = std::make_shared<DiskLocal>("local_disk", temp_dir->path() + "/", 0);
|
||||
local_disk = std::make_shared<DiskLocal>("local_disk", temp_dir->path() + "/");
|
||||
|
||||
/// Make encrypted disk.
|
||||
auto settings = std::make_unique<DiskEncryptedSettings>();
|
||||
@ -38,7 +38,7 @@ protected:
|
||||
settings->current_key = key;
|
||||
settings->current_key_fingerprint = fingerprint;
|
||||
|
||||
encrypted_disk = std::make_shared<DiskEncrypted>("encrypted_disk", std::move(settings), true);
|
||||
encrypted_disk = std::make_shared<DiskEncrypted>("encrypted_disk", std::move(settings));
|
||||
}
|
||||
|
||||
void TearDown() override
|
||||
|
@ -2297,7 +2297,9 @@ void ClientBase::runInteractive()
|
||||
catch (const ErrnoException & e)
|
||||
{
|
||||
if (e.getErrno() != EEXIST)
|
||||
throw;
|
||||
{
|
||||
std::cerr << getCurrentExceptionMessage(false) << '\n';
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -60,7 +60,15 @@ ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfigurati
|
||||
quota_key = config.getString("quota_key", "");
|
||||
|
||||
/// By default compression is disabled if address looks like localhost.
|
||||
compression = config.getBool("compression", !isLocalAddress(DNSResolver::instance().resolveHost(host)))
|
||||
|
||||
/// Avoid DNS request if the host is "localhost".
|
||||
/// If ClickHouse is run under QEMU-user with a binary for a different architecture,
|
||||
/// and there are all listed startup dependency shared libraries available, but not the runtime dependencies of glibc,
|
||||
/// the glibc cannot open "plugins" for DNS resolving, and the DNS resolution does not work.
|
||||
/// At the same time, I want clickhouse-local to always work, regardless.
|
||||
/// TODO: get rid of glibc, or replace getaddrinfo to c-ares.
|
||||
|
||||
compression = config.getBool("compression", host != "localhost" && !isLocalAddress(DNSResolver::instance().resolveHost(host)))
|
||||
? Protocol::Compression::Enable : Protocol::Compression::Disable;
|
||||
|
||||
timeouts = ConnectionTimeouts(
|
||||
|
@ -101,9 +101,8 @@ static String getLoadSuggestionQuery(Int32 suggestion_limit, bool basic_suggesti
|
||||
add_column("name", "columns", true, suggestion_limit);
|
||||
}
|
||||
|
||||
/// FIXME: Forbid this query using new analyzer because of bug https://github.com/ClickHouse/ClickHouse/issues/50669
|
||||
/// We should remove this restriction after resolving this bug.
|
||||
query = "SELECT DISTINCT arrayJoin(extractAll(name, '[\\\\w_]{2,}')) AS res FROM (" + query + ") WHERE notEmpty(res) SETTINGS allow_experimental_analyzer=0";
|
||||
/// FIXME: This query does not work with the new analyzer because of bug https://github.com/ClickHouse/ClickHouse/issues/50669
|
||||
query = "SELECT DISTINCT arrayJoin(extractAll(name, '[\\\\w_]{2,}')) AS res FROM (" + query + ") WHERE notEmpty(res)";
|
||||
return query;
|
||||
}
|
||||
|
||||
|
@ -107,8 +107,8 @@ struct FloatCompareHelper
|
||||
}
|
||||
};
|
||||
|
||||
template <class U> struct CompareHelper<Float32, U> : public FloatCompareHelper<Float32> {};
|
||||
template <class U> struct CompareHelper<Float64, U> : public FloatCompareHelper<Float64> {};
|
||||
template <typename U> struct CompareHelper<Float32, U> : public FloatCompareHelper<Float32> {};
|
||||
template <typename U> struct CompareHelper<Float64, U> : public FloatCompareHelper<Float64> {};
|
||||
|
||||
|
||||
/** A template for columns that use a simple array to store.
|
||||
|
@ -93,8 +93,8 @@
|
||||
M(ThreadPoolFSReaderThreadsActive, "Number of threads in the thread pool for local_filesystem_read_method=threadpool running a task.") \
|
||||
M(BackupsIOThreads, "Number of threads in the BackupsIO thread pool.") \
|
||||
M(BackupsIOThreadsActive, "Number of threads in the BackupsIO thread pool running a task.") \
|
||||
M(DiskObjectStorageAsyncThreads, "Number of threads in the async thread pool for DiskObjectStorage.") \
|
||||
M(DiskObjectStorageAsyncThreadsActive, "Number of threads in the async thread pool for DiskObjectStorage running a task.") \
|
||||
M(DiskObjectStorageAsyncThreads, "Obsolete metric, shows nothing.") \
|
||||
M(DiskObjectStorageAsyncThreadsActive, "Obsolete metric, shows nothing.") \
|
||||
M(StorageHiveThreads, "Number of threads in the StorageHive thread pool.") \
|
||||
M(StorageHiveThreadsActive, "Number of threads in the StorageHive thread pool running a task.") \
|
||||
M(TablesLoaderThreads, "Number of threads in the tables loader thread pool.") \
|
||||
@ -141,6 +141,8 @@
|
||||
M(MergeTreeOutdatedPartsLoaderThreadsActive, "Number of active threads in the threadpool for loading Outdated data parts.") \
|
||||
M(MergeTreePartsCleanerThreads, "Number of threads in the MergeTree parts cleaner thread pool.") \
|
||||
M(MergeTreePartsCleanerThreadsActive, "Number of threads in the MergeTree parts cleaner thread pool running a task.") \
|
||||
M(IDiskCopierThreads, "Number of threads for copying data between disks of different types.") \
|
||||
M(IDiskCopierThreadsActive, "Number of threads for copying data between disks of different types running a task.") \
|
||||
M(SystemReplicasThreads, "Number of threads in the system.replicas thread pool.") \
|
||||
M(SystemReplicasThreadsActive, "Number of threads in the system.replicas thread pool running a task.") \
|
||||
M(RestartReplicaThreads, "Number of threads in the RESTART REPLICA thread pool.") \
|
||||
|
@ -418,6 +418,18 @@ PreformattedMessage getCurrentExceptionMessageAndPattern(bool with_stacktrace, b
|
||||
<< " (version " << VERSION_STRING << VERSION_OFFICIAL << ")";
|
||||
}
|
||||
catch (...) {}
|
||||
|
||||
// #ifdef ABORT_ON_LOGICAL_ERROR
|
||||
// try
|
||||
// {
|
||||
// throw;
|
||||
// }
|
||||
// catch (const std::logic_error &)
|
||||
// {
|
||||
// abortOnFailedAssertion(stream.str());
|
||||
// }
|
||||
// catch (...) {}
|
||||
// #endif
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
|
@ -540,7 +540,7 @@ bool OptimizedRegularExpressionImpl<thread_safe>::match(const char * subject, si
|
||||
}
|
||||
}
|
||||
|
||||
return re2->Match(StringPieceType(subject, subject_size), 0, subject_size, RegexType::UNANCHORED, nullptr, 0);
|
||||
return re2->Match({subject, subject_size}, 0, subject_size, RegexType::UNANCHORED, nullptr, 0);
|
||||
}
|
||||
}
|
||||
|
||||
@ -585,9 +585,9 @@ bool OptimizedRegularExpressionImpl<thread_safe>::match(const char * subject, si
|
||||
return false;
|
||||
}
|
||||
|
||||
StringPieceType piece;
|
||||
std::string_view piece;
|
||||
|
||||
if (!RegexType::PartialMatch(StringPieceType(subject, subject_size), *re2, &piece))
|
||||
if (!RegexType::PartialMatch({subject, subject_size}, *re2, &piece))
|
||||
return false;
|
||||
else
|
||||
{
|
||||
@ -652,10 +652,10 @@ unsigned OptimizedRegularExpressionImpl<thread_safe>::match(const char * subject
|
||||
return 0;
|
||||
}
|
||||
|
||||
DB::PODArrayWithStackMemory<StringPieceType, 128> pieces(limit);
|
||||
DB::PODArrayWithStackMemory<std::string_view, 128> pieces(limit);
|
||||
|
||||
if (!re2->Match(
|
||||
StringPieceType(subject, subject_size),
|
||||
{subject, subject_size},
|
||||
0,
|
||||
subject_size,
|
||||
RegexType::UNANCHORED,
|
||||
|
@ -52,7 +52,6 @@ public:
|
||||
using MatchVec = std::vector<Match>;
|
||||
|
||||
using RegexType = std::conditional_t<thread_safe, re2::RE2, re2_st::RE2>;
|
||||
using StringPieceType = std::conditional_t<thread_safe, re2::StringPiece, re2_st::StringPiece>;
|
||||
|
||||
OptimizedRegularExpressionImpl(const std::string & regexp_, int options = 0); /// NOLINT
|
||||
/// StringSearcher store pointers to required_substring, it must be updated on move.
|
||||
|
@ -5,7 +5,6 @@
|
||||
#include <atomic>
|
||||
|
||||
#include <re2/re2.h>
|
||||
#include <re2/stringpiece.h>
|
||||
|
||||
#include <Poco/Util/AbstractConfiguration.h>
|
||||
|
||||
@ -44,7 +43,7 @@ private:
|
||||
const std::string regexp_string;
|
||||
|
||||
const RE2 regexp;
|
||||
const re2::StringPiece replacement;
|
||||
const std::string_view replacement;
|
||||
|
||||
#ifndef NDEBUG
|
||||
mutable std::atomic<std::uint64_t> matches_count = 0;
|
||||
|
@ -67,8 +67,8 @@ ThreadGroup::ThreadGroup()
|
||||
: master_thread_id(CurrentThread::get().thread_id)
|
||||
{}
|
||||
|
||||
ThreadStatus::ThreadStatus()
|
||||
: thread_id{getThreadId()}
|
||||
ThreadStatus::ThreadStatus(bool check_current_thread_on_destruction_)
|
||||
: thread_id{getThreadId()}, check_current_thread_on_destruction(check_current_thread_on_destruction_)
|
||||
{
|
||||
last_rusage = std::make_unique<RUsageCounters>();
|
||||
|
||||
@ -201,8 +201,11 @@ ThreadStatus::~ThreadStatus()
|
||||
|
||||
/// Only change current_thread if it's currently being used by this ThreadStatus
|
||||
/// For example, PushingToViews chain creates and deletes ThreadStatus instances while running in the main query thread
|
||||
if (current_thread == this)
|
||||
if (check_current_thread_on_destruction)
|
||||
{
|
||||
assert(current_thread == this);
|
||||
current_thread = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
void ThreadStatus::updatePerformanceCounters()
|
||||
|
@ -224,8 +224,10 @@ private:
|
||||
|
||||
Poco::Logger * log = nullptr;
|
||||
|
||||
bool check_current_thread_on_destruction;
|
||||
|
||||
public:
|
||||
ThreadStatus();
|
||||
explicit ThreadStatus(bool check_current_thread_on_destruction_ = true);
|
||||
~ThreadStatus();
|
||||
|
||||
ThreadGroupPtr getThreadGroup() const;
|
||||
|
@ -27,7 +27,7 @@ static thread_local size_t max_stack_size = 0;
|
||||
* @param out_address - if not nullptr, here the address of the stack will be written.
|
||||
* @return stack size
|
||||
*/
|
||||
size_t getStackSize(void ** out_address)
|
||||
static size_t getStackSize(void ** out_address)
|
||||
{
|
||||
using namespace DB;
|
||||
|
||||
@ -54,7 +54,15 @@ size_t getStackSize(void ** out_address)
|
||||
throwFromErrno("Cannot pthread_attr_get_np", ErrorCodes::CANNOT_PTHREAD_ATTR);
|
||||
# else
|
||||
if (0 != pthread_getattr_np(pthread_self(), &attr))
|
||||
throwFromErrno("Cannot pthread_getattr_np", ErrorCodes::CANNOT_PTHREAD_ATTR);
|
||||
{
|
||||
if (errno == ENOENT)
|
||||
{
|
||||
/// Most likely procfs is not mounted.
|
||||
return 0;
|
||||
}
|
||||
else
|
||||
throwFromErrno("Cannot pthread_getattr_np", ErrorCodes::CANNOT_PTHREAD_ATTR);
|
||||
}
|
||||
# endif
|
||||
|
||||
SCOPE_EXIT({ pthread_attr_destroy(&attr); });
|
||||
@ -83,6 +91,10 @@ __attribute__((__weak__)) void checkStackSize()
|
||||
if (!stack_address)
|
||||
max_stack_size = getStackSize(&stack_address);
|
||||
|
||||
/// The check is impossible.
|
||||
if (!max_stack_size)
|
||||
return;
|
||||
|
||||
const void * frame_address = __builtin_frame_address(0);
|
||||
uintptr_t int_frame_address = reinterpret_cast<uintptr_t>(frame_address);
|
||||
uintptr_t int_stack_address = reinterpret_cast<uintptr_t>(stack_address);
|
||||
|
@ -3,7 +3,6 @@
|
||||
#include <IO/ReadBufferFromString.h>
|
||||
#include <IO/Operators.h>
|
||||
#include <re2/re2.h>
|
||||
#include <re2/stringpiece.h>
|
||||
#include <algorithm>
|
||||
#include <sstream>
|
||||
#include <iomanip>
|
||||
@ -33,14 +32,14 @@ std::string makeRegexpPatternFromGlobs(const std::string & initial_str_with_glob
|
||||
std::string escaped_with_globs = buf_for_escaping.str();
|
||||
|
||||
static const re2::RE2 enum_or_range(R"({([\d]+\.\.[\d]+|[^{}*,]+,[^{}*]*[^{}*,])})"); /// regexp for {expr1,expr2,expr3} or {M..N}, where M and N - non-negative integers, expr's should be without "{", "}", "*" and ","
|
||||
re2::StringPiece input(escaped_with_globs);
|
||||
re2::StringPiece matched;
|
||||
std::string_view input(escaped_with_globs);
|
||||
std::string_view matched;
|
||||
std::ostringstream oss_for_replacing; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
|
||||
oss_for_replacing.exceptions(std::ios::failbit);
|
||||
size_t current_index = 0;
|
||||
while (RE2::FindAndConsume(&input, enum_or_range, &matched))
|
||||
{
|
||||
std::string buffer{matched};
|
||||
std::string buffer(matched);
|
||||
oss_for_replacing << escaped_with_globs.substr(current_index, matched.data() - escaped_with_globs.data() - current_index - 1) << '(';
|
||||
|
||||
if (buffer.find(',') == std::string::npos)
|
||||
|
@ -42,7 +42,6 @@ private:
|
||||
UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override;
|
||||
|
||||
mutable LZ4::PerformanceStatistics lz4_stat;
|
||||
ASTPtr codec_desc;
|
||||
};
|
||||
|
||||
|
||||
|
@ -220,7 +220,7 @@ KeeperContext::Storage KeeperContext::getLogsPathFromConfig(const Poco::Util::Ab
|
||||
if (!fs::exists(path))
|
||||
fs::create_directories(path);
|
||||
|
||||
return std::make_shared<DiskLocal>("LocalLogDisk", path, 0);
|
||||
return std::make_shared<DiskLocal>("LocalLogDisk", path);
|
||||
};
|
||||
|
||||
/// the most specialized path
|
||||
@ -246,7 +246,7 @@ KeeperContext::Storage KeeperContext::getSnapshotsPathFromConfig(const Poco::Uti
|
||||
if (!fs::exists(path))
|
||||
fs::create_directories(path);
|
||||
|
||||
return std::make_shared<DiskLocal>("LocalSnapshotDisk", path, 0);
|
||||
return std::make_shared<DiskLocal>("LocalSnapshotDisk", path);
|
||||
};
|
||||
|
||||
/// the most specialized path
|
||||
@ -272,7 +272,7 @@ KeeperContext::Storage KeeperContext::getStatePathFromConfig(const Poco::Util::A
|
||||
if (!fs::exists(path))
|
||||
fs::create_directories(path);
|
||||
|
||||
return std::make_shared<DiskLocal>("LocalStateFileDisk", path, 0);
|
||||
return std::make_shared<DiskLocal>("LocalStateFileDisk", path);
|
||||
};
|
||||
|
||||
if (config.has("keeper_server.state_storage_disk"))
|
||||
|
@ -71,16 +71,16 @@ protected:
|
||||
DB::KeeperContextPtr keeper_context = std::make_shared<DB::KeeperContext>(true);
|
||||
Poco::Logger * log{&Poco::Logger::get("CoordinationTest")};
|
||||
|
||||
void setLogDirectory(const std::string & path) { keeper_context->setLogDisk(std::make_shared<DB::DiskLocal>("LogDisk", path, 0)); }
|
||||
void setLogDirectory(const std::string & path) { keeper_context->setLogDisk(std::make_shared<DB::DiskLocal>("LogDisk", path)); }
|
||||
|
||||
void setSnapshotDirectory(const std::string & path)
|
||||
{
|
||||
keeper_context->setSnapshotDisk(std::make_shared<DB::DiskLocal>("SnapshotDisk", path, 0));
|
||||
keeper_context->setSnapshotDisk(std::make_shared<DB::DiskLocal>("SnapshotDisk", path));
|
||||
}
|
||||
|
||||
void setStateFileDirectory(const std::string & path)
|
||||
{
|
||||
keeper_context->setStateFileDisk(std::make_shared<DB::DiskLocal>("StateFile", path, 0));
|
||||
keeper_context->setStateFileDisk(std::make_shared<DB::DiskLocal>("StateFile", path));
|
||||
}
|
||||
};
|
||||
|
||||
@ -1503,9 +1503,9 @@ void testLogAndStateMachine(
|
||||
using namespace DB;
|
||||
|
||||
ChangelogDirTest snapshots("./snapshots");
|
||||
keeper_context->setSnapshotDisk(std::make_shared<DiskLocal>("SnapshotDisk", "./snapshots", 0));
|
||||
keeper_context->setSnapshotDisk(std::make_shared<DiskLocal>("SnapshotDisk", "./snapshots"));
|
||||
ChangelogDirTest logs("./logs");
|
||||
keeper_context->setLogDisk(std::make_shared<DiskLocal>("LogDisk", "./logs", 0));
|
||||
keeper_context->setLogDisk(std::make_shared<DiskLocal>("LogDisk", "./logs"));
|
||||
|
||||
ResponsesQueue queue(std::numeric_limits<size_t>::max());
|
||||
SnapshotsQueue snapshots_queue{1};
|
||||
|
@ -517,6 +517,7 @@ class IColumn;
|
||||
M(Seconds, wait_for_window_view_fire_signal_timeout, 10, "Timeout for waiting for window view fire signal in event time processing", 0) \
|
||||
M(UInt64, min_free_disk_space_for_temporary_data, 0, "The minimum disk space to keep while writing temporary data used in external sorting and aggregation.", 0) \
|
||||
\
|
||||
M(DefaultTableEngine, default_temporary_table_engine, DefaultTableEngine::Memory, "Default table engine used when ENGINE is not set in CREATE TEMPORARY statement.",0) \
|
||||
M(DefaultTableEngine, default_table_engine, DefaultTableEngine::None, "Default table engine used when ENGINE is not set in CREATE statement.",0) \
|
||||
M(Bool, show_table_uuid_in_table_create_query_if_not_nil, false, "For tables in databases with Engine=Atomic show UUID of the table in its CREATE query.", 0) \
|
||||
M(Bool, database_atomic_wait_for_drop_and_detach_synchronously, false, "When executing DROP or DETACH TABLE in Atomic database, wait for table data to be finally dropped or detached.", 0) \
|
||||
|
@ -1,10 +1,11 @@
|
||||
#include <base/defines.h>
|
||||
#include <Core/SettingsQuirks.h>
|
||||
#include <Core/Settings.h>
|
||||
#include <Poco/Environment.h>
|
||||
#include <Poco/Platform.h>
|
||||
#include <Common/VersionNumber.h>
|
||||
#include <Common/logger_useful.h>
|
||||
#include <cstdlib>
|
||||
|
||||
|
||||
namespace
|
||||
{
|
||||
@ -71,6 +72,12 @@ void applySettingsQuirks(Settings & settings, Poco::Logger * log)
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(THREAD_SANITIZER)
|
||||
settings.use_hedged_requests.value = false;
|
||||
if (log)
|
||||
LOG_WARNING(log, "use_hedged_requests has been disabled for the build with Thread Sanitizer, because they are using fibers, leading to a failed assertion inside TSan");
|
||||
#endif
|
||||
|
||||
if (!queryProfilerWorks())
|
||||
{
|
||||
if (settings.query_profiler_real_time_period_ns)
|
||||
|
@ -3,6 +3,7 @@
|
||||
#include <filesystem>
|
||||
#include <Databases/DatabaseAtomic.h>
|
||||
#include <Databases/DatabaseDictionary.h>
|
||||
#include <Databases/DatabaseFilesystem.h>
|
||||
#include <Databases/DatabaseLazy.h>
|
||||
#include <Databases/DatabaseMemory.h>
|
||||
#include <Databases/DatabaseOrdinary.h>
|
||||
@ -47,6 +48,14 @@
|
||||
#include <Databases/SQLite/DatabaseSQLite.h>
|
||||
#endif
|
||||
|
||||
#if USE_AWS_S3
|
||||
#include <Databases/DatabaseS3.h>
|
||||
#endif
|
||||
|
||||
#if USE_HDFS
|
||||
#include <Databases/DatabaseHDFS.h>
|
||||
#endif
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
namespace DB
|
||||
@ -131,13 +140,13 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String
|
||||
|
||||
static const std::unordered_set<std::string_view> database_engines{"Ordinary", "Atomic", "Memory",
|
||||
"Dictionary", "Lazy", "Replicated", "MySQL", "MaterializeMySQL", "MaterializedMySQL",
|
||||
"PostgreSQL", "MaterializedPostgreSQL", "SQLite"};
|
||||
"PostgreSQL", "MaterializedPostgreSQL", "SQLite", "Filesystem", "S3", "HDFS"};
|
||||
|
||||
if (!database_engines.contains(engine_name))
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Database engine name `{}` does not exist", engine_name);
|
||||
|
||||
static const std::unordered_set<std::string_view> engines_with_arguments{"MySQL", "MaterializeMySQL", "MaterializedMySQL",
|
||||
"Lazy", "Replicated", "PostgreSQL", "MaterializedPostgreSQL", "SQLite"};
|
||||
"Lazy", "Replicated", "PostgreSQL", "MaterializedPostgreSQL", "SQLite", "Filesystem", "S3", "HDFS"};
|
||||
|
||||
static const std::unordered_set<std::string_view> engines_with_table_overrides{"MaterializeMySQL", "MaterializedMySQL", "MaterializedPostgreSQL"};
|
||||
bool engine_may_have_arguments = engines_with_arguments.contains(engine_name);
|
||||
@ -432,6 +441,63 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String
|
||||
}
|
||||
#endif
|
||||
|
||||
else if (engine_name == "Filesystem")
|
||||
{
|
||||
const ASTFunction * engine = engine_define->engine;
|
||||
|
||||
/// If init_path is empty, then the current path will be used
|
||||
std::string init_path;
|
||||
|
||||
if (engine->arguments && !engine->arguments->children.empty())
|
||||
{
|
||||
if (engine->arguments->children.size() != 1)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Filesystem database requires at most 1 argument: filesystem_path");
|
||||
|
||||
const auto & arguments = engine->arguments->children;
|
||||
init_path = safeGetLiteralValue<String>(arguments[0], engine_name);
|
||||
}
|
||||
|
||||
return std::make_shared<DatabaseFilesystem>(database_name, init_path, context);
|
||||
}
|
||||
|
||||
#if USE_AWS_S3
|
||||
else if (engine_name == "S3")
|
||||
{
|
||||
const ASTFunction * engine = engine_define->engine;
|
||||
|
||||
DatabaseS3::Configuration config;
|
||||
|
||||
if (engine->arguments && !engine->arguments->children.empty())
|
||||
{
|
||||
ASTs & engine_args = engine->arguments->children;
|
||||
config = DatabaseS3::parseArguments(engine_args, context);
|
||||
}
|
||||
|
||||
return std::make_shared<DatabaseS3>(database_name, config, context);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if USE_HDFS
|
||||
else if (engine_name == "HDFS")
|
||||
{
|
||||
const ASTFunction * engine = engine_define->engine;
|
||||
|
||||
/// If source_url is empty, then table name must contain full url
|
||||
std::string source_url;
|
||||
|
||||
if (engine->arguments && !engine->arguments->children.empty())
|
||||
{
|
||||
if (engine->arguments->children.size() != 1)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "HDFS database requires at most 1 argument: source_url");
|
||||
|
||||
const auto & arguments = engine->arguments->children;
|
||||
source_url = safeGetLiteralValue<String>(arguments[0], engine_name);
|
||||
}
|
||||
|
||||
return std::make_shared<DatabaseHDFS>(database_name, source_url, context);
|
||||
}
|
||||
#endif
|
||||
|
||||
throw Exception(ErrorCodes::UNKNOWN_DATABASE_ENGINE, "Unknown database engine: {}", engine_name);
|
||||
}
|
||||
|
||||
|
245
src/Databases/DatabaseFilesystem.cpp
Normal file
245
src/Databases/DatabaseFilesystem.cpp
Normal file
@ -0,0 +1,245 @@
|
||||
#include <Databases/DatabaseFilesystem.h>
|
||||
|
||||
#include <IO/Operators.h>
|
||||
#include <IO/WriteBufferFromString.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Parsers/ASTCreateQuery.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <Parsers/ASTLiteral.h>
|
||||
#include <Parsers/ParserCreateQuery.h>
|
||||
#include <Parsers/parseQuery.h>
|
||||
#include <Storages/IStorage.h>
|
||||
#include <TableFunctions/TableFunctionFactory.h>
|
||||
#include <Common/filesystemHelpers.h>
|
||||
|
||||
#include <filesystem>
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int UNKNOWN_TABLE;
|
||||
extern const int PATH_ACCESS_DENIED;
|
||||
extern const int BAD_ARGUMENTS;
|
||||
extern const int FILE_DOESNT_EXIST;
|
||||
}
|
||||
|
||||
/// Creates a database that exposes the files found under `path_` as tables.
///
/// A relative `path_` is joined to the user_files directory; in clickhouse-local the prefix is left
/// empty, so the path is made absolute by fs::absolute() instead.
/// Outside of clickhouse-local the resulting directory must lie inside user_files.
///
/// Throws BAD_ARGUMENTS if the resolved path is outside user_files or does not exist.
DatabaseFilesystem::DatabaseFilesystem(const String & name_, const String & path_, ContextPtr context_)
    : IDatabase(name_), WithContext(context_->getGlobalContext()), path(path_), log(&Poco::Logger::get("DatabaseFileSystem(" + name_ + ")"))
{
    bool is_local = context_->getApplicationType() == Context::ApplicationType::LOCAL;
    fs::path user_files_path = is_local ? "" : fs::canonical(getContext()->getUserFilesPath());

    if (fs::path(path).is_relative())
        path = user_files_path / path;

    path = fs::absolute(path).lexically_normal();

    /// Validate the resolved path rather than the raw argument: a relative argument such as "../.."
    /// is joined to user_files first and would otherwise never be checked at all.
    if (!is_local && !pathStartsWith(fs::path(path), user_files_path))
    {
        throw Exception(ErrorCodes::BAD_ARGUMENTS,
                        "Path must be inside user-files path: {}", user_files_path.string());
    }

    if (!fs::exists(path))
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Path does not exist: {}", path);
}
|
||||
|
||||
std::string DatabaseFilesystem::getTablePath(const std::string & table_name) const
|
||||
{
|
||||
fs::path table_path = fs::path(path) / table_name;
|
||||
return table_path.lexically_normal().string();
|
||||
}
|
||||
|
||||
void DatabaseFilesystem::addTable(const std::string & table_name, StoragePtr table_storage) const
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
auto [_, inserted] = loaded_tables.emplace(table_name, table_storage);
|
||||
if (!inserted)
|
||||
throw Exception(
|
||||
ErrorCodes::LOGICAL_ERROR,
|
||||
"Table with name `{}` already exists in database `{}` (engine {})",
|
||||
table_name, getDatabaseName(), getEngineName());
|
||||
}
|
||||
|
||||
bool DatabaseFilesystem::checkTableFilePath(const std::string & table_path, ContextPtr context_, bool throw_on_error) const
|
||||
{
|
||||
/// If run in Local mode, no need for path checking.
|
||||
bool check_path = context_->getApplicationType() != Context::ApplicationType::LOCAL;
|
||||
const auto & user_files_path = context_->getUserFilesPath();
|
||||
|
||||
/// Check access for file before checking its existence.
|
||||
if (check_path && !fileOrSymlinkPathStartsWith(table_path, user_files_path))
|
||||
{
|
||||
if (throw_on_error)
|
||||
throw Exception(ErrorCodes::PATH_ACCESS_DENIED, "File is not inside {}", user_files_path);
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Check if the corresponding file exists.
|
||||
if (!fs::exists(table_path))
|
||||
{
|
||||
if (throw_on_error)
|
||||
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "File does not exist: {}", table_path);
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!fs::is_regular_file(table_path))
|
||||
{
|
||||
if (throw_on_error)
|
||||
throw Exception(ErrorCodes::FILE_DOESNT_EXIST,
|
||||
"File is directory, but expected a file: {}", table_path);
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Looks up `name` in the cache of loaded tables.
/// Returns the cached storage, or nullptr when nothing is cached. If an entry is cached but its
/// backing file no longer exists, the entry is evicted and nullptr is returned.
StoragePtr DatabaseFilesystem::tryGetTableFromCache(const std::string & name) const
{
    StoragePtr cached;
    {
        std::lock_guard guard(mutex);
        if (auto found = loaded_tables.find(name); found != loaded_tables.end())
            cached = found->second;
    }

    if (!cached)
        return nullptr;

    /// The existence check is done without holding the mutex.
    if (fs::exists(getTablePath(name)))
        return cached;

    /// The file is gone: drop the stale cache entry.
    std::lock_guard guard(mutex);
    loaded_tables.erase(name);
    return nullptr;
}
|
||||
|
||||
bool DatabaseFilesystem::isTableExist(const String & name, ContextPtr context_) const
|
||||
{
|
||||
if (tryGetTableFromCache(name))
|
||||
return true;
|
||||
|
||||
return checkTableFilePath(getTablePath(name), context_, /* throw_on_error */false);
|
||||
}
|
||||
|
||||
/// Returns the storage for table `name`, creating and caching it on first access.
///
/// The cached storage is returned if present. Otherwise the backing file is validated with
/// checkTableFilePath() (which throws on failure) and a storage is created via the `file` table
/// function. Returns nullptr if the table function or the storage could not be obtained.
StoragePtr DatabaseFilesystem::getTableImpl(const String & name, ContextPtr context_) const
{
    /// Check if table exists in loaded tables map.
    if (auto table = tryGetTableFromCache(name))
        return table;

    auto table_path = getTablePath(name);
    checkTableFilePath(table_path, context_, /* throw_on_error */true);

    /// If the file exists, create a new table using TableFunctionFile and return it.
    auto args = makeASTFunction("file", std::make_shared<ASTLiteral>(table_path));

    auto table_function = TableFunctionFactory::instance().get(args, context_);
    if (!table_function)
        return nullptr;

    /// TableFunctionFile throws exceptions, if table cannot be created.
    auto table_storage = table_function->execute(args, context_, name);
    if (!table_storage)
        return nullptr;

    /// The mutex is not held between the cache lookup above and this point, so another thread may
    /// have cached the same table in the meantime. That is not an error: emplace() keeps the
    /// existing entry, and every caller gets the storage that is actually cached.
    std::lock_guard lock(mutex);
    return loaded_tables.emplace(name, std::move(table_storage)).first->second;
}
|
||||
|
||||
/// Returns the storage for table `name`.
/// Exceptions from getTableImpl() are propagated unchanged so that the user sees the real cause;
/// UNKNOWN_TABLE is thrown only when getTableImpl() returns nullptr.
StoragePtr DatabaseFilesystem::getTable(const String & name, ContextPtr context_) const
{
    auto storage = getTableImpl(name, context_);
    if (!storage)
        throw Exception(ErrorCodes::UNKNOWN_TABLE, "Table {}.{} doesn't exist",
                        backQuoteIfNeed(getDatabaseName()), backQuoteIfNeed(name));

    return storage;
}
|
||||
|
||||
/// Same as getTable(), but a missing file is reported as nullptr instead of an exception.
/// Any exception with a code other than FILE_DOESNT_EXIST is rethrown.
StoragePtr DatabaseFilesystem::tryGetTable(const String & name, ContextPtr context_) const
{
    try
    {
        return getTableImpl(name, context_);
    }
    catch (const Exception & e)
    {
        /// Only exceptions thrown by TableFunctionFile that indicate "there is no such table"
        /// are swallowed; see tests/02722_database_filesystem.sh for more details.
        if (e.code() != ErrorCodes::FILE_DOESNT_EXIST)
            throw;

        return nullptr;
    }
}
|
||||
|
||||
bool DatabaseFilesystem::empty() const
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
return loaded_tables.empty();
|
||||
}
|
||||
|
||||
/// Builds the AST of the `CREATE DATABASE ... ENGINE = Filesystem(...)` query that describes this
/// database, including the database comment when one is set.
ASTPtr DatabaseFilesystem::getCreateDatabaseQuery() const
{
    const auto & settings = getContext()->getSettingsRef();

    /// The path is emitted through quoteString() so that quotes or backslashes inside it are
    /// escaped; splicing it verbatim between '...' would produce a query that does not parse
    /// (or parses to a different path).
    const String query = fmt::format(
        "CREATE DATABASE {} ENGINE = Filesystem({})", backQuoteIfNeed(getDatabaseName()), quoteString(path));

    ParserCreateQuery parser;
    ASTPtr ast = parseQuery(parser, query.data(), query.data() + query.size(), "", 0, settings.max_parser_depth);

    if (const auto database_comment = getDatabaseComment(); !database_comment.empty())
    {
        auto & ast_create_query = ast->as<ASTCreateQuery &>();
        ast_create_query.set(ast_create_query.comment, std::make_shared<ASTLiteral>(database_comment));
    }

    return ast;
}
|
||||
|
||||
void DatabaseFilesystem::shutdown()
|
||||
{
|
||||
Tables tables_snapshot;
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
tables_snapshot = loaded_tables;
|
||||
}
|
||||
|
||||
for (const auto & kv : tables_snapshot)
|
||||
{
|
||||
auto table_id = kv.second->getStorageID();
|
||||
kv.second->flushAndShutdown();
|
||||
}
|
||||
|
||||
std::lock_guard lock(mutex);
|
||||
loaded_tables.clear();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an empty vector because the database is read-only and no tables can be backed up
|
||||
*/
|
||||
std::vector<std::pair<ASTPtr, StoragePtr>> DatabaseFilesystem::getTablesForBackup(const FilterByNameFunction &, const ContextPtr &) const
|
||||
{
|
||||
return {};
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* Returns an empty iterator because the database does not have its own tables
|
||||
* But only caches them for quick access
|
||||
*/
|
||||
DatabaseTablesIteratorPtr DatabaseFilesystem::getTablesIterator(ContextPtr, const FilterByNameFunction &) const
|
||||
{
|
||||
return std::make_unique<DatabaseTablesSnapshotIterator>(Tables{}, getDatabaseName());
|
||||
}
|
||||
|
||||
}
|
67
src/Databases/DatabaseFilesystem.h
Normal file
67
src/Databases/DatabaseFilesystem.h
Normal file
@ -0,0 +1,67 @@
|
||||
#pragma once
|
||||
|
||||
#include <mutex>
|
||||
#include <Databases/IDatabase.h>
|
||||
#include <Parsers/IAST.h>
|
||||
#include <Storages/IStorage_fwd.h>
|
||||
#include <base/types.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class Context;
|
||||
|
||||
/**
|
||||
* DatabaseFilesystem allows to interact with files stored on the local filesystem.
|
||||
* Uses TableFunctionFile to implicitly load file when a user requests the table,
|
||||
* and provides a read-only access to the data in the file.
|
||||
* Tables are cached inside the database for quick access
|
||||
*
|
||||
* Used in clickhouse-local to access local files.
|
||||
* For clickhouse-server, allows access to files only from the user_files directory.
|
||||
*/
|
||||
class DatabaseFilesystem : public IDatabase, protected WithContext
|
||||
{
|
||||
public:
|
||||
/// NOTE(review): the constructor is defined outside this view; `path` presumably initializes the `path` member - confirm.
DatabaseFilesystem(const String & name, const String & path, ContextPtr context);
|
||||
|
||||
/// Engine name reported for this database.
String getEngineName() const override { return "Filesystem"; }
|
||||
|
||||
bool isTableExist(const String & name, ContextPtr context) const override;
|
||||
|
||||
StoragePtr getTable(const String & name, ContextPtr context) const override;
|
||||
|
||||
StoragePtr tryGetTable(const String & name, ContextPtr context) const override;
|
||||
|
||||
bool shouldBeEmptyOnDetach() const override { return false; } /// Contains only temporary tables.
|
||||
|
||||
bool empty() const override;
|
||||
|
||||
/// The database is always reported as read-only.
bool isReadOnly() const override { return true; }
|
||||
|
||||
/// Returns the AST of `CREATE DATABASE <name> ENGINE = Filesystem('<path>')`, with the database comment attached when it is non-empty.
ASTPtr getCreateDatabaseQuery() const override;
|
||||
|
||||
/// Calls flushAndShutdown() on every storage in `loaded_tables`, then clears `loaded_tables`.
void shutdown() override;
|
||||
|
||||
/// Always returns an empty vector.
std::vector<std::pair<ASTPtr, StoragePtr>> getTablesForBackup(const FilterByNameFunction &, const ContextPtr &) const override;
|
||||
|
||||
/// Always returns an iterator over an empty table set; cached tables are not listed.
DatabaseTablesIteratorPtr getTablesIterator(ContextPtr, const FilterByNameFunction &) const override;
|
||||
|
||||
protected:
|
||||
/// NOTE(review): the helpers below are defined outside this view; their behavior should be confirmed against the .cpp file.
StoragePtr getTableImpl(const String & name, ContextPtr context) const;
|
||||
|
||||
StoragePtr tryGetTableFromCache(const std::string & name) const;
|
||||
|
||||
std::string getTablePath(const std::string & table_name) const;
|
||||
|
||||
void addTable(const std::string & table_name, StoragePtr table_storage) const;
|
||||
|
||||
bool checkTableFilePath(const std::string & table_path, ContextPtr context_, bool throw_on_error) const;
|
||||
|
||||
private:
|
||||
/// Path written into the `Filesystem('<path>')` engine clause by getCreateDatabaseQuery().
String path;
|
||||
/// Cache of table storages. Annotated as guarded by `mutex`, which is not declared in this class
/// (presumably inherited from IDatabase - confirm). `mutable` because const methods update the cache.
mutable Tables loaded_tables TSA_GUARDED_BY(mutex);
|
||||
Poco::Logger * log;
|
||||
};
|
||||
|
||||
}
|
234
src/Databases/DatabaseHDFS.cpp
Normal file
234
src/Databases/DatabaseHDFS.cpp
Normal file
@ -0,0 +1,234 @@
|
||||
#include "config.h"
|
||||
|
||||
#if USE_HDFS
|
||||
|
||||
#include <Databases/DatabaseHDFS.h>
|
||||
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Parsers/ASTCreateQuery.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <Parsers/ASTLiteral.h>
|
||||
#include <Parsers/parseQuery.h>
|
||||
#include <Parsers/ParserCreateQuery.h>
|
||||
#include <Storages/HDFS/HDFSCommon.h>
|
||||
#include <Storages/IStorage.h>
|
||||
#include <TableFunctions/TableFunctionFactory.h>
|
||||
|
||||
#include <Poco/URI.h>
|
||||
#include <re2/re2.h>
|
||||
|
||||
#include <filesystem>
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int UNKNOWN_TABLE;
|
||||
extern const int BAD_ARGUMENTS;
|
||||
extern const int FILE_DOESNT_EXIST;
|
||||
extern const int UNACCEPTABLE_URL;
|
||||
extern const int ACCESS_DENIED;
|
||||
extern const int DATABASE_ACCESS_DENIED;
|
||||
extern const int HDFS_ERROR;
|
||||
extern const int CANNOT_EXTRACT_TABLE_STRUCTURE;
|
||||
}
|
||||
|
||||
/// Pattern that a non-empty `source` must fully match in the DatabaseHDFS constructor:
/// the "hdfs://" scheme followed by any run of characters other than '/'.
static constexpr std::string_view HDFS_HOST_REGEXP = "^hdfs://[^/]*";
|
||||
|
||||
|
||||
/// Remembers the optional HDFS source prefix.
/// A non-empty prefix must fully match HDFS_HOST_REGEXP (otherwise BAD_ARGUMENTS is thrown)
/// and is then checked against the remote host filter of the global context.
DatabaseHDFS::DatabaseHDFS(const String & name_, const String & source_url, ContextPtr context_)
    : IDatabase(name_)
    , WithContext(context_->getGlobalContext())
    , source(source_url)
    , log(&Poco::Logger::get("DatabaseHDFS(" + name_ + ")"))
{
    /// An empty source is allowed and needs no validation.
    if (source.empty())
        return;

    const bool host_is_well_formed = re2::RE2::FullMatch(source, std::string(HDFS_HOST_REGEXP));
    if (!host_is_well_formed)
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bad hdfs host: {}. "
                        "It should have structure 'hdfs://<host_name>:<port>'", source);

    context_->getGlobalContext()->getRemoteHostFilter().checkURL(Poco::URI(source));
}
|
||||
|
||||
void DatabaseHDFS::addTable(const std::string & table_name, StoragePtr table_storage) const
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
auto [_, inserted] = loaded_tables.emplace(table_name, table_storage);
|
||||
if (!inserted)
|
||||
throw Exception(
|
||||
ErrorCodes::LOGICAL_ERROR,
|
||||
"Table with name `{}` already exists in database `{}` (engine {})",
|
||||
table_name, getDatabaseName(), getEngineName());
|
||||
}
|
||||
|
||||
std::string DatabaseHDFS::getTablePath(const std::string & table_name) const
|
||||
{
|
||||
if (table_name.starts_with("hdfs://"))
|
||||
return table_name;
|
||||
|
||||
if (source.empty())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bad hdfs url: {}. "
|
||||
"It should have structure 'hdfs://<host_name>:<port>/path'", table_name);
|
||||
|
||||
return fs::path(source) / table_name;
|
||||
}
|
||||
|
||||
bool DatabaseHDFS::checkUrl(const std::string & url, ContextPtr context_, bool throw_on_error) const
|
||||
{
|
||||
try
|
||||
{
|
||||
checkHDFSURL(url);
|
||||
context_->getGlobalContext()->getRemoteHostFilter().checkURL(Poco::URI(url));
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
if (throw_on_error)
|
||||
throw;
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool DatabaseHDFS::isTableExist(const String & name, ContextPtr context_) const
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
if (loaded_tables.find(name) != loaded_tables.end())
|
||||
return true;
|
||||
|
||||
return checkUrl(name, context_, false);
|
||||
}
|
||||
|
||||
/// Returns the storage for `name`, creating it through the `hdfs` table function on first access.
/// Cached storages are returned directly. Exceptions from URL validation and from the
/// table function propagate to the caller. Returns nullptr when no table function or storage is obtained.
StoragePtr DatabaseHDFS::getTableImpl(const String & name, ContextPtr context_) const
{
    /// Fast path: the table was already created earlier.
    {
        std::lock_guard lock(mutex);
        if (auto cached = loaded_tables.find(name); cached != loaded_tables.end())
            return cached->second;
    }

    auto url = getTablePath(name);
    checkUrl(url, context_, true);

    auto hdfs_call = makeASTFunction("hdfs", std::make_shared<ASTLiteral>(url));

    auto table_function = TableFunctionFactory::instance().get(hdfs_call, context_);
    if (!table_function)
        return nullptr;

    /// TableFunctionHDFS throws exceptions, if table cannot be created.
    auto storage = table_function->execute(hdfs_call, context_, name);
    if (storage)
        addTable(name, storage);

    return storage;
}
|
||||
|
||||
/// Like getTableImpl(), but a missing table is reported as UNKNOWN_TABLE.
/// Exceptions from TableFunctionHDFS are deliberately not caught, so the user sees the original error.
StoragePtr DatabaseHDFS::getTable(const String & name, ContextPtr context_) const
{
    auto storage = getTableImpl(name, context_);
    if (!storage)
        throw Exception(ErrorCodes::UNKNOWN_TABLE, "Table {}.{} doesn't exist",
                        backQuoteIfNeed(getDatabaseName()), backQuoteIfNeed(name));

    return storage;
}
|
||||
|
||||
/// Non-throwing lookup: returns nullptr instead of failing when the error means "there is no such table".
/// Exceptions with any other code are rethrown; a Poco::URISyntaxException also yields nullptr.
StoragePtr DatabaseHDFS::tryGetTable(const String & name, ContextPtr context_) const
{
    try
    {
        return getTableImpl(name, context_);
    }
    catch (const Exception & e)
    {
        /// Error codes thrown by TableFunctionHDFS that indicate the table does not exist.
        const int no_table_codes[] = {
            ErrorCodes::BAD_ARGUMENTS,
            ErrorCodes::ACCESS_DENIED,
            ErrorCodes::DATABASE_ACCESS_DENIED,
            ErrorCodes::FILE_DOESNT_EXIST,
            ErrorCodes::UNACCEPTABLE_URL,
            ErrorCodes::HDFS_ERROR,
            ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE,
        };

        const int thrown_code = e.code();
        for (const int code : no_table_codes)
        {
            if (thrown_code == code)
                return nullptr;
        }

        throw;
    }
    catch (const Poco::URISyntaxException &)
    {
        return nullptr;
    }
}
|
||||
|
||||
bool DatabaseHDFS::empty() const
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
return loaded_tables.empty();
|
||||
}
|
||||
|
||||
/// Produces the AST of a `CREATE DATABASE <name> ENGINE = HDFS('<source>')` statement for this database.
/// The statement is formatted as text and parsed back; a non-empty database comment is attached to the result.
ASTPtr DatabaseHDFS::getCreateDatabaseQuery() const
{
    const auto & current_settings = getContext()->getSettingsRef();
    ParserCreateQuery create_parser;

    const String create_statement = fmt::format(
        "CREATE DATABASE {} ENGINE = HDFS('{}')", backQuoteIfNeed(getDatabaseName()), source);
    const char * const statement_begin = create_statement.data();
    const char * const statement_end = statement_begin + create_statement.size();
    ASTPtr create_ast = parseQuery(create_parser, statement_begin, statement_end, "", 0, current_settings.max_parser_depth);

    const auto comment_text = getDatabaseComment();
    if (comment_text.empty())
        return create_ast;

    auto & create_query = create_ast->as<ASTCreateQuery &>();
    create_query.set(create_query.comment, std::make_shared<ASTLiteral>(comment_text));
    return create_ast;
}
|
||||
|
||||
void DatabaseHDFS::shutdown()
|
||||
{
|
||||
Tables tables_snapshot;
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
tables_snapshot = loaded_tables;
|
||||
}
|
||||
|
||||
for (const auto & kv : tables_snapshot)
|
||||
{
|
||||
auto table_id = kv.second->getStorageID();
|
||||
kv.second->flushAndShutdown();
|
||||
}
|
||||
|
||||
std::lock_guard lock(mutex);
|
||||
loaded_tables.clear();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an empty vector because the database is read-only and no tables can be backed up
|
||||
*/
|
||||
std::vector<std::pair<ASTPtr, StoragePtr>> DatabaseHDFS::getTablesForBackup(const FilterByNameFunction &, const ContextPtr &) const
|
||||
{
|
||||
return {};
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* Returns an empty iterator because the database does not have its own tables
|
||||
* But only caches them for quick access
|
||||
*/
|
||||
DatabaseTablesIteratorPtr DatabaseHDFS::getTablesIterator(ContextPtr, const FilterByNameFunction &) const
|
||||
{
|
||||
return std::make_unique<DatabaseTablesSnapshotIterator>(Tables{}, getDatabaseName());
|
||||
}
|
||||
|
||||
} // DB
|
||||
|
||||
#endif
|
68
src/Databases/DatabaseHDFS.h
Normal file
68
src/Databases/DatabaseHDFS.h
Normal file
@ -0,0 +1,68 @@
|
||||
#pragma once
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#if USE_HDFS
|
||||
|
||||
#include <mutex>
|
||||
#include <Databases/IDatabase.h>
|
||||
#include <Parsers/IAST.h>
|
||||
#include <Storages/IStorage_fwd.h>
|
||||
#include <base/types.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class Context;
|
||||
|
||||
/**
|
||||
* DatabaseHDFS allows to interact with files stored in HDFS.
|
||||
* Uses TableFunctionHDFS to implicitly load file when a user requests the table,
|
||||
* and provides read-only access to the data in the file.
|
||||
* Tables are cached inside the database for quick access.
|
||||
*/
|
||||
class DatabaseHDFS : public IDatabase, protected WithContext
|
||||
{
|
||||
public:
|
||||
DatabaseHDFS(const String & name, const String & source_url, ContextPtr context);
|
||||
|
||||
String getEngineName() const override { return "S3"; }
|
||||
|
||||
bool isTableExist(const String & name, ContextPtr context) const override;
|
||||
|
||||
StoragePtr getTable(const String & name, ContextPtr context) const override;
|
||||
|
||||
StoragePtr tryGetTable(const String & name, ContextPtr context) const override;
|
||||
|
||||
bool shouldBeEmptyOnDetach() const override { return false; } /// Contains only temporary tables.
|
||||
|
||||
bool empty() const override;
|
||||
|
||||
bool isReadOnly() const override { return true; }
|
||||
|
||||
ASTPtr getCreateDatabaseQuery() const override;
|
||||
|
||||
void shutdown() override;
|
||||
|
||||
std::vector<std::pair<ASTPtr, StoragePtr>> getTablesForBackup(const FilterByNameFunction &, const ContextPtr &) const override;
|
||||
DatabaseTablesIteratorPtr getTablesIterator(ContextPtr, const FilterByNameFunction &) const override;
|
||||
|
||||
protected:
|
||||
StoragePtr getTableImpl(const String & name, ContextPtr context) const;
|
||||
|
||||
void addTable(const std::string & table_name, StoragePtr table_storage) const;
|
||||
|
||||
bool checkUrl(const std::string & url, ContextPtr context_, bool throw_on_error) const;
|
||||
|
||||
std::string getTablePath(const std::string & table_name) const;
|
||||
|
||||
private:
|
||||
const String source;
|
||||
|
||||
mutable Tables loaded_tables TSA_GUARDED_BY(mutex);
|
||||
Poco::Logger * log;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
312
src/Databases/DatabaseS3.cpp
Normal file
312
src/Databases/DatabaseS3.cpp
Normal file
@ -0,0 +1,312 @@
|
||||
#include "config.h"
|
||||
|
||||
#if USE_AWS_S3
|
||||
|
||||
#include <Databases/DatabaseS3.h>
|
||||
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Interpreters/evaluateConstantExpression.h>
|
||||
#include <IO/S3/URI.h>
|
||||
#include <Parsers/ASTCreateQuery.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <Parsers/ASTLiteral.h>
|
||||
#include <Parsers/parseQuery.h>
|
||||
#include <Parsers/ParserCreateQuery.h>
|
||||
#include <Storages/checkAndGetLiteralArgument.h>
|
||||
#include <Storages/IStorage.h>
|
||||
#include <Storages/NamedCollectionsHelpers.h>
|
||||
#include <TableFunctions/TableFunctionFactory.h>
|
||||
|
||||
#include <boost/algorithm/string.hpp>
|
||||
#include <filesystem>
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Keys passed to validateNamedCollection() as the optional key set when the
/// engine arguments are given as a named collection (see DatabaseS3::parseArguments).
static const std::unordered_set<std::string_view> optional_configuration_keys = {
|
||||
"url",
|
||||
"access_key_id",
|
||||
"secret_access_key",
|
||||
"no_sign_request"
|
||||
};
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int UNKNOWN_TABLE;
|
||||
extern const int BAD_ARGUMENTS;
|
||||
extern const int FILE_DOESNT_EXIST;
|
||||
extern const int UNACCEPTABLE_URL;
|
||||
extern const int S3_ERROR;
|
||||
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
}
|
||||
|
||||
/// Stores a copy of the parsed engine configuration; no validation is performed here.
/// The database keeps the global context rather than the context it was created with.
DatabaseS3::DatabaseS3(const String & name_, const Configuration & config_, ContextPtr context_)
    : IDatabase(name_)
    , WithContext(context_->getGlobalContext())
    , config(config_)
    , log(&Poco::Logger::get("DatabaseS3(" + name_ + ")"))
{
}
|
||||
|
||||
void DatabaseS3::addTable(const std::string & table_name, StoragePtr table_storage) const
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
auto [_, inserted] = loaded_tables.emplace(table_name, table_storage);
|
||||
if (!inserted)
|
||||
throw Exception(
|
||||
ErrorCodes::LOGICAL_ERROR,
|
||||
"Table with name `{}` already exists in database `{}` (engine {})",
|
||||
table_name, getDatabaseName(), getEngineName());
|
||||
}
|
||||
|
||||
std::string DatabaseS3::getFullUrl(const std::string & name) const
|
||||
{
|
||||
if (!config.url_prefix.empty())
|
||||
return fs::path(config.url_prefix) / name;
|
||||
|
||||
return name;
|
||||
}
|
||||
|
||||
bool DatabaseS3::checkUrl(const std::string & url, ContextPtr context_, bool throw_on_error) const
|
||||
{
|
||||
try
|
||||
{
|
||||
S3::URI uri(url);
|
||||
context_->getGlobalContext()->getRemoteHostFilter().checkURL(uri.uri);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
if (throw_on_error)
|
||||
throw;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool DatabaseS3::isTableExist(const String & name, ContextPtr context_) const
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
if (loaded_tables.find(name) != loaded_tables.end())
|
||||
return true;
|
||||
|
||||
return checkUrl(getFullUrl(name), context_, false);
|
||||
}
|
||||
|
||||
/// Returns the storage for `name`, creating it through the `s3` table function on first access.
/// The call is built as s3(url), s3(url, 'NOSIGN') or s3(url, access_key_id, secret_access_key),
/// depending on the configuration. Exceptions from URL validation and from the table function
/// propagate to the caller. Returns nullptr when no table function or storage is obtained.
StoragePtr DatabaseS3::getTableImpl(const String & name, ContextPtr context_) const
{
    /// Fast path: the table was already created earlier.
    {
        std::lock_guard lock(mutex);
        if (auto cached = loaded_tables.find(name); cached != loaded_tables.end())
            return cached->second;
    }

    auto url = getFullUrl(name);
    checkUrl(url, context_, /* throw_on_error */true);

    auto s3_call = std::make_shared<ASTFunction>();
    s3_call->name = "s3";
    s3_call->arguments = std::make_shared<ASTExpressionList>();
    s3_call->children.push_back(s3_call->arguments);

    auto & call_arguments = s3_call->arguments->children;
    call_arguments.push_back(std::make_shared<ASTLiteral>(url));

    const bool has_credentials = config.access_key_id.has_value() && config.secret_access_key.has_value();
    if (config.no_sign_request)
    {
        call_arguments.push_back(std::make_shared<ASTLiteral>("NOSIGN"));
    }
    else if (has_credentials)
    {
        call_arguments.push_back(std::make_shared<ASTLiteral>(config.access_key_id.value()));
        call_arguments.push_back(std::make_shared<ASTLiteral>(config.secret_access_key.value()));
    }

    auto table_function = TableFunctionFactory::instance().get(s3_call, context_);
    if (!table_function)
        return nullptr;

    /// TableFunctionS3 throws exceptions, if table cannot be created.
    auto storage = table_function->execute(s3_call, context_, name);
    if (storage)
        addTable(name, storage);

    return storage;
}
|
||||
|
||||
/// Like getTableImpl(), but a missing table is reported as UNKNOWN_TABLE.
/// Exceptions from TableFunctionS3 are deliberately not caught, so the user sees the original error.
StoragePtr DatabaseS3::getTable(const String & name, ContextPtr context_) const
{
    auto storage = getTableImpl(name, context_);
    if (!storage)
        throw Exception(ErrorCodes::UNKNOWN_TABLE, "Table {}.{} doesn't exist",
                        backQuoteIfNeed(getDatabaseName()), backQuoteIfNeed(name));

    return storage;
}
|
||||
|
||||
/// Non-throwing lookup: returns nullptr instead of failing when the error means "there is no such table".
/// Exceptions with any other code are rethrown; a Poco::URISyntaxException also yields nullptr.
StoragePtr DatabaseS3::tryGetTable(const String & name, ContextPtr context_) const
{
    try
    {
        return getTableImpl(name, context_);
    }
    catch (const Exception & e)
    {
        /// Error codes thrown by TableFunctionS3 that indicate the table does not exist.
        const int no_table_codes[] = {
            ErrorCodes::BAD_ARGUMENTS,
            ErrorCodes::S3_ERROR,
            ErrorCodes::FILE_DOESNT_EXIST,
            ErrorCodes::UNACCEPTABLE_URL,
        };

        const int thrown_code = e.code();
        for (const int code : no_table_codes)
        {
            if (thrown_code == code)
                return nullptr;
        }

        throw;
    }
    catch (const Poco::URISyntaxException &)
    {
        return nullptr;
    }
}
|
||||
|
||||
bool DatabaseS3::empty() const
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
return loaded_tables.empty();
|
||||
}
|
||||
|
||||
/// Produces the AST of a `CREATE DATABASE <name> ENGINE = S3(...)` statement for this database.
/// The engine arguments mirror the configuration: the URL prefix, followed by either 'NOSIGN'
/// or the access key pair. A non-empty database comment is attached to the result.
/// NOTE(review): the secret access key is written into the generated statement in plain text.
ASTPtr DatabaseS3::getCreateDatabaseQuery() const
{
    const auto & current_settings = getContext()->getSettingsRef();
    ParserCreateQuery create_parser;

    std::string engine_arguments = fmt::format("'{}'", config.url_prefix);
    if (config.no_sign_request)
        engine_arguments += ", 'NOSIGN'";
    else if (config.access_key_id.has_value() && config.secret_access_key.has_value())
        engine_arguments += fmt::format(", '{}', '{}'", *config.access_key_id, *config.secret_access_key);

    const String create_statement
        = fmt::format("CREATE DATABASE {} ENGINE = S3({})", backQuoteIfNeed(getDatabaseName()), engine_arguments);
    const char * const statement_begin = create_statement.data();
    const char * const statement_end = statement_begin + create_statement.size();
    ASTPtr create_ast = parseQuery(create_parser, statement_begin, statement_end, "", 0, current_settings.max_parser_depth);

    const auto comment_text = getDatabaseComment();
    if (comment_text.empty())
        return create_ast;

    auto & create_query = create_ast->as<ASTCreateQuery &>();
    create_query.set(create_query.comment, std::make_shared<ASTLiteral>(comment_text));
    return create_ast;
}
|
||||
|
||||
void DatabaseS3::shutdown()
|
||||
{
|
||||
Tables tables_snapshot;
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
tables_snapshot = loaded_tables;
|
||||
}
|
||||
|
||||
for (const auto & kv : tables_snapshot)
|
||||
{
|
||||
auto table_id = kv.second->getStorageID();
|
||||
kv.second->flushAndShutdown();
|
||||
}
|
||||
|
||||
std::lock_guard lock(mutex);
|
||||
loaded_tables.clear();
|
||||
}
|
||||
|
||||
/// Converts the arguments of `ENGINE = S3(...)` into a Configuration.
///
/// Two forms are accepted:
///  - a named collection, validated against `optional_configuration_keys`;
///  - positional literals: S3(), S3('url'), S3('url', 'NOSIGN') or S3('url', 'access_key_id', 'secret_access_key').
///
/// Throws NUMBER_OF_ARGUMENTS_DOESNT_MATCH for more than three positional arguments and
/// BAD_ARGUMENTS when the two- or three-argument form does not match the signatures above.
DatabaseS3::Configuration DatabaseS3::parseArguments(ASTs engine_args, ContextPtr context_)
{
    Configuration result;

    if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, context_))
    {
        auto & collection = *named_collection;

        validateNamedCollection(collection, {}, optional_configuration_keys);

        result.url_prefix = collection.getOrDefault<String>("url", "");
        result.no_sign_request = collection.getOrDefault<bool>("no_sign_request", false);

        /// Empty credentials are treated the same as absent ones.
        auto collection_key_id = collection.getOrDefault<String>("access_key_id", "");
        auto collection_secret_key = collection.getOrDefault<String>("secret_access_key", "");

        if (!collection_key_id.empty())
            result.access_key_id = collection_key_id;

        if (!collection_secret_key.empty())
            result.secret_access_key = collection_secret_key;

        return result;
    }

    const std::string supported_signature =
        " - S3()\n"
        " - S3('url')\n"
        " - S3('url', 'NOSIGN')\n"
        " - S3('url', 'access_key_id', 'secret_access_key')\n";
    const auto error_message =
        fmt::format("Engine DatabaseS3 must have the following arguments signature\n{}", supported_signature);

    /// Every argument is folded to a literal before the argument count is checked.
    for (auto & arg : engine_args)
        arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context_);

    const auto arguments_count = engine_args.size();

    if (arguments_count > 3)
        throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, error_message.c_str());

    if (arguments_count == 0)
        return result;

    result.url_prefix = checkAndGetLiteralArgument<String>(engine_args[0], "url");

    if (arguments_count == 2)
    {
        // url, NOSIGN
        auto second_arg = checkAndGetLiteralArgument<String>(engine_args[1], "NOSIGN");
        if (!boost::iequals(second_arg, "NOSIGN"))
            throw Exception(ErrorCodes::BAD_ARGUMENTS, error_message.c_str());

        result.no_sign_request = true;
    }
    else if (arguments_count == 3)
    {
        // url, access_key_id, secret_access_key
        auto key_id = checkAndGetLiteralArgument<String>(engine_args[1], "access_key_id");
        auto secret_key = checkAndGetLiteralArgument<String>(engine_args[2], "secret_access_key");

        const bool credentials_are_invalid = key_id.empty() || secret_key.empty() || boost::iequals(key_id, "NOSIGN");
        if (credentials_are_invalid)
            throw Exception(ErrorCodes::BAD_ARGUMENTS, error_message.c_str());

        result.access_key_id = key_id;
        result.secret_access_key = secret_key;
    }

    return result;
}
|
||||
|
||||
/**
|
||||
* Returns an empty vector because the database is read-only and no tables can be backed up
|
||||
*/
|
||||
std::vector<std::pair<ASTPtr, StoragePtr>> DatabaseS3::getTablesForBackup(const FilterByNameFunction &, const ContextPtr &) const
|
||||
{
|
||||
return {};
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* Returns an empty iterator because the database does not have its own tables
|
||||
* But only caches them for quick access
|
||||
*/
|
||||
DatabaseTablesIteratorPtr DatabaseS3::getTablesIterator(ContextPtr, const FilterByNameFunction &) const
|
||||
{
|
||||
return std::make_unique<DatabaseTablesSnapshotIterator>(Tables{}, getDatabaseName());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
81
src/Databases/DatabaseS3.h
Normal file
81
src/Databases/DatabaseS3.h
Normal file
@ -0,0 +1,81 @@
|
||||
#pragma once
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#if USE_AWS_S3
|
||||
|
||||
#include <mutex>
|
||||
#include <Databases/IDatabase.h>
|
||||
#include <Parsers/IAST.h>
|
||||
#include <Storages/IStorage_fwd.h>
|
||||
#include <base/types.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class Context;
|
||||
|
||||
/**
|
||||
* DatabaseS3 provides access to data stored in S3.
|
||||
* Uses TableFunctionS3 to implicitly load file when a user requests the table,
|
||||
* and provides read-only access to the data in the file.
|
||||
* Tables are cached inside the database for quick access.
|
||||
*/
|
||||
class DatabaseS3 : public IDatabase, protected WithContext
|
||||
{
|
||||
public:
|
||||
/// Parsed engine arguments; produced by parseArguments().
struct Configuration
|
||||
{
|
||||
/// Prefix prepended to table names by getFullUrl(); may be empty.
std::string url_prefix;
|
||||
|
||||
/// When true, 'NOSIGN' is passed to the `s3` table function instead of credentials.
bool no_sign_request = false;
|
||||
|
||||
/// Credentials are passed to the `s3` table function only when both values are set.
std::optional<std::string> access_key_id;
|
||||
std::optional<std::string> secret_access_key;
|
||||
};
|
||||
|
||||
/// Stores a copy of `config`; the global context of `context` is kept.
DatabaseS3(const String & name, const Configuration& config, ContextPtr context);
|
||||
|
||||
/// Engine name reported for this database.
String getEngineName() const override { return "S3"; }
|
||||
|
||||
/// True when the table is cached or when its full URL passes checkUrl() without throwing.
bool isTableExist(const String & name, ContextPtr context) const override;
|
||||
|
||||
/// Returns the storage for `name`; throws UNKNOWN_TABLE when none is obtained.
StoragePtr getTable(const String & name, ContextPtr context) const override;
|
||||
|
||||
/// Like getTable(), but returns nullptr for errors that mean "no such table".
StoragePtr tryGetTable(const String & name, ContextPtr context) const override;
|
||||
|
||||
// Contains only temporary tables
|
||||
bool shouldBeEmptyOnDetach() const override { return false; }
|
||||
|
||||
/// True when no table storage is currently cached.
bool empty() const override;
|
||||
|
||||
/// The database is always reported as read-only.
bool isReadOnly() const override { return true; }
|
||||
|
||||
/// Returns the AST of `CREATE DATABASE <name> ENGINE = S3(...)`, with the database comment attached when it is non-empty.
ASTPtr getCreateDatabaseQuery() const override;
|
||||
|
||||
/// Calls flushAndShutdown() on every cached storage, then clears the cache.
void shutdown() override;
|
||||
|
||||
/// Always returns an empty vector.
std::vector<std::pair<ASTPtr, StoragePtr>> getTablesForBackup(const FilterByNameFunction &, const ContextPtr &) const override;
|
||||
/// Always returns an iterator over an empty table set; cached tables are not listed.
DatabaseTablesIteratorPtr getTablesIterator(ContextPtr, const FilterByNameFunction &) const override;
|
||||
|
||||
/// Builds a Configuration from the engine arguments (a named collection or up to three positional literals).
static Configuration parseArguments(ASTs engine_args, ContextPtr context);
|
||||
|
||||
protected:
|
||||
/// Looks the table up in the cache, otherwise creates it through the `s3` table function and caches it.
StoragePtr getTableImpl(const String & name, ContextPtr context) const;
|
||||
|
||||
/// Adds a storage to the cache; throws LOGICAL_ERROR when the name is already cached.
void addTable(const std::string & table_name, StoragePtr table_storage) const;
|
||||
|
||||
/// Parses `url` as an S3::URI and checks it against the remote host filter.
/// On failure either rethrows (`throw_on_error`) or returns false.
bool checkUrl(const std::string & url, ContextPtr context_, bool throw_on_error) const;
|
||||
|
||||
/// Returns `name` appended to `config.url_prefix` when the prefix is non-empty; otherwise returns `name` unchanged.
std::string getFullUrl(const std::string & name) const;
|
||||
|
||||
private:
|
||||
const Configuration config;
|
||||
|
||||
/// Cache of table storages. Annotated as guarded by `mutex`, which is not declared in this class
/// (presumably inherited from IDatabase - confirm). `mutable` because const methods update the cache.
mutable Tables loaded_tables TSA_GUARDED_BY(mutex);
|
||||
Poco::Logger * log;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
266
src/Databases/DatabasesOverlay.cpp
Normal file
266
src/Databases/DatabasesOverlay.cpp
Normal file
@ -0,0 +1,266 @@
|
||||
#include <Databases/DatabasesOverlay.h>
|
||||
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Interpreters/InterpreterCreateQuery.h>
|
||||
#include <Parsers/ASTCreateQuery.h>
|
||||
|
||||
#include <Storages/IStorage_fwd.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int CANNOT_GET_CREATE_TABLE_QUERY;
|
||||
}
|
||||
|
||||
/// Creates an overlay with no underlying databases; they are added later through registerNextDatabase().
/// The overlay keeps the global context rather than the context it was created with.
DatabasesOverlay::DatabasesOverlay(const String & name_, ContextPtr context_)
    : IDatabase(name_)
    , WithContext(context_->getGlobalContext())
    , log(&Poco::Logger::get("DatabaseOverlay(" + name_ + ")"))
{
}
|
||||
|
||||
/// Appends `database` to the end of the list of underlying databases.
/// Returns a reference to this overlay so that registrations can be chained.
DatabasesOverlay & DatabasesOverlay::registerNextDatabase(DatabasePtr database)
{
    databases.emplace_back(std::move(database));
    return *this;
}
|
||||
|
||||
bool DatabasesOverlay::isTableExist(const String & table_name, ContextPtr context_) const
|
||||
{
|
||||
for (const auto & db : databases)
|
||||
{
|
||||
if (db->isTableExist(table_name, context_))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Returns the first non-null storage obtained from the underlying databases, asked in registration order.
/// Returns nullptr when none of them has the table.
StoragePtr DatabasesOverlay::tryGetTable(const String & table_name, ContextPtr context_) const
{
    for (const auto & db : databases)
    {
        if (StoragePtr table = db->tryGetTable(table_name, context_))
            return table;
    }
    return nullptr;
}
|
||||
|
||||
/// Creates the table in the first underlying database that is not read-only.
/// Throws LOGICAL_ERROR when every underlying database is read-only (or none is registered).
void DatabasesOverlay::createTable(ContextPtr context_, const String & table_name, const StoragePtr & table, const ASTPtr & query)
{
    for (auto & db : databases)
    {
        if (db->isReadOnly())
            continue;

        db->createTable(context_, table_name, table, query);
        return;
    }

    throw Exception(
        ErrorCodes::LOGICAL_ERROR,
        "There is no databases for CREATE TABLE `{}` query in database `{}` (engine {})",
        table_name,
        getDatabaseName(),
        getEngineName());
}
|
||||
|
||||
/// Drops the table from the first underlying database that reports it as existing.
/// Throws LOGICAL_ERROR when no underlying database has the table.
void DatabasesOverlay::dropTable(ContextPtr context_, const String & table_name, bool sync)
{
    for (auto & db : databases)
    {
        if (!db->isTableExist(table_name, context_))
            continue;

        db->dropTable(context_, table_name, sync);
        return;
    }

    throw Exception(
        ErrorCodes::LOGICAL_ERROR,
        "There is no databases for DROP TABLE `{}` query in database `{}` (engine {})",
        table_name,
        getDatabaseName(),
        getEngineName());
}
|
||||
|
||||
/// Attaches the table to the first underlying database whose attachTable() call does not throw.
/// Throws LOGICAL_ERROR when the attempt fails for every underlying database.
/// NOTE(review): every exception from an underlying database is discarded here, whatever its cause.
void DatabasesOverlay::attachTable(
    ContextPtr context_, const String & table_name, const StoragePtr & table, const String & relative_table_path)
{
    for (auto & db : databases)
    {
        bool attached = false;
        try
        {
            db->attachTable(context_, table_name, table, relative_table_path);
            attached = true;
        }
        catch (...)
        {
            /// This database cannot take the table; try the next one.
        }

        if (attached)
            return;
    }

    throw Exception(
        ErrorCodes::LOGICAL_ERROR,
        "There is no databases for ATTACH TABLE `{}` query in database `{}` (engine {})",
        table_name,
        getDatabaseName(),
        getEngineName());
}
|
||||
|
||||
/// Detaches the table from the first underlying database that reports it as existing
/// and returns the detached storage.
/// Throws LOGICAL_ERROR when no underlying database has the table.
StoragePtr DatabasesOverlay::detachTable(ContextPtr context_, const String & table_name)
{
    /// The `result` local that used to be declared here was never read, so it was removed.
    for (auto & db : databases)
    {
        if (db->isTableExist(table_name, context_))
            return db->detachTable(context_, table_name);
    }

    throw Exception(
        ErrorCodes::LOGICAL_ERROR,
        "There is no databases for DETACH TABLE `{}` query in database `{}` (engine {})",
        table_name,
        getDatabaseName(),
        getEngineName());
}
|
||||
|
||||
/// Returns the first non-null CREATE TABLE query obtained from the underlying databases.
/// When none of them has one, throws CANNOT_GET_CREATE_TABLE_QUERY if `throw_on_error` is set
/// and returns nullptr otherwise.
ASTPtr DatabasesOverlay::getCreateTableQueryImpl(const String & name, ContextPtr context_, bool throw_on_error) const
{
    for (const auto & db : databases)
    {
        if (ASTPtr create_query = db->tryGetCreateTableQuery(name, context_))
            return create_query;
    }

    if (throw_on_error)
        throw Exception(
            ErrorCodes::CANNOT_GET_CREATE_TABLE_QUERY,
            "There is no metadata of table `{}` in database `{}` (engine {})",
            name,
            getDatabaseName(),
            getEngineName());

    return nullptr;
}
|
||||
|
||||
/*
|
||||
* DatabaseOverlay cannot be constructed by "CREATE DATABASE" query, as it is not a traditional ClickHouse database
|
||||
* To use DatabaseOverlay, it must be constructed programmatically in code
|
||||
*/
|
||||
ASTPtr DatabasesOverlay::getCreateDatabaseQuery() const
|
||||
{
|
||||
return std::make_shared<ASTCreateQuery>();
|
||||
}
|
||||
|
||||
/// Returns the first non-empty data path reported for `table_name` by the underlying databases.
/// Returns an empty string when none of them reports one.
String DatabasesOverlay::getTableDataPath(const String & table_name) const
{
    for (const auto & db : databases)
    {
        String data_path = db->getTableDataPath(table_name);
        if (!data_path.empty())
            return data_path;
    }
    return {};
}
|
||||
|
||||
/// Returns the first non-empty data path reported for `query` by the underlying databases.
/// Returns an empty string when none of them reports one.
String DatabasesOverlay::getTableDataPath(const ASTCreateQuery & query) const
{
    for (const auto & db : databases)
    {
        String data_path = db->getTableDataPath(query);
        if (!data_path.empty())
            return data_path;
    }
    return {};
}
|
||||
|
||||
/// Returns the first non-nil UUID reported for `table_name` by the underlying databases.
/// Returns UUIDHelpers::Nil when none of them knows the table.
UUID DatabasesOverlay::tryGetTableUUID(const String & table_name) const
{
    for (const auto & db : databases)
    {
        const UUID table_uuid = db->tryGetTableUUID(table_name);
        if (table_uuid != UUIDHelpers::Nil)
            return table_uuid;
    }
    return UUIDHelpers::Nil;
}
|
||||
|
||||
/// Forwards drop() to every underlying database, in registration order.
void DatabasesOverlay::drop(ContextPtr context_)
{
    for (auto & underlying : databases)
    {
        underlying->drop(context_);
    }
}
|
||||
|
||||
/// Applies the ALTER to the first underlying database that is not read-only and contains the table.
/// Throws LOGICAL_ERROR if there is no such database.
void DatabasesOverlay::alterTable(ContextPtr local_context, const StorageID & table_id, const StorageInMemoryMetadata & metadata)
{
    for (auto & underlying : databases)
    {
        /// Read-only databases are skipped before the table is even looked up.
        if (underlying->isReadOnly())
            continue;

        if (!underlying->isTableExist(table_id.table_name, local_context))
            continue;

        underlying->alterTable(local_context, table_id, metadata);
        return;
    }

    throw Exception(
        ErrorCodes::LOGICAL_ERROR,
        "There is no databases for ALTER TABLE `{}` query in database `{}` (engine {})",
        table_id.table_name,
        getDatabaseName(),
        getEngineName());
}
|
||||
|
||||
std::vector<std::pair<ASTPtr, StoragePtr>>
|
||||
DatabasesOverlay::getTablesForBackup(const FilterByNameFunction & filter, const ContextPtr & local_context) const
|
||||
{
|
||||
std::vector<std::pair<ASTPtr, StoragePtr>> result;
|
||||
for (const auto & db : databases)
|
||||
{
|
||||
auto db_backup = db->getTablesForBackup(filter, local_context);
|
||||
result.insert(result.end(), std::make_move_iterator(db_backup.begin()), std::make_move_iterator(db_backup.end()));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
void DatabasesOverlay::createTableRestoredFromBackup(
|
||||
const ASTPtr & create_table_query,
|
||||
ContextMutablePtr local_context,
|
||||
std::shared_ptr<IRestoreCoordination> /*restore_coordination*/,
|
||||
UInt64 /*timeout_ms*/)
|
||||
{
|
||||
/// Creates a tables by executing a "CREATE TABLE" query.
|
||||
InterpreterCreateQuery interpreter{create_table_query, local_context};
|
||||
interpreter.setInternal(true);
|
||||
interpreter.execute();
|
||||
}
|
||||
|
||||
bool DatabasesOverlay::empty() const
|
||||
{
|
||||
for (const auto & db : databases)
|
||||
{
|
||||
if (!db->empty())
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void DatabasesOverlay::shutdown()
|
||||
{
|
||||
for (auto & db : databases)
|
||||
db->shutdown();
|
||||
}
|
||||
|
||||
/// Builds a snapshot iterator over the tables of all underlying databases that pass `filter_by_table_name`.
/// The tables are gathered eagerly into one container, which is then handed to DatabaseTablesSnapshotIterator.
DatabaseTablesIteratorPtr DatabasesOverlay::getTablesIterator(ContextPtr context_, const FilterByNameFunction & filter_by_table_name) const
{
    Tables merged_tables;

    for (const auto & underlying : databases)
    {
        auto table_it = underlying->getTablesIterator(context_, filter_by_table_name);
        while (table_it->isValid())
        {
            merged_tables.insert({table_it->name(), table_it->table()});
            table_it->next();
        }
    }

    return std::make_unique<DatabaseTablesSnapshotIterator>(std::move(merged_tables), getDatabaseName());
}
|
||||
|
||||
}
|
66
src/Databases/DatabasesOverlay.h
Normal file
66
src/Databases/DatabasesOverlay.h
Normal file
@ -0,0 +1,66 @@
|
||||
#pragma once
|
||||
|
||||
#include <Storages/IStorage_fwd.h>
|
||||
#include <Databases/IDatabase.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/**
|
||||
* Implements the IDatabase interface and combines multiple other databases
|
||||
* Searches for tables in each database in order until found, and delegates operations to the appropriate database
|
||||
* Useful for combining databases
|
||||
*
|
||||
* Used in clickhouse-local to combine DatabaseFileSystem and DatabaseMemory
|
||||
*/
|
||||
class DatabasesOverlay : public IDatabase, protected WithContext
|
||||
{
|
||||
public:
|
||||
DatabasesOverlay(const String & name_, ContextPtr context_);
|
||||
|
||||
/// Not thread-safe. Use only as factory to initialize database
|
||||
DatabasesOverlay & registerNextDatabase(DatabasePtr database);
|
||||
|
||||
String getEngineName() const override { return "Overlay"; }
|
||||
|
||||
public:
|
||||
bool isTableExist(const String & table_name, ContextPtr context) const override;
|
||||
|
||||
StoragePtr tryGetTable(const String & table_name, ContextPtr context) const override;
|
||||
|
||||
void createTable(ContextPtr context, const String & table_name, const StoragePtr & table, const ASTPtr & query) override;
|
||||
|
||||
void dropTable(ContextPtr context, const String & table_name, bool sync) override;
|
||||
|
||||
void attachTable(ContextPtr context, const String & table_name, const StoragePtr & table, const String & relative_table_path) override;
|
||||
|
||||
StoragePtr detachTable(ContextPtr context, const String & table_name) override;
|
||||
|
||||
ASTPtr getCreateTableQueryImpl(const String & name, ContextPtr context, bool throw_on_error) const override;
|
||||
ASTPtr getCreateDatabaseQuery() const override;
|
||||
|
||||
String getTableDataPath(const String & table_name) const override;
|
||||
String getTableDataPath(const ASTCreateQuery & query) const override;
|
||||
|
||||
UUID tryGetTableUUID(const String & table_name) const override;
|
||||
|
||||
void drop(ContextPtr context) override;
|
||||
|
||||
void alterTable(ContextPtr local_context, const StorageID & table_id, const StorageInMemoryMetadata & metadata) override;
|
||||
|
||||
std::vector<std::pair<ASTPtr, StoragePtr>> getTablesForBackup(const FilterByNameFunction & filter, const ContextPtr & local_context) const override;
|
||||
|
||||
void createTableRestoredFromBackup(const ASTPtr & create_table_query, ContextMutablePtr local_context, std::shared_ptr<IRestoreCoordination> restore_coordination, UInt64 timeout_ms) override;
|
||||
|
||||
DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name) const override;
|
||||
|
||||
bool empty() const override;
|
||||
|
||||
void shutdown() override;
|
||||
|
||||
protected:
|
||||
std::vector<DatabasePtr> databases;
|
||||
Poco::Logger * log;
|
||||
};
|
||||
|
||||
}
|
@ -170,7 +170,7 @@ public:
|
||||
/// Get the table for work. Return nullptr if there is no table.
|
||||
virtual StoragePtr tryGetTable(const String & name, ContextPtr context) const = 0;
|
||||
|
||||
StoragePtr getTable(const String & name, ContextPtr context) const;
|
||||
virtual StoragePtr getTable(const String & name, ContextPtr context) const;
|
||||
|
||||
virtual UUID tryGetTableUUID(const String & /*table_name*/) const { return UUIDHelpers::Nil; }
|
||||
|
||||
@ -183,6 +183,8 @@ public:
|
||||
/// Is the database empty.
|
||||
virtual bool empty() const = 0;
|
||||
|
||||
virtual bool isReadOnly() const { return false; }
|
||||
|
||||
/// Add the table to the database. Record its presence in the metadata.
|
||||
virtual void createTable(
|
||||
ContextPtr /*context*/,
|
||||
|
@ -217,7 +217,7 @@ void registerDictionarySourceClickHouse(DictionarySourceFactory & factory)
|
||||
std::optional<Configuration> configuration;
|
||||
|
||||
std::string settings_config_prefix = config_prefix + ".clickhouse";
|
||||
auto named_collection = created_from_ddl ? tryGetNamedCollectionWithOverrides(config, settings_config_prefix) : nullptr;
|
||||
auto named_collection = created_from_ddl ? tryGetNamedCollectionWithOverrides(config, settings_config_prefix, global_context) : nullptr;
|
||||
|
||||
if (named_collection)
|
||||
{
|
||||
|
@ -71,7 +71,7 @@ void registerDictionarySourceMysql(DictionarySourceFactory & factory)
|
||||
MySQLSettings mysql_settings;
|
||||
|
||||
std::optional<MySQLDictionarySource::Configuration> dictionary_configuration;
|
||||
auto named_collection = created_from_ddl ? tryGetNamedCollectionWithOverrides(config, settings_config_prefix) : nullptr;
|
||||
auto named_collection = created_from_ddl ? tryGetNamedCollectionWithOverrides(config, settings_config_prefix, global_context) : nullptr;
|
||||
if (named_collection)
|
||||
{
|
||||
auto allowed_arguments{dictionary_allowed_keys};
|
||||
|
@ -30,8 +30,6 @@
|
||||
#include <Dictionaries/RegExpTreeDictionary.h>
|
||||
#include <Dictionaries/YAMLRegExpTreeDictionarySource.h>
|
||||
|
||||
#include <re2_st/stringpiece.h>
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#if USE_VECTORSCAN
|
||||
@ -469,10 +467,9 @@ public:
|
||||
|
||||
std::pair<String, bool> processBackRefs(const String & data, const re2_st::RE2 & searcher, const std::vector<StringPiece> & pieces)
|
||||
{
|
||||
re2_st::StringPiece haystack(data.data(), data.size());
|
||||
re2_st::StringPiece matches[10];
|
||||
std::string_view matches[10];
|
||||
String result;
|
||||
searcher.Match(haystack, 0, data.size(), re2_st::RE2::Anchor::UNANCHORED, matches, 10);
|
||||
searcher.Match({data.data(), data.size()}, 0, data.size(), re2_st::RE2::Anchor::UNANCHORED, matches, 10);
|
||||
/// if the pattern is a single '$1' but fails to match, we would use the default value.
|
||||
if (pieces.size() == 1 && pieces[0].ref_num >= 0 && pieces[0].ref_num < 10 && matches[pieces[0].ref_num].empty())
|
||||
return std::make_pair(result, true);
|
||||
|
@ -285,19 +285,32 @@ private:
|
||||
};
|
||||
|
||||
DiskEncrypted::DiskEncrypted(
|
||||
const String & name_, const Poco::Util::AbstractConfiguration & config_, const String & config_prefix_, const DisksMap & map_, bool use_fake_transaction_)
|
||||
: DiskEncrypted(name_, parseDiskEncryptedSettings(name_, config_, config_prefix_, map_), use_fake_transaction_)
|
||||
const String & name_, const Poco::Util::AbstractConfiguration & config_, const String & config_prefix_, const DisksMap & map_)
|
||||
: DiskEncrypted(name_, parseDiskEncryptedSettings(name_, config_, config_prefix_, map_), config_, config_prefix_)
|
||||
{
|
||||
}
|
||||
|
||||
DiskEncrypted::DiskEncrypted(const String & name_, std::unique_ptr<const DiskEncryptedSettings> settings_, bool use_fake_transaction_)
|
||||
DiskEncrypted::DiskEncrypted(const String & name_, std::unique_ptr<const DiskEncryptedSettings> settings_,
|
||||
const Poco::Util::AbstractConfiguration & config_, const String & config_prefix_)
|
||||
: IDisk(name_, config_, config_prefix_)
|
||||
, delegate(settings_->wrapped_disk)
|
||||
, encrypted_name(name_)
|
||||
, disk_path(settings_->disk_path)
|
||||
, disk_absolute_path(settings_->wrapped_disk->getPath() + settings_->disk_path)
|
||||
, current_settings(std::move(settings_))
|
||||
, use_fake_transaction(config_.getBool(config_prefix_ + ".use_fake_transaction", true))
|
||||
{
|
||||
delegate->createDirectories(disk_path);
|
||||
}
|
||||
|
||||
DiskEncrypted::DiskEncrypted(const String & name_, std::unique_ptr<const DiskEncryptedSettings> settings_)
|
||||
: IDisk(name_)
|
||||
, delegate(settings_->wrapped_disk)
|
||||
, encrypted_name(name_)
|
||||
, disk_path(settings_->disk_path)
|
||||
, disk_absolute_path(settings_->wrapped_disk->getPath() + settings_->disk_path)
|
||||
, current_settings(std::move(settings_))
|
||||
, use_fake_transaction(use_fake_transaction_)
|
||||
, use_fake_transaction(true)
|
||||
{
|
||||
delegate->createDirectories(disk_path);
|
||||
}
|
||||
@ -310,32 +323,6 @@ ReservationPtr DiskEncrypted::reserve(UInt64 bytes)
|
||||
return std::make_unique<DiskEncryptedReservation>(std::static_pointer_cast<DiskEncrypted>(shared_from_this()), std::move(reservation));
|
||||
}
|
||||
|
||||
void DiskEncrypted::copy(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path)
|
||||
{
|
||||
/// Check if we can copy the file without deciphering.
|
||||
if (isSameDiskType(*this, *to_disk))
|
||||
{
|
||||
/// Disk type is the same, check if the key is the same too.
|
||||
if (auto * to_disk_enc = typeid_cast<DiskEncrypted *>(to_disk.get()))
|
||||
{
|
||||
auto from_settings = current_settings.get();
|
||||
auto to_settings = to_disk_enc->current_settings.get();
|
||||
if (from_settings->all_keys == to_settings->all_keys)
|
||||
{
|
||||
/// Keys are the same so we can simply copy the encrypted file.
|
||||
auto wrapped_from_path = wrappedPath(from_path);
|
||||
auto to_delegate = to_disk_enc->delegate;
|
||||
auto wrapped_to_path = to_disk_enc->wrappedPath(to_path);
|
||||
delegate->copy(wrapped_from_path, to_delegate, wrapped_to_path);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Copy the file through buffers with deciphering.
|
||||
copyThroughBuffers(from_path, to_disk, to_path);
|
||||
}
|
||||
|
||||
|
||||
void DiskEncrypted::copyDirectoryContent(const String & from_dir, const std::shared_ptr<IDisk> & to_disk, const String & to_dir)
|
||||
{
|
||||
@ -359,11 +346,8 @@ void DiskEncrypted::copyDirectoryContent(const String & from_dir, const std::sha
|
||||
}
|
||||
}
|
||||
|
||||
if (!to_disk->exists(to_dir))
|
||||
to_disk->createDirectories(to_dir);
|
||||
|
||||
/// Copy the file through buffers with deciphering.
|
||||
copyThroughBuffers(from_dir, to_disk, to_dir);
|
||||
IDisk::copyDirectoryContent(from_dir, to_disk, to_dir);
|
||||
}
|
||||
|
||||
std::unique_ptr<ReadBufferFromFileBase> DiskEncrypted::readFile(
|
||||
@ -443,7 +427,7 @@ std::unordered_map<String, String> DiskEncrypted::getSerializedMetadata(const st
|
||||
|
||||
void DiskEncrypted::applyNewSettings(
|
||||
const Poco::Util::AbstractConfiguration & config,
|
||||
ContextPtr /*context*/,
|
||||
ContextPtr context,
|
||||
const String & config_prefix,
|
||||
const DisksMap & disk_map)
|
||||
{
|
||||
@ -455,6 +439,7 @@ void DiskEncrypted::applyNewSettings(
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Сhanging disk path on the fly is not supported. Disk {}", name);
|
||||
|
||||
current_settings.set(std::move(new_settings));
|
||||
IDisk::applyNewSettings(config, context, config_prefix, disk_map);
|
||||
}
|
||||
|
||||
void registerDiskEncrypted(DiskFactory & factory, bool global_skip_access_check)
|
||||
@ -467,7 +452,7 @@ void registerDiskEncrypted(DiskFactory & factory, bool global_skip_access_check)
|
||||
const DisksMap & map) -> DiskPtr
|
||||
{
|
||||
bool skip_access_check = global_skip_access_check || config.getBool(config_prefix + ".skip_access_check", false);
|
||||
DiskPtr disk = std::make_shared<DiskEncrypted>(name, config, config_prefix, map, config.getBool(config_prefix + ".use_fake_transaction", true));
|
||||
DiskPtr disk = std::make_shared<DiskEncrypted>(name, config, config_prefix, map);
|
||||
disk->startup(context, skip_access_check);
|
||||
return disk;
|
||||
};
|
||||
|
@ -21,8 +21,10 @@ class WriteBufferFromFileBase;
|
||||
class DiskEncrypted : public IDisk
|
||||
{
|
||||
public:
|
||||
DiskEncrypted(const String & name_, const Poco::Util::AbstractConfiguration & config_, const String & config_prefix_, const DisksMap & map_, bool use_fake_transaction_);
|
||||
DiskEncrypted(const String & name_, std::unique_ptr<const DiskEncryptedSettings> settings_, bool use_fake_transaction_);
|
||||
DiskEncrypted(const String & name_, const Poco::Util::AbstractConfiguration & config_, const String & config_prefix_, const DisksMap & map_);
|
||||
DiskEncrypted(const String & name_, std::unique_ptr<const DiskEncryptedSettings> settings_,
|
||||
const Poco::Util::AbstractConfiguration & config_, const String & config_prefix_);
|
||||
DiskEncrypted(const String & name_, std::unique_ptr<const DiskEncryptedSettings> settings_);
|
||||
|
||||
const String & getName() const override { return encrypted_name; }
|
||||
const String & getPath() const override { return disk_absolute_path; }
|
||||
@ -110,8 +112,6 @@ public:
|
||||
delegate->listFiles(wrapped_path, file_names);
|
||||
}
|
||||
|
||||
void copy(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path) override;
|
||||
|
||||
void copyDirectoryContent(const String & from_dir, const std::shared_ptr<IDisk> & to_disk, const String & to_dir) override;
|
||||
|
||||
std::unique_ptr<ReadBufferFromFileBase> readFile(
|
||||
|
@ -417,29 +417,12 @@ bool inline isSameDiskType(const IDisk & one, const IDisk & another)
|
||||
return typeid(one) == typeid(another);
|
||||
}
|
||||
|
||||
void DiskLocal::copy(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path)
|
||||
{
|
||||
if (isSameDiskType(*this, *to_disk))
|
||||
{
|
||||
fs::path to = fs::path(to_disk->getPath()) / to_path;
|
||||
fs::path from = fs::path(disk_path) / from_path;
|
||||
if (from_path.ends_with('/'))
|
||||
from = from.parent_path();
|
||||
if (fs::is_directory(from))
|
||||
to /= from.filename();
|
||||
|
||||
fs::copy(from, to, fs::copy_options::recursive | fs::copy_options::overwrite_existing); /// Use more optimal way.
|
||||
}
|
||||
else
|
||||
copyThroughBuffers(from_path, to_disk, to_path, /* copy_root_dir */ true); /// Base implementation.
|
||||
}
|
||||
|
||||
void DiskLocal::copyDirectoryContent(const String & from_dir, const std::shared_ptr<IDisk> & to_disk, const String & to_dir)
|
||||
{
|
||||
if (isSameDiskType(*this, *to_disk))
|
||||
fs::copy(from_dir, to_dir, fs::copy_options::recursive | fs::copy_options::overwrite_existing); /// Use more optimal way.
|
||||
fs::copy(fs::path(disk_path) / from_dir, fs::path(to_disk->getPath()) / to_dir, fs::copy_options::recursive | fs::copy_options::overwrite_existing); /// Use more optimal way.
|
||||
else
|
||||
copyThroughBuffers(from_dir, to_disk, to_dir, /* copy_root_dir */ false); /// Base implementation.
|
||||
IDisk::copyDirectoryContent(from_dir, to_disk, to_dir);
|
||||
}
|
||||
|
||||
SyncGuardPtr DiskLocal::getDirectorySyncGuard(const String & path) const
|
||||
@ -448,7 +431,7 @@ SyncGuardPtr DiskLocal::getDirectorySyncGuard(const String & path) const
|
||||
}
|
||||
|
||||
|
||||
void DiskLocal::applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context, const String & config_prefix, const DisksMap &)
|
||||
void DiskLocal::applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context, const String & config_prefix, const DisksMap & disk_map)
|
||||
{
|
||||
String new_disk_path;
|
||||
UInt64 new_keep_free_space_bytes;
|
||||
@ -460,10 +443,13 @@ void DiskLocal::applyNewSettings(const Poco::Util::AbstractConfiguration & confi
|
||||
|
||||
if (keep_free_space_bytes != new_keep_free_space_bytes)
|
||||
keep_free_space_bytes = new_keep_free_space_bytes;
|
||||
|
||||
IDisk::applyNewSettings(config, context, config_prefix, disk_map);
|
||||
}
|
||||
|
||||
DiskLocal::DiskLocal(const String & name_, const String & path_, UInt64 keep_free_space_bytes_)
|
||||
: IDisk(name_)
|
||||
DiskLocal::DiskLocal(const String & name_, const String & path_, UInt64 keep_free_space_bytes_,
|
||||
const Poco::Util::AbstractConfiguration & config, const String & config_prefix)
|
||||
: IDisk(name_, config, config_prefix)
|
||||
, disk_path(path_)
|
||||
, keep_free_space_bytes(keep_free_space_bytes_)
|
||||
, logger(&Poco::Logger::get("DiskLocal"))
|
||||
@ -472,13 +458,24 @@ DiskLocal::DiskLocal(const String & name_, const String & path_, UInt64 keep_fre
|
||||
}
|
||||
|
||||
DiskLocal::DiskLocal(
|
||||
const String & name_, const String & path_, UInt64 keep_free_space_bytes_, ContextPtr context, UInt64 local_disk_check_period_ms)
|
||||
: DiskLocal(name_, path_, keep_free_space_bytes_)
|
||||
const String & name_, const String & path_, UInt64 keep_free_space_bytes_, ContextPtr context,
|
||||
const Poco::Util::AbstractConfiguration & config, const String & config_prefix)
|
||||
: DiskLocal(name_, path_, keep_free_space_bytes_, config, config_prefix)
|
||||
{
|
||||
auto local_disk_check_period_ms = config.getUInt("local_disk_check_period_ms", 0);
|
||||
if (local_disk_check_period_ms > 0)
|
||||
disk_checker = std::make_unique<DiskLocalCheckThread>(this, context, local_disk_check_period_ms);
|
||||
}
|
||||
|
||||
/// Constructs a local disk from just a name and a path: no configuration is read
/// and no free space is kept in reserve (keep_free_space_bytes is 0).
DiskLocal::DiskLocal(const String & name_, const String & path_)
    : IDisk(name_)
    , disk_path(path_)
    , keep_free_space_bytes(0)
    , logger(&Poco::Logger::get("DiskLocal"))
    , data_source_description(getLocalDataSourceDescription(disk_path))
{
}
|
||||
|
||||
DataSourceDescription DiskLocal::getDataSourceDescription() const
|
||||
{
|
||||
return data_source_description;
|
||||
@ -720,7 +717,7 @@ void registerDiskLocal(DiskFactory & factory, bool global_skip_access_check)
|
||||
|
||||
bool skip_access_check = global_skip_access_check || config.getBool(config_prefix + ".skip_access_check", false);
|
||||
std::shared_ptr<IDisk> disk
|
||||
= std::make_shared<DiskLocal>(name, path, keep_free_space_bytes, context, config.getUInt("local_disk_check_period_ms", 0));
|
||||
= std::make_shared<DiskLocal>(name, path, keep_free_space_bytes, context, config, config_prefix);
|
||||
disk->startup(context, skip_access_check);
|
||||
return disk;
|
||||
};
|
||||
|
@ -19,13 +19,17 @@ public:
|
||||
friend class DiskLocalCheckThread;
|
||||
friend class DiskLocalReservation;
|
||||
|
||||
DiskLocal(const String & name_, const String & path_, UInt64 keep_free_space_bytes_);
|
||||
DiskLocal(const String & name_, const String & path_, UInt64 keep_free_space_bytes_,
|
||||
const Poco::Util::AbstractConfiguration & config, const String & config_prefix);
|
||||
DiskLocal(
|
||||
const String & name_,
|
||||
const String & path_,
|
||||
UInt64 keep_free_space_bytes_,
|
||||
ContextPtr context,
|
||||
UInt64 local_disk_check_period_ms);
|
||||
const Poco::Util::AbstractConfiguration & config,
|
||||
const String & config_prefix);
|
||||
|
||||
DiskLocal(const String & name_, const String & path_);
|
||||
|
||||
const String & getPath() const override { return disk_path; }
|
||||
|
||||
@ -63,8 +67,6 @@ public:
|
||||
|
||||
void replaceFile(const String & from_path, const String & to_path) override;
|
||||
|
||||
void copy(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path) override;
|
||||
|
||||
void copyDirectoryContent(const String & from_dir, const std::shared_ptr<IDisk> & to_disk, const String & to_dir) override;
|
||||
|
||||
void listFiles(const String & path, std::vector<String> & file_names) const override;
|
||||
|
@ -53,7 +53,7 @@ void DiskSelector::initialize(const Poco::Util::AbstractConfiguration & config,
|
||||
disks.emplace(
|
||||
default_disk_name,
|
||||
std::make_shared<DiskLocal>(
|
||||
default_disk_name, context->getPath(), 0, context, config.getUInt("local_disk_check_period_ms", 0)));
|
||||
default_disk_name, context->getPath(), 0, context, config, config_prefix));
|
||||
}
|
||||
|
||||
is_initialized = true;
|
||||
|
@ -1,42 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <future>
|
||||
#include <functional>
|
||||
|
||||
namespace DB
{

/// Interface to run task asynchronously with possibility to wait for execution.
class Executor
{
public:
    virtual ~Executor() = default;

    /// Runs `task`; the returned future becomes ready once the task has finished (or failed).
    virtual std::future<void> execute(std::function<void()> task) = 0;
};

/// Executes task synchronously in case when disk doesn't support async operations.
class SyncExecutor : public Executor
{
public:
    SyncExecutor() = default;

    /// Runs `task` right away on the calling thread.
    /// The returned future is already ready: it holds either a value or the exception thrown by the task.
    std::future<void> execute(std::function<void()> task) override
    {
        std::promise<void> completion;
        std::future<void> pending = completion.get_future();

        try
        {
            task();
            completion.set_value();
        }
        catch (...)
        {
            /// Storing the exception may itself throw; nothing more can be done about that here.
            try
            {
                completion.set_exception(std::current_exception());
            }
            catch (...)
            {
            }
        }

        return pending;
    }
};

}
|
@ -1,5 +1,4 @@
|
||||
#include "IDisk.h"
|
||||
#include "Disks/Executor.h"
|
||||
#include <IO/ReadBufferFromFileBase.h>
|
||||
#include <IO/WriteBufferFromFileBase.h>
|
||||
#include <IO/copyData.h>
|
||||
@ -80,18 +79,33 @@ UInt128 IDisk::getEncryptedFileIV(const String &) const
|
||||
|
||||
using ResultsCollector = std::vector<std::future<void>>;
|
||||
|
||||
void asyncCopy(IDisk & from_disk, String from_path, IDisk & to_disk, String to_path, Executor & exec, ResultsCollector & results, bool copy_root_dir, const WriteSettings & settings)
|
||||
void asyncCopy(IDisk & from_disk, String from_path, IDisk & to_disk, String to_path, ThreadPool & pool, ResultsCollector & results, bool copy_root_dir, const WriteSettings & settings)
|
||||
{
|
||||
if (from_disk.isFile(from_path))
|
||||
{
|
||||
auto result = exec.execute(
|
||||
[&from_disk, from_path, &to_disk, to_path, &settings]()
|
||||
auto promise = std::make_shared<std::promise<void>>();
|
||||
auto future = promise->get_future();
|
||||
|
||||
pool.scheduleOrThrowOnError(
|
||||
[&from_disk, from_path, &to_disk, to_path, &settings, promise, thread_group = CurrentThread::getGroup()]()
|
||||
{
|
||||
setThreadName("DiskCopier");
|
||||
from_disk.copyFile(from_path, to_disk, fs::path(to_path) / fileName(from_path), settings);
|
||||
try
|
||||
{
|
||||
SCOPE_EXIT_SAFE(if (thread_group) CurrentThread::detachFromGroupIfNotDetached(););
|
||||
|
||||
if (thread_group)
|
||||
CurrentThread::attachToGroup(thread_group);
|
||||
|
||||
from_disk.copyFile(from_path, to_disk, fs::path(to_path) / fileName(from_path), settings);
|
||||
promise->set_value();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
promise->set_exception(std::current_exception());
|
||||
}
|
||||
});
|
||||
|
||||
results.push_back(std::move(result));
|
||||
results.push_back(std::move(future));
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -104,13 +118,12 @@ void asyncCopy(IDisk & from_disk, String from_path, IDisk & to_disk, String to_p
|
||||
}
|
||||
|
||||
for (auto it = from_disk.iterateDirectory(from_path); it->isValid(); it->next())
|
||||
asyncCopy(from_disk, it->path(), to_disk, dest, exec, results, true, settings);
|
||||
asyncCopy(from_disk, it->path(), to_disk, dest, pool, results, true, settings);
|
||||
}
|
||||
}
|
||||
|
||||
void IDisk::copyThroughBuffers(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path, bool copy_root_dir)
|
||||
{
|
||||
auto & exec = to_disk->getExecutor();
|
||||
ResultsCollector results;
|
||||
|
||||
WriteSettings settings;
|
||||
@ -118,17 +131,12 @@ void IDisk::copyThroughBuffers(const String & from_path, const std::shared_ptr<I
|
||||
/// Avoid high memory usage. See test_s3_zero_copy_ttl/test.py::test_move_and_s3_memory_usage
|
||||
settings.s3_allow_parallel_part_upload = false;
|
||||
|
||||
asyncCopy(*this, from_path, *to_disk, to_path, exec, results, copy_root_dir, settings);
|
||||
asyncCopy(*this, from_path, *to_disk, to_path, copying_thread_pool, results, copy_root_dir, settings);
|
||||
|
||||
for (auto & result : results)
|
||||
result.wait();
|
||||
for (auto & result : results)
|
||||
result.get();
|
||||
}
|
||||
|
||||
void IDisk::copy(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path)
|
||||
{
|
||||
copyThroughBuffers(from_path, to_disk, to_path, true);
|
||||
result.get(); /// May rethrow an exception
|
||||
}
|
||||
|
||||
|
||||
@ -137,7 +145,7 @@ void IDisk::copyDirectoryContent(const String & from_dir, const std::shared_ptr<
|
||||
if (!to_disk->exists(to_dir))
|
||||
to_disk->createDirectories(to_dir);
|
||||
|
||||
copyThroughBuffers(from_dir, to_disk, to_dir, false);
|
||||
copyThroughBuffers(from_dir, to_disk, to_dir, /* copy_root_dir */ false);
|
||||
}
|
||||
|
||||
void IDisk::truncateFile(const String &, size_t)
|
||||
@ -233,4 +241,9 @@ catch (Exception & e)
|
||||
throw;
|
||||
}
|
||||
|
||||
/// Base implementation of runtime settings reload: re-reads `<config_prefix>.thread_pool_size`
/// (default 16) and resizes the thread pool used for copying accordingly.
/// The context and the disks map are not used here.
void IDisk::applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr /*context*/, const String & config_prefix, const DisksMap & /*map*/)
{
    /// Read the value as unsigned, exactly like the IDisk constructor does for the same key,
    /// so that a negative value is not implicitly converted to a huge thread count.
    copying_thread_pool.setMaxThreads(config.getUInt(config_prefix + ".thread_pool_size", 16));
}
|
||||
|
||||
}
|
||||
|
@ -6,7 +6,6 @@
|
||||
#include <base/types.h>
|
||||
#include <Common/CurrentMetrics.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Disks/Executor.h>
|
||||
#include <Disks/DiskType.h>
|
||||
#include <IO/ReadSettings.h>
|
||||
#include <IO/WriteSettings.h>
|
||||
@ -35,6 +34,12 @@ namespace Poco
|
||||
}
|
||||
}
|
||||
|
||||
namespace CurrentMetrics
|
||||
{
|
||||
extern const Metric IDiskCopierThreads;
|
||||
extern const Metric IDiskCopierThreadsActive;
|
||||
}
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
@ -110,9 +115,15 @@ class IDisk : public Space
|
||||
{
|
||||
public:
|
||||
/// Default constructor.
|
||||
explicit IDisk(const String & name_, std::shared_ptr<Executor> executor_ = std::make_shared<SyncExecutor>())
|
||||
IDisk(const String & name_, const Poco::Util::AbstractConfiguration & config, const String & config_prefix)
|
||||
: name(name_)
|
||||
, executor(executor_)
|
||||
, copying_thread_pool(CurrentMetrics::IDiskCopierThreads, CurrentMetrics::IDiskCopierThreadsActive, config.getUInt(config_prefix + ".thread_pool_size", 16))
|
||||
{
|
||||
}
|
||||
|
||||
explicit IDisk(const String & name_)
|
||||
: name(name_)
|
||||
, copying_thread_pool(CurrentMetrics::IDiskCopierThreads, CurrentMetrics::IDiskCopierThreadsActive, 16)
|
||||
{
|
||||
}
|
||||
|
||||
@ -181,9 +192,6 @@ public:
|
||||
/// If a file with `to_path` path already exists, it will be replaced.
|
||||
virtual void replaceFile(const String & from_path, const String & to_path) = 0;
|
||||
|
||||
/// Recursively copy data containing at `from_path` to `to_path` located at `to_disk`.
|
||||
virtual void copy(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path);
|
||||
|
||||
/// Recursively copy files from from_dir to to_dir. Create to_dir if not exists.
|
||||
virtual void copyDirectoryContent(const String & from_dir, const std::shared_ptr<IDisk> & to_disk, const String & to_dir);
|
||||
|
||||
@ -379,7 +387,7 @@ public:
|
||||
virtual SyncGuardPtr getDirectorySyncGuard(const String & path) const;
|
||||
|
||||
/// Applies new settings for disk in runtime.
|
||||
virtual void applyNewSettings(const Poco::Util::AbstractConfiguration &, ContextPtr, const String &, const DisksMap &) {}
|
||||
virtual void applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context, const String & config_prefix, const DisksMap & map);
|
||||
|
||||
/// Quite leaky abstraction. Some disks can use additional disk to store
|
||||
/// some parts of metadata. In general case we have only one disk itself and
|
||||
@ -459,9 +467,6 @@ protected:
|
||||
|
||||
const String name;
|
||||
|
||||
/// Returns executor to perform asynchronous operations.
|
||||
virtual Executor & getExecutor() { return *executor; }
|
||||
|
||||
/// Base implementation of the function copy().
|
||||
/// It just opens two files, reads data by portions from the first file, and writes it to the second one.
|
||||
/// A derived class may override copy() to provide a faster implementation.
|
||||
@ -470,7 +475,7 @@ protected:
|
||||
virtual void checkAccessImpl(const String & path);
|
||||
|
||||
private:
|
||||
std::shared_ptr<Executor> executor;
|
||||
ThreadPool copying_thread_pool;
|
||||
bool is_custom_disk = false;
|
||||
|
||||
/// Check access to the disk.
|
||||
|
@ -74,19 +74,22 @@ CachedOnDiskReadBufferFromFile::CachedOnDiskReadBufferFromFile(
|
||||
}
|
||||
|
||||
void CachedOnDiskReadBufferFromFile::appendFilesystemCacheLog(
|
||||
const FileSegment::Range & file_segment_range, CachedOnDiskReadBufferFromFile::ReadType type)
|
||||
const FileSegment & file_segment, CachedOnDiskReadBufferFromFile::ReadType type)
|
||||
{
|
||||
if (!cache_log)
|
||||
return;
|
||||
|
||||
const auto range = file_segment.range();
|
||||
FilesystemCacheLogElement elem
|
||||
{
|
||||
.event_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()),
|
||||
.query_id = query_id,
|
||||
.source_file_path = source_file_path,
|
||||
.file_segment_range = { file_segment_range.left, file_segment_range.right },
|
||||
.file_segment_range = { range.left, range.right },
|
||||
.requested_range = { first_offset, read_until_position },
|
||||
.file_segment_size = file_segment_range.size(),
|
||||
.file_segment_key = file_segment.key().toString(),
|
||||
.file_segment_offset = file_segment.offset(),
|
||||
.file_segment_size = range.size(),
|
||||
.read_from_cache_attempted = true,
|
||||
.read_buffer_id = current_buffer_id,
|
||||
.profile_counters = std::make_shared<ProfileEvents::Counters::Snapshot>(
|
||||
@ -495,7 +498,7 @@ bool CachedOnDiskReadBufferFromFile::completeFileSegmentAndGetNext()
|
||||
auto completed_range = current_file_segment->range();
|
||||
|
||||
if (cache_log)
|
||||
appendFilesystemCacheLog(completed_range, read_type);
|
||||
appendFilesystemCacheLog(*current_file_segment, read_type);
|
||||
|
||||
chassert(file_offset_of_buffer_end > completed_range.right);
|
||||
|
||||
@ -518,7 +521,7 @@ CachedOnDiskReadBufferFromFile::~CachedOnDiskReadBufferFromFile()
|
||||
{
|
||||
if (cache_log && file_segments && !file_segments->empty())
|
||||
{
|
||||
appendFilesystemCacheLog(file_segments->front().range(), read_type);
|
||||
appendFilesystemCacheLog(file_segments->front(), read_type);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -90,7 +90,7 @@ private:
|
||||
|
||||
bool completeFileSegmentAndGetNext();
|
||||
|
||||
void appendFilesystemCacheLog(const FileSegment::Range & file_segment_range, ReadType read_type);
|
||||
void appendFilesystemCacheLog(const FileSegment & file_segment, ReadType read_type);
|
||||
|
||||
bool writeCache(char * data, size_t size, size_t offset, FileSegment & file_segment);
|
||||
|
||||
|
@ -90,6 +90,8 @@ void ReadBufferFromRemoteFSGather::appendUncachedReadInfo()
|
||||
.source_file_path = current_object.remote_path,
|
||||
.file_segment_range = { 0, current_object.bytes_size },
|
||||
.cache_type = FilesystemCacheLogElement::CacheType::READ_FROM_FS_BYPASSING_CACHE,
|
||||
.file_segment_key = {},
|
||||
.file_segment_offset = {},
|
||||
.file_segment_size = current_object.bytes_size,
|
||||
.read_from_cache_attempted = false,
|
||||
};
|
||||
|
@ -31,9 +31,6 @@ void registerDiskAzureBlobStorage(DiskFactory & factory, bool global_skip_access
|
||||
getAzureBlobContainerClient(config, config_prefix),
|
||||
getAzureBlobStorageSettings(config, config_prefix, context));
|
||||
|
||||
uint64_t copy_thread_pool_size = config.getUInt(config_prefix + ".thread_pool_size", 16);
|
||||
bool send_metadata = config.getBool(config_prefix + ".send_metadata", false);
|
||||
|
||||
auto metadata_storage = std::make_shared<MetadataStorageFromDisk>(metadata_disk, "");
|
||||
|
||||
std::shared_ptr<IDisk> azure_blob_storage_disk = std::make_shared<DiskObjectStorage>(
|
||||
@ -42,8 +39,8 @@ void registerDiskAzureBlobStorage(DiskFactory & factory, bool global_skip_access
|
||||
"DiskAzureBlobStorage",
|
||||
std::move(metadata_storage),
|
||||
std::move(azure_object_storage),
|
||||
send_metadata,
|
||||
copy_thread_pool_size
|
||||
config,
|
||||
config_prefix
|
||||
);
|
||||
|
||||
bool skip_access_check = global_skip_access_check || config.getBool(config_prefix + ".skip_access_check", false);
|
||||
|
@ -18,12 +18,6 @@
|
||||
#include <Poco/Util/AbstractConfiguration.h>
|
||||
#include <Interpreters/Context.h>
|
||||
|
||||
namespace CurrentMetrics
|
||||
{
|
||||
extern const Metric DiskObjectStorageAsyncThreads;
|
||||
extern const Metric DiskObjectStorageAsyncThreadsActive;
|
||||
}
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -37,55 +31,6 @@ namespace ErrorCodes
|
||||
extern const int DIRECTORY_DOESNT_EXIST;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
/// Runs tasks asynchronously using thread pool.
|
||||
/// Executor implementation that runs each submitted task on an internal ThreadPool
/// and reports completion (or failure) to the caller through a std::future<void>.
class AsyncThreadPoolExecutor : public Executor
|
||||
{
|
||||
public:
|
||||
/// name_ is stored in the `name` member; thread_pool_size is forwarded to the ThreadPool
/// constructor together with the DiskObjectStorageAsyncThreads* metrics.
AsyncThreadPoolExecutor(const String & name_, int thread_pool_size)
|
||||
: name(name_)
|
||||
, pool(CurrentMetrics::DiskObjectStorageAsyncThreads, CurrentMetrics::DiskObjectStorageAsyncThreadsActive, thread_pool_size)
|
||||
{}
|
||||
|
||||
/// Schedules `task` on the pool and returns a future bound to a promise that is
/// fulfilled when the task returns, or that carries the task's exception if it throws.
std::future<void> execute(std::function<void()> task) override
|
||||
{
|
||||
/// The promise is shared with the scheduled lambda so it stays alive until the task has run.
auto promise = std::make_shared<std::promise<void>>();
|
||||
pool.scheduleOrThrowOnError(
|
||||
[promise, task]()
|
||||
{
|
||||
try
|
||||
{
|
||||
task();
|
||||
promise->set_value();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
/// The failure is logged here and also forwarded to whoever waits on the future.
tryLogCurrentException("Failed to run async task");
|
||||
|
||||
try
|
||||
{
|
||||
promise->set_exception(std::current_exception());
|
||||
}
|
||||
/// Anything thrown by set_exception itself is intentionally ignored.
catch (...) {}
|
||||
}
|
||||
});
|
||||
|
||||
return promise->get_future();
|
||||
}
|
||||
|
||||
/// Forwards the new thread limit to the underlying pool.
void setMaxThreads(size_t threads)
|
||||
{
|
||||
pool.setMaxThreads(threads);
|
||||
}
|
||||
|
||||
private:
|
||||
/// Stored by the constructor; not read anywhere inside this class.
String name;
|
||||
ThreadPool pool;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
DiskTransactionPtr DiskObjectStorage::createTransaction()
|
||||
{
|
||||
@ -105,27 +50,20 @@ DiskTransactionPtr DiskObjectStorage::createObjectStorageTransaction()
|
||||
send_metadata ? metadata_helper.get() : nullptr);
|
||||
}
|
||||
|
||||
/// Returns the executor used for asynchronous disk operations.
/// The executor is held in a function-local static, so it is constructed only once, on the
/// first call, using that call's log_name and size. Every later call returns the same
/// instance and its log_name / size arguments are ignored.
std::shared_ptr<Executor> DiskObjectStorage::getAsyncExecutor(const std::string & log_name, size_t size)
|
||||
{
|
||||
static auto reader = std::make_shared<AsyncThreadPoolExecutor>(log_name, size);
|
||||
return reader;
|
||||
}
|
||||
|
||||
DiskObjectStorage::DiskObjectStorage(
|
||||
const String & name_,
|
||||
const String & object_storage_root_path_,
|
||||
const String & log_name,
|
||||
MetadataStoragePtr metadata_storage_,
|
||||
ObjectStoragePtr object_storage_,
|
||||
bool send_metadata_,
|
||||
uint64_t thread_pool_size_)
|
||||
: IDisk(name_, getAsyncExecutor(log_name, thread_pool_size_))
|
||||
const Poco::Util::AbstractConfiguration & config,
|
||||
const String & config_prefix)
|
||||
: IDisk(name_, config, config_prefix)
|
||||
, object_storage_root_path(object_storage_root_path_)
|
||||
, log (&Poco::Logger::get("DiskObjectStorage(" + log_name + ")"))
|
||||
, metadata_storage(std::move(metadata_storage_))
|
||||
, object_storage(std::move(object_storage_))
|
||||
, send_metadata(send_metadata_)
|
||||
, threadpool_size(thread_pool_size_)
|
||||
, send_metadata(config.getBool(config_prefix + ".send_metadata", false))
|
||||
, metadata_helper(std::make_unique<DiskObjectStorageRemoteMetadataRestoreHelper>(this, ReadSettings{}))
|
||||
{}
|
||||
|
||||
@ -234,19 +172,23 @@ void DiskObjectStorage::moveFile(const String & from_path, const String & to_pat
|
||||
transaction->commit();
|
||||
}
|
||||
|
||||
|
||||
void DiskObjectStorage::copy(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path)
|
||||
void DiskObjectStorage::copyFile( /// NOLINT
|
||||
const String & from_file_path,
|
||||
IDisk & to_disk,
|
||||
const String & to_file_path,
|
||||
const WriteSettings & settings)
|
||||
{
|
||||
/// It's the same object storage disk
|
||||
if (this == to_disk.get())
|
||||
if (this == &to_disk)
|
||||
{
|
||||
/// It may use s3-server-side copy
|
||||
auto transaction = createObjectStorageTransaction();
|
||||
transaction->copyFile(from_path, to_path);
|
||||
transaction->copyFile(from_file_path, to_file_path);
|
||||
transaction->commit();
|
||||
}
|
||||
else
|
||||
{
|
||||
IDisk::copy(from_path, to_disk, to_path);
|
||||
/// Copy through buffers
|
||||
IDisk::copyFile(from_file_path, to_disk, to_file_path, settings);
|
||||
}
|
||||
}
|
||||
|
||||
@ -519,14 +461,15 @@ bool DiskObjectStorage::isWriteOnce() const
|
||||
|
||||
DiskObjectStoragePtr DiskObjectStorage::createDiskObjectStorage()
|
||||
{
|
||||
const auto config_prefix = "storage_configuration.disks." + name;
|
||||
return std::make_shared<DiskObjectStorage>(
|
||||
getName(),
|
||||
object_storage_root_path,
|
||||
getName(),
|
||||
metadata_storage,
|
||||
object_storage,
|
||||
send_metadata,
|
||||
threadpool_size);
|
||||
Context::getGlobalContextInstance()->getConfigRef(),
|
||||
config_prefix);
|
||||
}
|
||||
|
||||
std::unique_ptr<ReadBufferFromFileBase> DiskObjectStorage::readFile(
|
||||
@ -582,13 +525,12 @@ void DiskObjectStorage::writeFileUsingBlobWritingFunction(const String & path, W
|
||||
}
|
||||
|
||||
void DiskObjectStorage::applyNewSettings(
|
||||
const Poco::Util::AbstractConfiguration & config, ContextPtr context_, const String &, const DisksMap &)
|
||||
const Poco::Util::AbstractConfiguration & config, ContextPtr context_, const String & /*config_prefix*/, const DisksMap & disk_map)
|
||||
{
|
||||
/// FIXME we cannot use config_prefix that was passed through arguments because the disk may be wrapped with cache and we need another name
|
||||
const auto config_prefix = "storage_configuration.disks." + name;
|
||||
object_storage->applyNewSettings(config, config_prefix, context_);
|
||||
|
||||
if (AsyncThreadPoolExecutor * exec = dynamic_cast<AsyncThreadPoolExecutor *>(&getExecutor()))
|
||||
exec->setMaxThreads(config.getInt(config_prefix + ".thread_pool_size", 16));
|
||||
IDisk::applyNewSettings(config, context_, config_prefix, disk_map);
|
||||
}
|
||||
|
||||
void DiskObjectStorage::restoreMetadataIfNeeded(
|
||||
|
@ -33,8 +33,8 @@ public:
|
||||
const String & log_name,
|
||||
MetadataStoragePtr metadata_storage_,
|
||||
ObjectStoragePtr object_storage_,
|
||||
bool send_metadata_,
|
||||
uint64_t thread_pool_size_);
|
||||
const Poco::Util::AbstractConfiguration & config,
|
||||
const String & config_prefix);
|
||||
|
||||
/// Create fake transaction
|
||||
DiskTransactionPtr createTransaction() override;
|
||||
@ -152,7 +152,11 @@ public:
|
||||
Strings getBlobPath(const String & path) const override;
|
||||
void writeFileUsingBlobWritingFunction(const String & path, WriteMode mode, WriteBlobFunction && write_blob_function) override;
|
||||
|
||||
void copy(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path) override;
|
||||
void copyFile( /// NOLINT
|
||||
const String & from_file_path,
|
||||
IDisk & to_disk,
|
||||
const String & to_file_path,
|
||||
const WriteSettings & settings = {}) override;
|
||||
|
||||
void applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context_, const String &, const DisksMap &) override;
|
||||
|
||||
@ -198,8 +202,6 @@ public:
|
||||
NameSet getCacheLayersNames() const override;
|
||||
#endif
|
||||
|
||||
static std::shared_ptr<Executor> getAsyncExecutor(const std::string & log_name, size_t size);
|
||||
|
||||
bool supportsStat() const override { return metadata_storage->supportsStat(); }
|
||||
struct stat stat(const String & path) const override;
|
||||
|
||||
@ -225,7 +227,6 @@ private:
|
||||
std::optional<UInt64> tryReserve(UInt64 bytes);
|
||||
|
||||
const bool send_metadata;
|
||||
size_t threadpool_size;
|
||||
|
||||
std::unique_ptr<DiskObjectStorageRemoteMetadataRestoreHelper> metadata_helper;
|
||||
};
|
||||
|
@ -25,7 +25,7 @@ std::pair<String, DiskPtr> prepareForLocalMetadata(
|
||||
/// where the metadata files are stored locally
|
||||
auto metadata_path = getDiskMetadataPath(name, config, config_prefix, context);
|
||||
fs::create_directories(metadata_path);
|
||||
auto metadata_disk = std::make_shared<DiskLocal>(name + "-metadata", metadata_path, 0);
|
||||
auto metadata_disk = std::make_shared<DiskLocal>(name + "-metadata", metadata_path, 0, config, config_prefix);
|
||||
return std::make_pair(metadata_path, metadata_disk);
|
||||
}
|
||||
|
||||
|
@ -8,6 +8,14 @@
|
||||
#include <IO/WriteBufferFromFile.h>
|
||||
#include <Common/checkStackSize.h>
|
||||
#include <Common/logger_useful.h>
|
||||
#include <Common/CurrentMetrics.h>
|
||||
|
||||
|
||||
namespace CurrentMetrics
|
||||
{
|
||||
extern const Metric LocalThread;
|
||||
extern const Metric LocalThreadActive;
|
||||
}
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -101,7 +109,7 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::migrateFileToRestorableSchema
|
||||
updateObjectMetadata(object.remote_path, metadata);
|
||||
}
|
||||
}
|
||||
void DiskObjectStorageRemoteMetadataRestoreHelper::migrateToRestorableSchemaRecursive(const String & path, Futures & results)
|
||||
void DiskObjectStorageRemoteMetadataRestoreHelper::migrateToRestorableSchemaRecursive(const String & path, ThreadPool & pool)
|
||||
{
|
||||
checkStackSize(); /// This is needed to prevent stack overflow in case of cyclic symlinks.
|
||||
|
||||
@ -120,29 +128,26 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::migrateToRestorableSchemaRecu
|
||||
/// The whole directory can be migrated asynchronously.
|
||||
if (dir_contains_only_files)
|
||||
{
|
||||
auto result = disk->getExecutor().execute([this, path]
|
||||
pool.scheduleOrThrowOnError([this, path]
|
||||
{
|
||||
for (auto it = disk->iterateDirectory(path); it->isValid(); it->next())
|
||||
migrateFileToRestorableSchema(it->path());
|
||||
});
|
||||
|
||||
results.push_back(std::move(result));
|
||||
}
|
||||
else
|
||||
{
|
||||
for (auto it = disk->iterateDirectory(path); it->isValid(); it->next())
|
||||
if (!disk->isDirectory(it->path()))
|
||||
{
|
||||
if (disk->isDirectory(it->path()))
|
||||
{
|
||||
auto source_path = it->path();
|
||||
auto result = disk->getExecutor().execute([this, source_path]
|
||||
{
|
||||
migrateFileToRestorableSchema(source_path);
|
||||
});
|
||||
|
||||
results.push_back(std::move(result));
|
||||
migrateToRestorableSchemaRecursive(it->path(), pool);
|
||||
}
|
||||
else
|
||||
migrateToRestorableSchemaRecursive(it->path(), results);
|
||||
{
|
||||
auto source_path = it->path();
|
||||
pool.scheduleOrThrowOnError([this, source_path] { migrateFileToRestorableSchema(source_path); });
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@ -153,16 +158,13 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::migrateToRestorableSchema()
|
||||
{
|
||||
LOG_INFO(disk->log, "Start migration to restorable schema for disk {}", disk->name);
|
||||
|
||||
Futures results;
|
||||
ThreadPool pool{CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive};
|
||||
|
||||
for (const auto & root : data_roots)
|
||||
if (disk->exists(root))
|
||||
migrateToRestorableSchemaRecursive(root + '/', results);
|
||||
migrateToRestorableSchemaRecursive(root + '/', pool);
|
||||
|
||||
for (auto & result : results)
|
||||
result.wait();
|
||||
for (auto & result : results)
|
||||
result.get();
|
||||
pool.wait();
|
||||
|
||||
saveSchemaVersion(RESTORABLE_SCHEMA_VERSION);
|
||||
}
|
||||
@ -355,8 +357,8 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::restoreFiles(IObjectStorage *
|
||||
{
|
||||
LOG_INFO(disk->log, "Starting restore files for disk {}", disk->name);
|
||||
|
||||
std::vector<std::future<void>> results;
|
||||
auto restore_files = [this, &source_object_storage, &restore_information, &results](const RelativePathsWithMetadata & objects)
|
||||
ThreadPool pool{CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive};
|
||||
auto restore_files = [this, &source_object_storage, &restore_information, &pool](const RelativePathsWithMetadata & objects)
|
||||
{
|
||||
std::vector<String> keys_names;
|
||||
for (const auto & object : objects)
|
||||
@ -378,12 +380,10 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::restoreFiles(IObjectStorage *
|
||||
|
||||
if (!keys_names.empty())
|
||||
{
|
||||
auto result = disk->getExecutor().execute([this, &source_object_storage, &restore_information, keys_names]()
|
||||
pool.scheduleOrThrowOnError([this, &source_object_storage, &restore_information, keys_names]()
|
||||
{
|
||||
processRestoreFiles(source_object_storage, restore_information.source_path, keys_names);
|
||||
});
|
||||
|
||||
results.push_back(std::move(result));
|
||||
}
|
||||
|
||||
return true;
|
||||
@ -394,10 +394,7 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::restoreFiles(IObjectStorage *
|
||||
|
||||
restore_files(children);
|
||||
|
||||
for (auto & result : results)
|
||||
result.wait();
|
||||
for (auto & result : results)
|
||||
result.get();
|
||||
pool.wait();
|
||||
|
||||
LOG_INFO(disk->log, "Files are restored for disk {}", disk->name);
|
||||
|
||||
|
@ -75,7 +75,7 @@ private:
|
||||
void saveSchemaVersion(const int & version) const;
|
||||
void updateObjectMetadata(const String & key, const ObjectAttributes & metadata) const;
|
||||
void migrateFileToRestorableSchema(const String & path) const;
|
||||
void migrateToRestorableSchemaRecursive(const String & path, Futures & results);
|
||||
void migrateToRestorableSchemaRecursive(const String & path, ThreadPool & pool);
|
||||
|
||||
void readRestoreInformation(RestoreInformation & restore_information);
|
||||
void restoreFiles(IObjectStorage * source_object_storage, const RestoreInformation & restore_information);
|
||||
|
@ -44,7 +44,6 @@ void registerDiskHDFS(DiskFactory & factory, bool global_skip_access_check)
|
||||
auto [_, metadata_disk] = prepareForLocalMetadata(name, config, config_prefix, context);
|
||||
|
||||
auto metadata_storage = std::make_shared<MetadataStorageFromDisk>(metadata_disk, uri);
|
||||
uint64_t copy_thread_pool_size = config.getUInt(config_prefix + ".thread_pool_size", 16);
|
||||
bool skip_access_check = global_skip_access_check || config.getBool(config_prefix + ".skip_access_check", false);
|
||||
|
||||
DiskPtr disk = std::make_shared<DiskObjectStorage>(
|
||||
@ -53,8 +52,8 @@ void registerDiskHDFS(DiskFactory & factory, bool global_skip_access_check)
|
||||
"DiskHDFS",
|
||||
std::move(metadata_storage),
|
||||
std::move(hdfs_storage),
|
||||
/* send_metadata = */ false,
|
||||
copy_thread_pool_size);
|
||||
config,
|
||||
config_prefix);
|
||||
disk->startup(context, skip_access_check);
|
||||
|
||||
return disk;
|
||||
|
@ -34,7 +34,7 @@ void registerDiskLocalObjectStorage(DiskFactory & factory, bool global_skip_acce
|
||||
metadata_storage = std::make_shared<MetadataStorageFromDisk>(metadata_disk, path);
|
||||
|
||||
auto disk = std::make_shared<DiskObjectStorage>(
|
||||
name, path, "Local", metadata_storage, local_storage, false, /* threadpool_size */16);
|
||||
name, path, "Local", metadata_storage, local_storage, config, config_prefix);
|
||||
disk->startup(context, global_skip_access_check);
|
||||
return disk;
|
||||
};
|
||||
|
@ -150,17 +150,14 @@ void registerDiskS3(DiskFactory & factory, bool global_skip_access_check)
|
||||
}
|
||||
}
|
||||
|
||||
bool send_metadata = config.getBool(config_prefix + ".send_metadata", false);
|
||||
uint64_t copy_thread_pool_size = config.getUInt(config_prefix + ".thread_pool_size", 16);
|
||||
|
||||
DiskObjectStoragePtr s3disk = std::make_shared<DiskObjectStorage>(
|
||||
name,
|
||||
uri.key,
|
||||
type == "s3" ? "DiskS3" : "DiskS3Plain",
|
||||
std::move(metadata_storage),
|
||||
std::move(s3_storage),
|
||||
send_metadata,
|
||||
copy_thread_pool_size);
|
||||
config,
|
||||
config_prefix);
|
||||
|
||||
s3disk->startup(context, skip_access_check);
|
||||
|
||||
|
@ -52,8 +52,8 @@ void registerDiskWebServer(DiskFactory & factory, bool global_skip_access_check)
|
||||
"DiskWebServer",
|
||||
metadata_storage,
|
||||
object_storage,
|
||||
/* send_metadata */false,
|
||||
/* threadpool_size */16);
|
||||
config,
|
||||
config_prefix);
|
||||
disk->startup(context, skip_access_check);
|
||||
return disk;
|
||||
};
|
||||
|
@ -302,7 +302,11 @@ void StoragePolicy::checkCompatibleWith(const StoragePolicyPtr & new_storage_pol
|
||||
for (const auto & volume : getVolumes())
|
||||
{
|
||||
if (!new_volume_names.contains(volume->getName()))
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "New storage policy {} shall contain volumes of old one", backQuote(name));
|
||||
throw Exception(
|
||||
ErrorCodes::BAD_ARGUMENTS,
|
||||
"New storage policy {} shall contain volumes of the old storage policy {}",
|
||||
backQuote(new_storage_policy->getName()),
|
||||
backQuote(name));
|
||||
|
||||
std::unordered_set<String> new_disk_names;
|
||||
for (const auto & disk : new_storage_policy->getVolumeByName(volume->getName())->getDisks())
|
||||
@ -310,7 +314,11 @@ void StoragePolicy::checkCompatibleWith(const StoragePolicyPtr & new_storage_pol
|
||||
|
||||
for (const auto & disk : volume->getDisks())
|
||||
if (!new_disk_names.contains(disk->getName()))
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "New storage policy {} shall contain disks of old one", backQuote(name));
|
||||
throw Exception(
|
||||
ErrorCodes::BAD_ARGUMENTS,
|
||||
"New storage policy {} shall contain disks of the old storage policy {}",
|
||||
backQuote(new_storage_policy->getName()),
|
||||
backQuote(name));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -56,7 +56,7 @@ void loadDiskLocalConfig(const String & name,
|
||||
tmp_path = context->getPath();
|
||||
|
||||
// Create tmp disk for getting total disk space.
|
||||
keep_free_space_bytes = static_cast<UInt64>(DiskLocal("tmp", tmp_path, 0).getTotalSpace() * ratio);
|
||||
keep_free_space_bytes = static_cast<UInt64>(DiskLocal("tmp", tmp_path, 0, config, config_prefix).getTotalSpace() * ratio);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -33,7 +33,7 @@ public:
|
||||
void SetUp() override
|
||||
{
|
||||
fs::create_directories(tmp_root);
|
||||
disk = std::make_shared<DB::DiskLocal>("local_disk", tmp_root, 0);
|
||||
disk = std::make_shared<DB::DiskLocal>("local_disk", tmp_root);
|
||||
}
|
||||
|
||||
void TearDown() override
|
||||
|
@ -10,7 +10,7 @@ namespace fs = std::filesystem;
|
||||
DB::DiskPtr createDisk()
|
||||
{
|
||||
fs::create_directory("tmp/");
|
||||
return std::make_shared<DB::DiskLocal>("local_disk", "tmp/", 0);
|
||||
return std::make_shared<DB::DiskLocal>("local_disk", "tmp/");
|
||||
}
|
||||
|
||||
void destroyDisk(DB::DiskPtr & disk)
|
||||
|
@ -23,7 +23,7 @@ protected:
|
||||
/// Make local disk.
|
||||
temp_dir = std::make_unique<Poco::TemporaryFile>();
|
||||
temp_dir->createDirectories();
|
||||
local_disk = std::make_shared<DiskLocal>("local_disk", getDirectory(), 0);
|
||||
local_disk = std::make_shared<DiskLocal>("local_disk", getDirectory());
|
||||
}
|
||||
|
||||
void TearDown() override
|
||||
@ -42,7 +42,7 @@ protected:
|
||||
settings->current_key = key;
|
||||
settings->current_key_fingerprint = fingerprint;
|
||||
settings->disk_path = path;
|
||||
encrypted_disk = std::make_shared<DiskEncrypted>("encrypted_disk", std::move(settings), true);
|
||||
encrypted_disk = std::make_shared<DiskEncrypted>("encrypted_disk", std::move(settings));
|
||||
}
|
||||
|
||||
String getFileNames()
|
||||
|
@ -81,7 +81,7 @@ namespace impl
|
||||
|
||||
static SipHashKey parseSipHashKey(const ColumnWithTypeAndName & key)
|
||||
{
|
||||
SipHashKey ret;
|
||||
SipHashKey ret{};
|
||||
|
||||
const auto * tuple = checkAndGetColumn<ColumnTuple>(key.column.get());
|
||||
if (!tuple)
|
||||
@ -90,6 +90,9 @@ namespace impl
|
||||
if (tuple->tupleSize() != 2)
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "wrong tuple size: key must be a tuple of 2 UInt64");
|
||||
|
||||
if (tuple->empty())
|
||||
return ret;
|
||||
|
||||
if (const auto * key0col = checkAndGetColumn<ColumnUInt64>(&(tuple->getColumn(0))))
|
||||
ret.key0 = key0col->get64(0);
|
||||
else
|
||||
|
@ -99,8 +99,8 @@ struct ReplaceRegexpImpl
|
||||
int num_captures,
|
||||
const Instructions & instructions)
|
||||
{
|
||||
re2_st::StringPiece haystack(haystack_data, haystack_length);
|
||||
re2_st::StringPiece matches[max_captures];
|
||||
std::string_view haystack(haystack_data, haystack_length);
|
||||
std::string_view matches[max_captures];
|
||||
|
||||
size_t copy_pos = 0;
|
||||
size_t match_pos = 0;
|
||||
|
@ -45,8 +45,8 @@ bool isLargerThanFifty(std::string_view str)
|
||||
/// Check for sub-patterns of the form x{n} or x{n,} can be expensive. Ignore spaces before/after n and m.
|
||||
bool SlowWithHyperscanChecker::isSlowOneRepeat(std::string_view regexp)
|
||||
{
|
||||
re2_st::StringPiece haystack(regexp.data(), regexp.size());
|
||||
re2_st::StringPiece matches[2];
|
||||
std::string_view haystack(regexp.data(), regexp.size());
|
||||
std::string_view matches[2];
|
||||
size_t start_pos = 0;
|
||||
while (start_pos < haystack.size())
|
||||
{
|
||||
@ -67,8 +67,8 @@ bool SlowWithHyperscanChecker::isSlowOneRepeat(std::string_view regexp)
|
||||
/// Check if sub-patterns of the form x{n,m} can be expensive. Ignore spaces before/after n and m.
|
||||
bool SlowWithHyperscanChecker::isSlowTwoRepeats(std::string_view regexp)
|
||||
{
|
||||
re2_st::StringPiece haystack(regexp.data(), regexp.size());
|
||||
re2_st::StringPiece matches[3];
|
||||
std::string_view haystack(regexp.data(), regexp.size());
|
||||
std::string_view matches[3];
|
||||
size_t start_pos = 0;
|
||||
while (start_pos < haystack.size())
|
||||
{
|
||||
|
@ -94,7 +94,6 @@ public:
|
||||
if (needle.empty())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Length of 'needle' argument must be greater than 0.");
|
||||
|
||||
using StringPiece = typename Regexps::Regexp::StringPieceType;
|
||||
const Regexps::Regexp holder = Regexps::createRegexp<false, false, false>(needle);
|
||||
const auto & regexp = holder.getRE2();
|
||||
|
||||
@ -111,7 +110,7 @@ public:
|
||||
groups_count, std::to_string(MAX_GROUPS_COUNT - 1));
|
||||
|
||||
// Including 0-group, which is the whole regexp.
|
||||
PODArrayWithStackMemory<StringPiece, MAX_GROUPS_COUNT> matched_groups(groups_count + 1);
|
||||
PODArrayWithStackMemory<std::string_view, MAX_GROUPS_COUNT> matched_groups(groups_count + 1);
|
||||
|
||||
ColumnArray::ColumnOffsets::MutablePtr root_offsets_col = ColumnArray::ColumnOffsets::create();
|
||||
ColumnArray::ColumnOffsets::MutablePtr nested_offsets_col = ColumnArray::ColumnOffsets::create();
|
||||
@ -160,7 +159,7 @@ public:
|
||||
/// Additional limit to fail fast on supposedly incorrect usage.
|
||||
const auto max_matches_per_row = context->getSettingsRef().regexp_max_matches_per_row;
|
||||
|
||||
PODArray<StringPiece, 0> all_matches;
|
||||
PODArray<std::string_view, 0> all_matches;
|
||||
/// Number of times RE matched on each row of haystack column.
|
||||
PODArray<size_t, 0> number_of_matches_per_row;
|
||||
|
||||
|
@ -75,7 +75,7 @@ public:
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "There are no groups in regexp: {}", needle);
|
||||
|
||||
// Including 0-group, which is the whole regexp.
|
||||
PODArrayWithStackMemory<re2_st::StringPiece, 128> matched_groups(groups_count + 1);
|
||||
PODArrayWithStackMemory<std::string_view, 128> matched_groups(groups_count + 1);
|
||||
|
||||
ColumnArray::ColumnOffsets::MutablePtr offsets_col = ColumnArray::ColumnOffsets::create();
|
||||
ColumnString::MutablePtr data_col = ColumnString::create();
|
||||
@ -89,7 +89,7 @@ public:
|
||||
{
|
||||
std::string_view current_row = column_haystack->getDataAt(i).toView();
|
||||
|
||||
if (re2->Match(re2_st::StringPiece(current_row.data(), current_row.size()),
|
||||
if (re2->Match({current_row.data(), current_row.size()},
|
||||
0, current_row.size(), re2_st::RE2::UNANCHORED, matched_groups.data(),
|
||||
static_cast<int>(matched_groups.size())))
|
||||
{
|
||||
|
@ -698,6 +698,8 @@ namespace
|
||||
|
||||
const DataTypePtr & from_type = arguments[0].type;
|
||||
|
||||
std::lock_guard lock(cache.mutex);
|
||||
|
||||
if (from_type->onlyNull())
|
||||
{
|
||||
cache.is_empty = true;
|
||||
@ -711,8 +713,6 @@ namespace
|
||||
throw Exception(
|
||||
ErrorCodes::ILLEGAL_COLUMN, "Second and third arguments of function {} must be constant arrays.", getName());
|
||||
|
||||
std::lock_guard lock(cache.mutex);
|
||||
|
||||
const ColumnPtr & from_column_uncasted = array_from->getDataPtr();
|
||||
|
||||
cache.from_column = castColumn(
|
||||
|
@ -806,6 +806,13 @@ bool FileCache::tryReserve(FileSegment & file_segment, const size_t size)
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Removes everything stored in the cache under `key`.
/// Calls assertInitialized() first, then locks the key's metadata with KeyNotFoundPolicy::THROW
/// (CacheMetadata::lockKeyMetadata raises BAD_ARGUMENTS "No such key ... in cache" for this
/// policy when the key is absent) and calls removeAll() on the locked key.
void FileCache::removeKey(const Key & key)
|
||||
{
|
||||
assertInitialized();
|
||||
auto locked_key = metadata.lockKeyMetadata(key, CacheMetadata::KeyNotFoundPolicy::THROW);
|
||||
locked_key->removeAll();
|
||||
}
|
||||
|
||||
void FileCache::removeKeyIfExists(const Key & key)
|
||||
{
|
||||
assertInitialized();
|
||||
@ -818,7 +825,14 @@ void FileCache::removeKeyIfExists(const Key & key)
|
||||
/// But if we have multiple replicated zero-copy tables on the same server
|
||||
/// it became possible to start removing something from cache when it is used
|
||||
/// by other "zero-copy" tables. That is why it's not an error.
|
||||
locked_key->removeAllReleasable();
|
||||
locked_key->removeAll(/* if_releasable */true);
|
||||
}
|
||||
|
||||
/// Removes the single file segment identified by `key` and `offset`.
/// Calls assertInitialized() first, then locks the key's metadata with KeyNotFoundPolicy::THROW
/// (an absent key makes CacheMetadata::lockKeyMetadata throw BAD_ARGUMENTS) and delegates to
/// LockedKey::removeFileSegment(offset).
/// NOTE(review): the behavior for an offset that does not exist under the key is decided
/// inside LockedKey::removeFileSegment, which is not visible here - confirm it throws.
void FileCache::removeFileSegment(const Key & key, size_t offset)
|
||||
{
|
||||
assertInitialized();
|
||||
auto locked_key = metadata.lockKeyMetadata(key, CacheMetadata::KeyNotFoundPolicy::THROW);
|
||||
locked_key->removeFileSegment(offset);
|
||||
}
|
||||
|
||||
void FileCache::removePathIfExists(const String & path)
|
||||
@ -830,22 +844,12 @@ void FileCache::removeAllReleasable()
|
||||
{
|
||||
assertInitialized();
|
||||
|
||||
auto lock = lockCache();
|
||||
|
||||
main_priority->iterate([&](LockedKey & locked_key, const FileSegmentMetadataPtr & segment_metadata)
|
||||
{
|
||||
if (segment_metadata->releasable())
|
||||
{
|
||||
auto file_segment = segment_metadata->file_segment;
|
||||
locked_key.removeFileSegment(file_segment->offset(), file_segment->lock());
|
||||
return PriorityIterationResult::REMOVE_AND_CONTINUE;
|
||||
}
|
||||
return PriorityIterationResult::CONTINUE;
|
||||
}, lock);
|
||||
metadata.iterate([](LockedKey & locked_key) { locked_key.removeAll(/* if_releasable */true); });
|
||||
|
||||
if (stash)
|
||||
{
|
||||
/// Remove all access information.
|
||||
auto lock = lockCache();
|
||||
stash->records.clear();
|
||||
stash->queue->removeAll(lock);
|
||||
}
|
||||
@ -915,7 +919,7 @@ void FileCache::loadMetadata()
|
||||
continue;
|
||||
}
|
||||
|
||||
const auto key = Key(unhexUInt<UInt128>(key_directory.filename().string().data()));
|
||||
const auto key = Key::fromKeyString(key_directory.filename().string());
|
||||
auto locked_key = metadata.lockKeyMetadata(key, CacheMetadata::KeyNotFoundPolicy::CREATE_EMPTY, /* is_initial_load */true);
|
||||
|
||||
for (fs::directory_iterator offset_it{key_directory}; offset_it != fs::directory_iterator(); ++offset_it)
|
||||
@ -1070,7 +1074,7 @@ FileSegmentsHolderPtr FileCache::getSnapshot()
|
||||
FileSegmentsHolderPtr FileCache::getSnapshot(const Key & key)
|
||||
{
|
||||
FileSegments file_segments;
|
||||
auto locked_key = metadata.lockKeyMetadata(key, CacheMetadata::KeyNotFoundPolicy::THROW);
|
||||
auto locked_key = metadata.lockKeyMetadata(key, CacheMetadata::KeyNotFoundPolicy::THROW_LOGICAL);
|
||||
for (const auto & [_, file_segment_metadata] : *locked_key->getKeyMetadata())
|
||||
file_segments.push_back(FileSegment::getSnapshot(file_segment_metadata->file_segment));
|
||||
return std::make_unique<FileSegmentsHolder>(std::move(file_segments));
|
||||
|
@ -83,13 +83,19 @@ public:
|
||||
|
||||
FileSegmentsHolderPtr set(const Key & key, size_t offset, size_t size, const CreateFileSegmentSettings & settings);
|
||||
|
||||
/// Remove files by `key`. Removes files which might be used at the moment.
|
||||
/// Remove file segment by `key` and `offset`. Throws if file segment does not exist.
|
||||
void removeFileSegment(const Key & key, size_t offset);
|
||||
|
||||
/// Remove files by `key`. Throws if key does not exist.
|
||||
void removeKey(const Key & key);
|
||||
|
||||
/// Remove files by `key`.
|
||||
void removeKeyIfExists(const Key & key);
|
||||
|
||||
/// Removes files by `path`. Removes files which might be used at the moment.
|
||||
/// Removes files by `path`.
|
||||
void removePathIfExists(const String & path);
|
||||
|
||||
/// Remove files by `key`. Will not remove files which are used at the moment.
|
||||
/// Remove all releasable file segments from the cache, i.e. those which are not used at the moment.
|
||||
void removeAllReleasable();
|
||||
|
||||
std::vector<String> tryGetCachePaths(const Key & key);
|
||||
|
@ -28,4 +28,9 @@ FileCacheKey FileCacheKey::random()
|
||||
return FileCacheKey(UUIDHelpers::generateV4().toUnderType());
|
||||
}
|
||||
|
||||
/// Builds a FileCacheKey from its string form by decoding `key_str` with unhexUInt<UInt128>.
/// NOTE(review): only the raw data pointer is passed to unhexUInt and the length of key_str is
/// not checked here; presumably unhexUInt reads a fixed number of hex digits - confirm that
/// every caller passes a string that is long enough, or validate the size before decoding.
FileCacheKey FileCacheKey::fromKeyString(const std::string & key_str)
|
||||
{
|
||||
return FileCacheKey(unhexUInt<UInt128>(key_str.data()));
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -21,6 +21,8 @@ struct FileCacheKey
|
||||
static FileCacheKey random();
|
||||
|
||||
bool operator==(const FileCacheKey & other) const { return key == other.key; }
|
||||
|
||||
static FileCacheKey fromKeyString(const std::string & key_str);
|
||||
};
|
||||
|
||||
using FileCacheKeyAndOffset = std::pair<FileCacheKey, size_t>;
|
||||
|
@ -25,6 +25,7 @@ namespace DB
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
FileSegmentMetadata::FileSegmentMetadata(FileSegmentPtr && file_segment_)
|
||||
@ -191,6 +192,8 @@ LockedKeyPtr CacheMetadata::lockKeyMetadata(
|
||||
if (it == end())
|
||||
{
|
||||
if (key_not_found_policy == KeyNotFoundPolicy::THROW)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "No such key `{}` in cache", key);
|
||||
else if (key_not_found_policy == KeyNotFoundPolicy::THROW_LOGICAL)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "No such key `{}` in cache", key);
|
||||
else if (key_not_found_policy == KeyNotFoundPolicy::RETURN_NULL)
|
||||
return nullptr;
|
||||
@ -215,6 +218,8 @@ LockedKeyPtr CacheMetadata::lockKeyMetadata(
|
||||
return locked_metadata;
|
||||
|
||||
if (key_not_found_policy == KeyNotFoundPolicy::THROW)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "No such key `{}` in cache", key);
|
||||
else if (key_not_found_policy == KeyNotFoundPolicy::THROW_LOGICAL)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "No such key `{}` in cache", key);
|
||||
|
||||
if (key_not_found_policy == KeyNotFoundPolicy::RETURN_NULL)
|
||||
@ -333,11 +338,11 @@ class DownloadQueue
|
||||
{
|
||||
friend struct CacheMetadata;
|
||||
public:
|
||||
void add(std::weak_ptr<FileSegment> file_segment)
|
||||
void add(FileSegmentPtr file_segment)
|
||||
{
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
queue.push(file_segment);
|
||||
queue.emplace(file_segment->key(), file_segment->offset(), file_segment);
|
||||
}
|
||||
|
||||
CurrentMetrics::add(CurrentMetrics::FilesystemCacheDownloadQueueElements);
|
||||
@ -356,8 +361,19 @@ private:
|
||||
|
||||
std::mutex mutex;
|
||||
std::condition_variable cv;
|
||||
std::queue<std::weak_ptr<FileSegment>> queue;
|
||||
bool cancelled = false;
|
||||
|
||||
struct DownloadInfo
|
||||
{
|
||||
CacheMetadata::Key key;
|
||||
size_t offset;
|
||||
/// We keep weak pointer to file segment
|
||||
/// instead of just getting it from file_segment_metadata,
|
||||
/// because file segment at key:offset could be removed and added back to metadata
|
||||
/// before we actually started background download.
|
||||
std::weak_ptr<FileSegment> file_segment;
|
||||
};
|
||||
std::queue<DownloadInfo> queue;
|
||||
};
|
||||
|
||||
void CacheMetadata::downloadThreadFunc()
|
||||
@ -365,6 +381,8 @@ void CacheMetadata::downloadThreadFunc()
|
||||
std::optional<Memory<>> memory;
|
||||
while (true)
|
||||
{
|
||||
Key key;
|
||||
size_t offset;
|
||||
std::weak_ptr<FileSegment> file_segment_weak;
|
||||
|
||||
{
|
||||
@ -379,7 +397,11 @@ void CacheMetadata::downloadThreadFunc()
|
||||
continue;
|
||||
}
|
||||
|
||||
file_segment_weak = download_queue->queue.front();
|
||||
auto entry = download_queue->queue.front();
|
||||
key = entry.key;
|
||||
offset = entry.offset;
|
||||
file_segment_weak = entry.file_segment;
|
||||
|
||||
download_queue->queue.pop();
|
||||
}
|
||||
|
||||
@ -389,19 +411,21 @@ void CacheMetadata::downloadThreadFunc()
|
||||
try
|
||||
{
|
||||
{
|
||||
auto file_segment = file_segment_weak.lock();
|
||||
if (!file_segment
|
||||
|| file_segment->state() != FileSegment::State::PARTIALLY_DOWNLOADED)
|
||||
continue;
|
||||
|
||||
auto locked_key = lockKeyMetadata(file_segment->key(), KeyNotFoundPolicy::RETURN_NULL);
|
||||
auto locked_key = lockKeyMetadata(key, KeyNotFoundPolicy::RETURN_NULL);
|
||||
if (!locked_key)
|
||||
continue;
|
||||
|
||||
auto file_segment_metadata = locked_key->tryGetByOffset(file_segment->offset());
|
||||
auto file_segment_metadata = locked_key->tryGetByOffset(offset);
|
||||
if (!file_segment_metadata || file_segment_metadata->evicting())
|
||||
continue;
|
||||
|
||||
auto file_segment = file_segment_weak.lock();
|
||||
|
||||
if (!file_segment
|
||||
|| file_segment != file_segment_metadata->file_segment
|
||||
|| file_segment->state() != FileSegment::State::PARTIALLY_DOWNLOADED)
|
||||
continue;
|
||||
|
||||
holder = std::make_unique<FileSegmentsHolder>(FileSegments{file_segment});
|
||||
}
|
||||
|
||||
@ -539,11 +563,11 @@ bool LockedKey::isLastOwnerOfFileSegment(size_t offset) const
|
||||
return file_segment_metadata->file_segment.use_count() == 2;
|
||||
}
|
||||
|
||||
void LockedKey::removeAllReleasable()
|
||||
void LockedKey::removeAll(bool if_releasable)
|
||||
{
|
||||
for (auto it = key_metadata->begin(); it != key_metadata->end();)
|
||||
{
|
||||
if (!it->second->releasable())
|
||||
if (if_releasable && !it->second->releasable())
|
||||
{
|
||||
++it;
|
||||
continue;
|
||||
@ -564,17 +588,32 @@ void LockedKey::removeAllReleasable()
|
||||
}
|
||||
}
|
||||
|
||||
KeyMetadata::iterator LockedKey::removeFileSegment(size_t offset)
|
||||
{
|
||||
auto it = key_metadata->find(offset);
|
||||
if (it == key_metadata->end())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no offset {}", offset);
|
||||
|
||||
auto file_segment = it->second->file_segment;
|
||||
return removeFileSegmentImpl(it, file_segment->lock());
|
||||
}
|
||||
|
||||
KeyMetadata::iterator LockedKey::removeFileSegment(size_t offset, const FileSegmentGuard::Lock & segment_lock)
|
||||
{
|
||||
auto it = key_metadata->find(offset);
|
||||
if (it == key_metadata->end())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no offset {}", offset);
|
||||
|
||||
return removeFileSegmentImpl(it, segment_lock);
|
||||
}
|
||||
|
||||
KeyMetadata::iterator LockedKey::removeFileSegmentImpl(KeyMetadata::iterator it, const FileSegmentGuard::Lock & segment_lock)
|
||||
{
|
||||
auto file_segment = it->second->file_segment;
|
||||
|
||||
LOG_DEBUG(
|
||||
key_metadata->log, "Remove from cache. Key: {}, offset: {}, size: {}",
|
||||
getKey(), offset, file_segment->reserved_size);
|
||||
getKey(), file_segment->offset(), file_segment->reserved_size);
|
||||
|
||||
chassert(file_segment->assertCorrectnessUnlocked(segment_lock));
|
||||
|
||||
|
@ -87,7 +87,7 @@ struct CacheMetadata : public std::unordered_map<FileCacheKey, KeyMetadataPtr>,
|
||||
{
|
||||
public:
|
||||
using Key = FileCacheKey;
|
||||
using IterateCacheMetadataFunc = std::function<void(const LockedKey &)>;
|
||||
using IterateCacheMetadataFunc = std::function<void(LockedKey &)>;
|
||||
|
||||
explicit CacheMetadata(const std::string & path_);
|
||||
|
||||
@ -106,6 +106,7 @@ public:
|
||||
enum class KeyNotFoundPolicy
|
||||
{
|
||||
THROW,
|
||||
THROW_LOGICAL,
|
||||
CREATE_EMPTY,
|
||||
RETURN_NULL,
|
||||
};
|
||||
@ -169,9 +170,10 @@ struct LockedKey : private boost::noncopyable
|
||||
std::shared_ptr<const KeyMetadata> getKeyMetadata() const { return key_metadata; }
|
||||
std::shared_ptr<KeyMetadata> getKeyMetadata() { return key_metadata; }
|
||||
|
||||
void removeAllReleasable();
|
||||
void removeAll(bool if_releasable = true);
|
||||
|
||||
KeyMetadata::iterator removeFileSegment(size_t offset, const FileSegmentGuard::Lock &);
|
||||
KeyMetadata::iterator removeFileSegment(size_t offset);
|
||||
|
||||
void shrinkFileSegmentToDownloadedSize(size_t offset, const FileSegmentGuard::Lock &);
|
||||
|
||||
@ -188,6 +190,8 @@ struct LockedKey : private boost::noncopyable
|
||||
std::string toString() const;
|
||||
|
||||
private:
|
||||
KeyMetadata::iterator removeFileSegmentImpl(KeyMetadata::iterator it, const FileSegmentGuard::Lock &);
|
||||
|
||||
const std::shared_ptr<KeyMetadata> key_metadata;
|
||||
KeyGuard::Lock lock; /// `lock` must be destructed before `key_metadata`.
|
||||
};
|
||||
|
@ -49,7 +49,7 @@ ConcurrentHashJoin::ConcurrentHashJoin(ContextPtr context_, std::shared_ptr<Tabl
|
||||
}
|
||||
}
|
||||
|
||||
bool ConcurrentHashJoin::addJoinedBlock(const Block & right_block, bool check_limits)
|
||||
bool ConcurrentHashJoin::addBlockToJoin(const Block & right_block, bool check_limits)
|
||||
{
|
||||
Blocks dispatched_blocks = dispatchBlock(table_join->getOnlyClause().key_names_right, right_block);
|
||||
|
||||
@ -77,7 +77,7 @@ bool ConcurrentHashJoin::addJoinedBlock(const Block & right_block, bool check_li
|
||||
if (!lock.owns_lock())
|
||||
continue;
|
||||
|
||||
bool limit_exceeded = !hash_join->data->addJoinedBlock(dispatched_block, check_limits);
|
||||
bool limit_exceeded = !hash_join->data->addBlockToJoin(dispatched_block, check_limits);
|
||||
|
||||
dispatched_block = {};
|
||||
blocks_left--;
|
||||
|
@ -16,13 +16,13 @@ namespace DB
|
||||
{
|
||||
|
||||
/**
|
||||
* Can run addJoinedBlock() in parallel to speed up the join process. In tests, it gives almost linear speedup by
|
||||
* Can run addBlockToJoin() in parallel to speed up the join process. In tests, it gives almost linear speedup by
|
||||
* the degree of parallelism.
|
||||
*
|
||||
* The default HashJoin is not thread safe for inserting right table's rows, so it runs in a single thread. When
|
||||
* the right table is large, the join process is too slow.
|
||||
*
|
||||
* We create multiple HashJoin instances here. In addJoinedBlock(), one input block is split into multiple blocks
|
||||
* We create multiple HashJoin instances here. In addBlockToJoin(), one input block is split into multiple blocks
|
||||
* corresponding to the HashJoin instances by hashing every row on the join keys. We also guarantee that every HashJoin
|
||||
* instance is written by only one thread.
|
||||
*
|
||||
@ -37,7 +37,7 @@ public:
|
||||
~ConcurrentHashJoin() override = default;
|
||||
|
||||
const TableJoin & getTableJoin() const override { return *table_join; }
|
||||
bool addJoinedBlock(const Block & block, bool check_limits) override;
|
||||
bool addBlockToJoin(const Block & block, bool check_limits) override;
|
||||
void checkTypesOfKeys(const Block & block) const override;
|
||||
void joinBlock(Block & block, std::shared_ptr<ExtraBlock> & not_processed) override;
|
||||
void setTotals(const Block & block) override;
|
||||
|
@ -875,9 +875,9 @@ catch (...)
|
||||
"It is ok to skip this exception as cleaning old temporary files is not necessary", path));
|
||||
}
|
||||
|
||||
static VolumePtr createLocalSingleDiskVolume(const std::string & path)
|
||||
static VolumePtr createLocalSingleDiskVolume(const std::string & path, const Poco::Util::AbstractConfiguration & config_)
|
||||
{
|
||||
auto disk = std::make_shared<DiskLocal>("_tmp_default", path, 0);
|
||||
auto disk = std::make_shared<DiskLocal>("_tmp_default", path, 0, config_, "storage_configuration.disks._tmp_default");
|
||||
VolumePtr volume = std::make_shared<SingleDiskVolume>("_tmp_default", disk, 0);
|
||||
return volume;
|
||||
}
|
||||
@ -893,7 +893,7 @@ void Context::setTemporaryStoragePath(const String & path, size_t max_size)
|
||||
if (!shared->tmp_path.ends_with('/'))
|
||||
shared->tmp_path += '/';
|
||||
|
||||
VolumePtr volume = createLocalSingleDiskVolume(shared->tmp_path);
|
||||
VolumePtr volume = createLocalSingleDiskVolume(shared->tmp_path, getConfigRef());
|
||||
|
||||
for (const auto & disk : volume->getDisks())
|
||||
{
|
||||
@ -966,7 +966,7 @@ void Context::setTemporaryStorageInCache(const String & cache_disk_name, size_t
|
||||
LOG_DEBUG(shared->log, "Using file cache ({}) for temporary files", file_cache->getBasePath());
|
||||
|
||||
shared->tmp_path = file_cache->getBasePath();
|
||||
VolumePtr volume = createLocalSingleDiskVolume(shared->tmp_path);
|
||||
VolumePtr volume = createLocalSingleDiskVolume(shared->tmp_path, getConfigRef());
|
||||
shared->root_temp_data_on_disk = std::make_shared<TemporaryDataOnDiskScope>(volume, file_cache.get(), max_size);
|
||||
}
|
||||
|
||||
|
@ -356,7 +356,8 @@ DatabaseAndTable DatabaseCatalog::getTableImpl(
|
||||
|
||||
auto table = database->tryGetTable(table_id.table_name, context_);
|
||||
if (!table && exception)
|
||||
exception->emplace(Exception(ErrorCodes::UNKNOWN_TABLE, "Table {} doesn't exist", table_id.getNameForLogs()));
|
||||
exception->emplace(Exception(ErrorCodes::UNKNOWN_TABLE, "Table {} doesn't exist", table_id.getNameForLogs()));
|
||||
|
||||
if (!table)
|
||||
database = nullptr;
|
||||
|
||||
|
@ -103,7 +103,7 @@ DirectKeyValueJoin::DirectKeyValueJoin(
|
||||
right_sample_block_with_storage_column_names = right_sample_block_with_storage_column_names_;
|
||||
}
|
||||
|
||||
bool DirectKeyValueJoin::addJoinedBlock(const Block &, bool)
|
||||
bool DirectKeyValueJoin::addBlockToJoin(const Block &, bool)
|
||||
{
|
||||
throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Unreachable code reached");
|
||||
}
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user